raw and staging data
This commit is contained in:
@@ -21,6 +21,7 @@ Kimball ETL Flow:
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from google.cloud import bigquery
|
||||
from google.oauth2 import service_account
|
||||
@@ -88,25 +89,6 @@ KIMBALL_LAYER_MAP = {
|
||||
"dw" : "gold",
|
||||
}
|
||||
|
||||
# SETUP BIGQUERY CLIENT
|
||||
|
||||
def get_bigquery_client() -> bigquery.Client:
    """
    Create a BigQuery client authenticated with service account credentials.

    The key file at CREDENTIALS_PATH is used whenever it exists. If the file
    is missing and the GOOGLE_CREDENTIALS_JSON environment variable is set,
    the service account JSON stored in that variable is used instead, so the
    client accepts the same credential sources that verify_setup() checks.

    Returns:
        bigquery.Client: Authenticated BigQuery client

    Raises:
        json.JSONDecodeError: If GOOGLE_CREDENTIALS_JSON is used but does not
            contain valid JSON.
        Exception: Whatever from_service_account_file raises when the key
            file is missing or invalid and no environment fallback is set
            (unchanged from the file-only behaviour).
    """
    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    credentials_json = os.environ.get("GOOGLE_CREDENTIALS_JSON")

    if credentials_json and not os.path.exists(CREDENTIALS_PATH):
        # Fallback for environments that inject the key as an env variable
        # instead of mounting a file.
        credentials = service_account.Credentials.from_service_account_info(
            json.loads(credentials_json),
            scopes=scopes,
        )
    else:
        # Default path: read the key file. With no file and no env variable
        # this raises exactly as it did before the fallback was added.
        credentials = service_account.Credentials.from_service_account_file(
            CREDENTIALS_PATH,
            scopes=scopes,
        )

    return bigquery.Client(
        credentials=credentials,
        project=PROJECT_ID,
        location=LOCATION,
    )
|
||||
|
||||
# MATCHING CONFIGURATION
|
||||
|
||||
CONFIG = {
|
||||
@@ -166,7 +148,6 @@ for directory in [EXPORTS_DIR, LOGS_DIR]:
|
||||
# HELPER FUNCTIONS
|
||||
|
||||
def get_table_id(table_name: str, layer: str = "bronze") -> str:
|
||||
|
||||
# Resolve Kimball alias ke layer name
|
||||
resolved = KIMBALL_LAYER_MAP.get(layer.lower(), layer.lower())
|
||||
dataset = LAYER_DATASET_MAP.get(resolved, DATASET_BRONZE)
|
||||
@@ -174,17 +155,6 @@ def get_table_id(table_name: str, layer: str = "bronze") -> str:
|
||||
|
||||
|
||||
def table_exists(client: bigquery.Client, table_name: str, layer: str = "bronze") -> bool:
|
||||
"""
|
||||
Check apakah table ada di BigQuery
|
||||
|
||||
Args:
|
||||
client : BigQuery client
|
||||
table_name : Nama table
|
||||
layer : Layer — 'bronze'/'raw', 'silver'/'staging', 'gold'/'dw'
|
||||
|
||||
Returns:
|
||||
bool: True jika table ada
|
||||
"""
|
||||
try:
|
||||
client.get_table(get_table_id(table_name, layer))
|
||||
return True
|
||||
@@ -193,14 +163,6 @@ def table_exists(client: bigquery.Client, table_name: str, layer: str = "bronze"
|
||||
|
||||
|
||||
def delete_table(client: bigquery.Client, table_name: str, layer: str = "bronze"):
|
||||
"""
|
||||
Delete table jika ada
|
||||
|
||||
Args:
|
||||
client : BigQuery client
|
||||
table_name : Nama table
|
||||
layer : Layer — 'bronze'/'raw', 'silver'/'staging', 'gold'/'dw'
|
||||
"""
|
||||
table_id = get_table_id(table_name, layer)
|
||||
try:
|
||||
client.delete_table(table_id, not_found_ok=True)
|
||||
@@ -210,13 +172,6 @@ def delete_table(client: bigquery.Client, table_name: str, layer: str = "bronze"
|
||||
|
||||
|
||||
def create_dataset_if_not_exists(client: bigquery.Client, dataset_id: str):
|
||||
"""
|
||||
Create dataset jika belum ada
|
||||
|
||||
Args:
|
||||
client : BigQuery client
|
||||
dataset_id : Dataset ID string
|
||||
"""
|
||||
full_id = f"{PROJECT_ID}.{dataset_id}"
|
||||
try:
|
||||
client.get_dataset(full_id)
|
||||
@@ -229,7 +184,6 @@ def create_dataset_if_not_exists(client: bigquery.Client, dataset_id: str):
|
||||
|
||||
|
||||
def create_all_datasets(client: bigquery.Client):
|
||||
"""Create semua 3 dataset (Raw/Staging/DW) jika belum ada"""
|
||||
print("Setting up BigQuery Datasets (Kimball DW)...")
|
||||
for layer, dataset_id in LAYER_DATASET_MAP.items():
|
||||
create_dataset_if_not_exists(client, dataset_id)
|
||||
@@ -238,21 +192,14 @@ def create_all_datasets(client: bigquery.Client):
|
||||
# VERIFICATION
|
||||
|
||||
def verify_setup() -> bool:
|
||||
"""
|
||||
Verify BigQuery setup untuk semua 3 layer (Raw / Staging / DW)
|
||||
|
||||
Checks:
|
||||
1. Credentials file exists
|
||||
2. Koneksi ke BigQuery berhasil
|
||||
3. Semua dataset ada atau berhasil dibuat
|
||||
"""
|
||||
print("=" * 60)
|
||||
print("BIGQUERY SETUP VERIFICATION")
|
||||
print("Kimball DW Architecture")
|
||||
print("=" * 60)
|
||||
|
||||
# 1. Credentials
|
||||
if not os.path.exists(CREDENTIALS_PATH):
|
||||
credentials_json = os.environ.get("GOOGLE_CREDENTIALS_JSON")
|
||||
if not credentials_json and not os.path.exists(CREDENTIALS_PATH):
|
||||
print(f"Credentials not found : {CREDENTIALS_PATH}")
|
||||
return False
|
||||
print(f"✓ Credentials found")
|
||||
@@ -284,15 +231,16 @@ def verify_setup() -> bool:
|
||||
print("=" * 60)
|
||||
return True
|
||||
|
||||
|
||||
# INITIALIZE ON IMPORT
|
||||
|
||||
if __name__ == "__main__":
    # Run as a script: perform the full setup verification.
    verify_setup()
else:
    # Imported as a module: print a summary of the active configuration.
    # Each setting is printed exactly once (Project and Location were
    # previously emitted twice).
    print("BigQuery Config Loaded — Kimball DW Architecture")
    print(f" Project : {PROJECT_ID}")
    print(f" Raw (Bronze) : {DATASET_BRONZE}")
    print(f" Staging (Silver) : {DATASET_SILVER}")
    print(f" DW (Gold) : {DATASET_GOLD}")
    print(f" Audit : {DATASET_AUDIT}")
    print(f" Location : {LOCATION}")
|
||||
Reference in New Issue
Block a user