add airflow task functions
This commit is contained in:
@@ -426,16 +426,6 @@ class StagingDataIntegration:
|
|||||||
Input : RAW layer (Bronze) — raw_fao, raw_worldbank, raw_unicef
|
Input : RAW layer (Bronze) — raw_fao, raw_worldbank, raw_unicef
|
||||||
Output : STAGING layer (Silver) — staging_integrated
|
Output : STAGING layer (Silver) — staging_integrated
|
||||||
Audit : etl_logs, etl_metadata (Audit → fs_asean_audit)
|
Audit : etl_logs, etl_metadata (Audit → fs_asean_audit)
|
||||||
|
|
||||||
Schema staging_integrated:
|
|
||||||
source varchar(20)
|
|
||||||
indicator_original varchar(255)
|
|
||||||
indicator_standardized varchar(255)
|
|
||||||
country varchar(100)
|
|
||||||
year int
|
|
||||||
year_range varchar(20)
|
|
||||||
value float
|
|
||||||
unit varchar(20)
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, client: bigquery.Client):
|
def __init__(self, client: bigquery.Client):
|
||||||
@@ -799,3 +789,46 @@ if __name__ == "__main__":
|
|||||||
print(f"STAGING (Silver) : staging_integrated")
|
print(f"STAGING (Silver) : staging_integrated")
|
||||||
print(f"AUDIT : etl_logs, etl_metadata")
|
print(f"AUDIT : etl_logs, etl_metadata")
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
|
|
||||||
|
# AIRFLOW TASK FUNCTIONS
|
||||||
|
|
||||||
|
def run_verify_connection():
    """Airflow task: verify that the BigQuery setup is reachable.

    Raises:
        RuntimeError: if ``verify_setup()`` reports a failed setup.
            (RuntimeError subclasses Exception, so existing callers that
            catch Exception are unaffected.)
    """
    # Imported inside the task so the DAG file can be parsed by the
    # scheduler without the BigQuery configuration being importable.
    from scripts.bigquery_config import verify_setup

    if not verify_setup():
        raise RuntimeError("BigQuery connection failed!")
    print("BigQuery connection OK")
|
||||||
|
|
||||||
|
def run_load_fao():
    """Airflow task: ingest the FAO source into the RAW (Bronze) layer."""
    # Lazy import keeps DAG parsing free of BigQuery configuration.
    from scripts.bigquery_config import get_bigquery_client

    bq_client = get_bigquery_client()
    fao_frame = FAODataSource(bq_client).run()
    print(f"FAO loaded: {len(fao_frame):,} rows")
|
||||||
|
|
||||||
|
def run_load_worldbank():
    """Airflow task: ingest World Bank data scoped to the FAO indicator set."""
    # Lazy import keeps DAG parsing free of BigQuery configuration.
    from scripts.bigquery_config import get_bigquery_client

    bq_client = get_bigquery_client()
    # The FAO source is run first so its distinct indicator names can be
    # passed to the World Bank loader (presumably to align the two feeds —
    # NOTE(review): confirm WorldBankDataSource filters on this list).
    fao_frame = FAODataSource(bq_client).run()
    indicator_names = fao_frame['indicator'].unique().tolist()

    wb_frame = WorldBankDataSource(bq_client, indicator_names).run()
    print(f"World Bank loaded: {len(wb_frame):,} rows")
|
||||||
|
|
||||||
|
def run_load_unicef():
    """Airflow task: ingest UNICEF data scoped to the FAO indicator set."""
    # Lazy import keeps DAG parsing free of BigQuery configuration.
    from scripts.bigquery_config import get_bigquery_client

    bq_client = get_bigquery_client()
    # Mirror of run_load_worldbank: FAO runs first and its distinct
    # indicators parameterize the UNICEF loader.
    fao_frame = FAODataSource(bq_client).run()
    indicator_names = fao_frame['indicator'].unique().tolist()

    unicef_frame = UNICEFDataSource(bq_client, indicator_names).run()
    print(f"UNICEF loaded: {len(unicef_frame):,} rows")
|
||||||
|
|
||||||
|
def run_staging_integration():
    """Airflow task: build the STAGING (Silver) integrated table."""
    # Lazy import keeps DAG parsing free of BigQuery configuration.
    from scripts.bigquery_config import get_bigquery_client

    bq_client = get_bigquery_client()
    integrated = StagingDataIntegration(bq_client).run()
    print(f"Staging integrated: {len(integrated):,} rows")
|
||||||
Reference in New Issue
Block a user