"""
Airflow DAG — ETL Food Security BigQuery
Kimball Data Warehouse Architecture

Schedule : Once every 3 months (on the 1st, at 00:00)
           Cron: "0 0 1 */3 *" -> 1 Jan, 1 Apr, 1 Jul, 1 Oct
Catchup  : False

Kimball ETL Flow:
┌──────────────────────────────────────────────────────────────────────────┐
│  BRONZE (Raw)      SILVER (Staging→Cleaned)    GOLD (DW → Analytical)    │
│                                                                          │
│  raw_fao       ─┐                              dim_country               │
│  raw_worldbank ─┼→ staging_integrated          dim_indicator             │
│  raw_unicef    ─┘          ↓                   dim_time                  │
│                    cleaned_integrated ───────→ dim_source                │
│                                                dim_pillar                │
│                                                fact_food_security        │
│                                                        ↓                 │
│                                                analytical_food_security  │
│                                                        ↓                 │
│                                                agg_pillar_composite      │
│                                                agg_pillar_by_country     │
│                                                agg_framework_by_country  │
│                                                agg_framework_asean       │
│                                                        ↓                 │
│                                                agg_indicator_norm        │
│                                                                          │
│  AUDIT : etl_logs, etl_metadata (every layer)                            │
└──────────────────────────────────────────────────────────────────────────┘

Task Order:
    verify_bigquery_connection
    → load_fao_to_bronze
    → load_worldbank_to_bronze
    → load_unicef_to_bronze
    → staging_integration_to_silver
    → cleaned_integration_to_silver
    → dimensional_model_to_gold
    → analytical_layer_to_gold
    → aggregation_to_gold
    → indicator_norm_aggregation_to_gold

The scripts folder must contain:
    - bigquery_raw_layer.py                     (run_verify_connection, run_load_fao, ...)
    - bigquery_cleaned_layer.py                 (run_cleaned_integration)
    - bigquery_dimensional_model.py             (run_dimensional_model)
    - bigquery_analytical_layer.py              (run_analytical_layer)
    - bigquery_analysis_aggregation.py          (run_aggregation)
    - bigquery_aggraget_fact_selected_layer.py  (run_indicator_norm_aggregation)
    - bigquery_config.py
    - bigquery_helpers.py
    - bigquery_datasource.py
"""

from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator

from scripts.bigquery_raw_layer import (
    run_verify_connection,
    run_load_fao,
    run_load_worldbank,
    run_load_unicef,
    run_staging_integration,
)
from scripts.bigquery_cleaned_layer import run_cleaned_integration
from scripts.bigquery_dimensional_model import run_dimensional_model
from scripts.bigquery_analytical_layer import run_analytical_layer

# FIXED: module name adjusted to match the actual file name.
from scripts.bigquery_analysis_aggregation import run_aggregation
from scripts.bigquery_aggraget_fact_selected_layer import (
    run_indicator_norm_aggregation,
)


# Default arguments applied to every task in the DAG.
default_args = dict(
    owner='data-engineering',
    email=['d1041221004@student.untan.ac.id'],
)


# DAG definition
#
# schedule_interval = "0 0 1 */3 *"
#   ┌───────── minute       : 0
#   │ ┌─────── hour         : 0   (midnight)
#   │ │ ┌───── day of month : 1   (the 1st of every matching month)
#   │ │ │ ┌─── month        : */3 (every 3 months -> Jan, Apr, Jul, Oct)
#   │ │ │ │   ┌ day of week : *   (any)
#   0 0 1 */3 *
#
# NOTE(review): `schedule_interval` was deprecated in Airflow 2.4 in favour
# of `schedule` — confirm the target Airflow version before switching.
with DAG(
    dag_id="etl_food_security_bigquery",
    description="Kimball ETL: FAO, World Bank, UNICEF → BigQuery (Bronze → Silver → Gold) | Schedule: setiap 3 bulan",
    default_args=default_args,
    start_date=datetime(2026, 1, 1),
    schedule_interval="0 0 1 */3 *",  # once every 3 months
    catchup=False,
    tags=["food-security", "bigquery", "kimball", "quarterly"],
) as dag:

    # Each task wraps one entry point imported from the scripts package.
    task_verify = PythonOperator(
        task_id="verify_bigquery_connection",
        python_callable=run_verify_connection,
    )

    task_fao = PythonOperator(
        task_id="load_fao_to_bronze",
        python_callable=run_load_fao,
    )

    task_worldbank = PythonOperator(
        task_id="load_worldbank_to_bronze",
        python_callable=run_load_worldbank,
    )

    task_unicef = PythonOperator(
        task_id="load_unicef_to_bronze",
        python_callable=run_load_unicef,
    )

    task_staging = PythonOperator(
        task_id="staging_integration_to_silver",
        python_callable=run_staging_integration,
    )

    task_cleaned = PythonOperator(
        task_id="cleaned_integration_to_silver",
        python_callable=run_cleaned_integration,
    )

    task_dimensional = PythonOperator(
        task_id="dimensional_model_to_gold",
        python_callable=run_dimensional_model,
    )

    task_analytical = PythonOperator(
        task_id="analytical_layer_to_gold",
        python_callable=run_analytical_layer,
    )

    task_aggregation = PythonOperator(
        task_id="aggregation_to_gold",
        python_callable=run_aggregation,
    )

    task_indicator_norm = PythonOperator(
        task_id="indicator_norm_aggregation_to_gold",
        python_callable=run_indicator_norm_aggregation,
    )

    # Task dependencies: a single linear chain in the listed order, so each
    # task (including the three bronze loads) starts only after the previous
    # one in the list.
    pipeline = [
        task_verify,
        task_fao,
        task_worldbank,
        task_unicef,
        task_staging,
        task_cleaned,
        task_dimensional,
        task_analytical,
        task_aggregation,
        task_indicator_norm,
    ]
    for upstream, downstream in zip(pipeline, pipeline[1:]):
        upstream >> downstream