diff --git a/scripts/bigquery_cleaned_layer.py b/scripts/bigquery_cleaned_layer.py index 8754ad9..cef3d9f 100644 --- a/scripts/bigquery_cleaned_layer.py +++ b/scripts/bigquery_cleaned_layer.py @@ -296,21 +296,22 @@ def assign_direction(indicator_name: str) -> str: # FRAMEWORK CLASSIFICATION (MDGs vs SDGs) # ============================================================================= -# Daftar keyword eksplisit dari SDG Goal 2 Khusus FIES(2030 Agenda for Sustainable Development) +# Daftar keyword eksplisit dari SDG Goal 2 Khusus FIES (2030 Agenda for Sustainable Development). +# Disimpan lowercase agar matching tidak sensitif terhadap kapitalisasi input. SDG_INDICATOR_KEYWORDS = frozenset([ - "Prevalence of severe food insecurity in the total population (percent) (3-year average)", - "Prevalence of severe food insecurity in the male adult population (percent) (3-year average)", - "Prevalence of severe food insecurity in the female adult population (percent) (3-year average)", - "Prevalence of moderate or severe food insecurity in the total population (percent) (3-year average)", - "Prevalence of moderate or severe food insecurity in the male adult population (percent) (3-year average)", - "Prevalence of moderate or severe food insecurity in the female adult population (percent) (3-year average)", - "Number of severely food insecure people (million) (3-year average)", - "Number of severely food insecure male adults (million) (3-year average)", - "Number of severely food insecure female adults (million) (3-year average)", - "Number of moderately or severely food insecure people (million) (3-year average)", - "Number of moderately or severely food insecure male adults (million) (3-year average)", - "Number of moderately or severely food insecure female adults (million) (3-year average)" + "prevalence of severe food insecurity in the total population (percent) (3-year average)", + "prevalence of severe food insecurity in the male adult population (percent) (3-year 
average)", + "prevalence of severe food insecurity in the female adult population (percent) (3-year average)", + "prevalence of moderate or severe food insecurity in the total population (percent) (3-year average)", + "prevalence of moderate or severe food insecurity in the male adult population (percent) (3-year average)", + "prevalence of moderate or severe food insecurity in the female adult population (percent) (3-year average)", + "number of severely food insecure people (million) (3-year average)", + "number of severely food insecure male adults (million) (3-year average)", + "number of severely food insecure female adults (million) (3-year average)", + "number of moderately or severely food insecure people (million) (3-year average)", + "number of moderately or severely food insecure male adults (million) (3-year average)", + "number of moderately or severely food insecure female adults (million) (3-year average)", ]) @@ -320,19 +321,30 @@ def assign_framework(indicator_name: str) -> str: dari 2030 Agenda for Sustainable Development (versi Maret 2020). Logika: - - Cek apakah nama indikator mengandung keyword SDG yang terdaftar + - Lowercase nama indikator input + - Cek apakah ada keyword SDG (lowercase) yang terkandung di dalam nama indikator - Jika ya -> 'SDGs' - Jika tidak -> 'MDGs' (indikator FAO/lama yang bukan SDG resmi) + FIX: Logika `kw in ind` (cek apakah keyword terkandung di dalam ind) sudah benar; + bug sebelumnya ada pada data keyword, bukan pada logika: keyword disimpan dengan + huruf kapital di awal sementara `ind` sudah di-lowercase, sehingga pencocokan + substring tidak pernah berhasil dan fungsi selalu mengembalikan 'MDGs'. Solusi: + simpan keyword dalam lowercase di set, sehingga `kw in ind` bekerja dengan benar. + Return values: 'MDGs' atau 'SDGs' Panjang max 4 chars (dalam constraint varchar(5)). 
""" if pd.isna(indicator_name): return 'MDGs' + + # Lowercase input agar matching tidak sensitif terhadap kapitalisasi ind = str(indicator_name).lower().strip() - for kw in SDG_INDICATOR_KEYWORDS: - if kw in ind: - return 'SDGs' + + # Cek apakah salah satu keyword SDG (sudah lowercase) ada di dalam ind + if any(kw in ind for kw in SDG_INDICATOR_KEYWORDS): + return 'SDGs' + return 'MDGs'