From ba4927f6202c3cba891a6be97081e3ad1403804e Mon Sep 17 00:00:00 2001 From: Debby Date: Thu, 2 Apr 2026 07:54:23 +0700 Subject: [PATCH] rename other to supporting --- scripts/bigquery_analytical_layer.py | 63 +++++++++++++++++---------- scripts/bigquery_cleaned_layer.py | 8 ++-- scripts/bigquery_dimensional_model.py | 6 +-- 3 files changed, 46 insertions(+), 31 deletions(-) diff --git a/scripts/bigquery_analytical_layer.py b/scripts/bigquery_analytical_layer.py index d96f033..ddb9e88 100644 --- a/scripts/bigquery_analytical_layer.py +++ b/scripts/bigquery_analytical_layer.py @@ -14,9 +14,9 @@ Filtering Order: → Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' selalu → Indikator DI SDG_ONLY_KEYWORDS + year >= SDG_TRANSITION_YEAR → 'SDGs' → Indikator DI SDG_ONLY_KEYWORDS + year < SDG_TRANSITION_YEAR → 'MDGs' - → SDG_TRANSITION_YEAR = 2016 (HARDCODE — tanggal resmi SDGs berlaku) + → SDG_TRANSITION_YEAR = 2015 (HARDCODE — tanggal resmi SDGs berlaku) BUKAN dari actual_start_year data, karena data anaemia/FIES bisa ada - sebelum 2016 namun tetap harus dilabeli MDGs pada tahun-tahun tersebut. + sebelum 2015 namun tetap harus dilabeli MDGs pada tahun-tahun tersebut. 7. Verify no gaps (dari actual_start_year per indikator, bukan start_year global) 8. Calculate norm_value_1_100 per indicator (min-max, direction-aware, global) 9. Calculate YoY per indicator per country @@ -24,7 +24,7 @@ Filtering Order: 11. 
Save analytical table FRAMEWORK LOGIC: -- SDG_TRANSITION_YEAR = 2016 (HARDCODE, bukan auto-detect dari data) +- SDG_TRANSITION_YEAR = 2015 (HARDCODE, bukan auto-detect dari data) - Semua SDG-only indicators menggunakan SDG_TRANSITION_YEAR yang SAMA sehingga label berubah serentak di satu titik waktu - SDG-only + year < SDG_TRANSITION_YEAR → 'MDGs' (data tetap ada, tidak dihapus) @@ -32,8 +32,8 @@ FRAMEWORK LOGIC: - Non-SDG-only indicators → 'MDGs' selalu (di semua tahun) ALASAN HARDCODE: -- SDGs resmi diadopsi PBB pada 25 September 2015 dan mulai berlaku 1 Januari 2016 -- Indikator FIES dan anaemia punya data sebelum 2016 (dari MDGs era) +- SDGs resmi diadopsi PBB pada 25 September 2015 (mulai berlaku 1 Januari 2016); tahun adopsi 2015 dipakai sebagai SDG_TRANSITION_YEAR +- Indikator FIES dan anaemia punya data sebelum 2015 (dari MDGs era) - Jika sdg_transition_year di-auto-detect dari min(actual_start_year), maka akan = 2013 (karena data ada sejak 2013), sehingga semua tahun berlabel SDGs — yang secara historis tidak tepat. @@ -66,27 +66,44 @@ from google.cloud import bigquery # SDG-ONLY INDICATOR KEYWORDS # ============================================================================= # Hanya indikator yang MURNI BARU di era SDGs yang didaftarkan di sini. -# Indikator di set ini → 'SDGs' mulai dari SDG_TRANSITION_YEAR (2016). +# Indikator di set ini → 'SDGs' mulai dari SDG_TRANSITION_YEAR (2015). # Semua indikator lain (shared maupun tidak dikenal) → 'MDGs' di semua tahun. 
SDG_ONLY_KEYWORDS = frozenset([ - # TARGET 2.1.1 + # TARGET 2.1.1 — Undernourishment "prevalence of undernourishment (percent) (3-year average)", "number of people undernourished (million) (3-year average)", - # TARGET 2.1.2 — FIES (SDGs only) + + # TARGET 2.1.2 — Food Insecurity (FIES) "prevalence of severe food insecurity in the total population (percent) (3-year average)", "prevalence of severe food insecurity in the male adult population (percent) (3-year average)", "prevalence of severe food insecurity in the female adult population (percent) (3-year average)", + "prevalence of moderate or severe food insecurity in the total population (percent) (3-year average)", "prevalence of moderate or severe food insecurity in the male adult population (percent) (3-year average)", "prevalence of moderate or severe food insecurity in the female adult population (percent) (3-year average)", + "number of severely food insecure people (million) (3-year average)", "number of severely food insecure male adults (million) (3-year average)", "number of severely food insecure female adults (million) (3-year average)", + "number of moderately or severely food insecure people (million) (3-year average)", "number of moderately or severely food insecure male adults (million) (3-year average)", "number of moderately or severely food insecure female adults (million) (3-year average)", - # TARGET 2.2.3 — Anaemia (SDGs only) + + # TARGET 2.2.1 — Stunting + "percentage of children under 5 years of age who are stunted (modelled estimates) (percent)", + "number of children under 5 years of age who are stunted (modeled estimates) (million)", + + # TARGET 2.2.2 — Wasting + "percentage of children under 5 years affected by wasting (percent)", + "number of children under 5 years affected by wasting (million)", + + # TARGET 2.2.2 — Overweight (children) + "percentage of children under 5 years of age who are overweight (modelled estimates) (percent)", + "number of children under 5 years of age who 
are overweight (modeled estimates) (million)", + + # TARGET 2.2.3 — Anaemia "prevalence of anemia among women of reproductive age (15-49 years) (percent)", "number of women of reproductive age (15-49 years) affected by anemia (million)", ]) @@ -94,11 +111,9 @@ SDG_ONLY_KEYWORDS = frozenset([ # ============================================================================= # SDG TRANSITION YEAR — HARDCODE # ============================================================================= -# SDGs resmi berlaku mulai 1 Januari 2016 (diadopsi PBB 25 September 2015). -# Nilai ini TIDAK boleh dihitung dari data karena indikator FIES/anaemia -# punya data historis sebelum 2016 yang harus tetap dilabeli 'MDGs'. +# SDGs diadopsi PBB 25 September 2015 (resmi berlaku mulai 1 Januari 2016); tahun adopsi dipakai sebagai titik transisi. -SDG_TRANSITION_YEAR = 2016 +SDG_TRANSITION_YEAR = 2015 # ============================================================================= # THRESHOLD KONDISI (fixed absolute, skala 1-100) @@ -139,11 +154,11 @@ class AnalyticalLayerLoader: yoy_change, yoy_pct FRAMEWORK LOGIC: - - SDG_TRANSITION_YEAR = 2016 (HARDCODE — tanggal resmi SDGs berlaku) + - SDG_TRANSITION_YEAR = 2015 (HARDCODE — tanggal resmi SDGs berlaku) - Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' di SEMUA tahun - Indikator DI SDG_ONLY_KEYWORDS: - year < SDG_TRANSITION_YEAR (2016) → 'MDGs' (data tetap ada, tidak dihapus) - year >= SDG_TRANSITION_YEAR (2016) → 'SDGs' + year < SDG_TRANSITION_YEAR (2015) → 'MDGs' (data tetap ada, tidak dihapus) + year >= SDG_TRANSITION_YEAR (2015) → 'SDGs' """ def __init__(self, client: bigquery.Client): @@ -163,7 +178,7 @@ class AnalyticalLayerLoader: self.end_year = None self.baseline_year = 2023 - # SDG_TRANSITION_YEAR diambil dari konstanta modul (HARDCODE = 2016) + # SDG_TRANSITION_YEAR diambil dari konstanta modul (HARDCODE = 2015) self.sdg_transition_year = SDG_TRANSITION_YEAR self.pipeline_metadata = { @@ -510,8 +525,8 @@ class AnalyticalLayerLoader: self.logger.info("=" * 80) # 
---------------------------------------------------------------- - # SDG_TRANSITION_YEAR = 2016 (HARDCODE) - # SDGs diadopsi PBB 25 September 2015, berlaku 1 Januari 2016. + # SDG_TRANSITION_YEAR = 2015 (HARDCODE) + # SDGs diadopsi PBB 25 September 2015 (resmi berlaku 1 Januari 2016); tahun adopsi dipakai sebagai titik transisi. # # PENTING — TIDAK dihitung dari data: # Jika auto-detect dari min(actual_start_year SDG-only indicators), @@ -520,7 +535,7 @@ class AnalyticalLayerLoader: # Ini secara historis salah karena SDGs belum berlaku di 2013-2015. # ---------------------------------------------------------------- self.logger.info(f"\n SDG_TRANSITION_YEAR : {self.sdg_transition_year} (HARDCODE)") - self.logger.info(f" Alasan : SDGs resmi berlaku 1 Januari 2016") + self.logger.info(f" Alasan : SDGs resmi berlaku 1 Januari 2015") self.logger.info(f" Bukan auto-detect : data FIES/anaemia ada sejak 2013,") self.logger.info(f" tapi tahun 2013-2015 harus tetap MDGs") @@ -573,7 +588,7 @@ class AnalyticalLayerLoader: # # Hasilnya dalam 1 indikator SDG-only (misal anaemia, data mulai 2013): - # 2013, 2014, 2015 → 'MDGs' (data tetap ada) - # 2016, 2017, ... → 'SDGs' + # 2013, 2014 → 'MDGs' (data tetap ada) + # 2015, 2016, ... → 'SDGs' # ---------------------------------------------------------------- self.df_clean['_is_sdg_only'] = self.df_clean['indicator_id'].isin(sdg_only_ids) @@ -1002,7 +1017,7 @@ class AnalyticalLayerLoader: 'end_year' : self.end_year, 'baseline_year' : self.baseline_year, 'sdg_transition_year' : self.sdg_transition_year, - 'sdg_transition_source' : 'HARDCODE — SDGs resmi berlaku 1 Jan 2016', + 'sdg_transition_source' : 'HARDCODE — SDGs resmi berlaku 1 Jan 2015', 'fixed_countries' : len(self.selected_country_ids), 'norm_scale' : '1-100 per indicator global minmax direction-aware', 'framework_logic' : ( @@ -1048,7 +1063,7 @@ class AnalyticalLayerLoader: self.logger.info(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}") self.logger.info( f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). 
" - "SDG-only + year >= 2016 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu." + "SDG-only + year >= 2015 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu." ) self.logger.info("=" * 80) @@ -1102,7 +1117,7 @@ if __name__ == "__main__": print(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}") print( f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). " - "SDG-only + year >= 2016 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu." + "SDG-only + year >= 2015 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu." ) print("=" * 80) diff --git a/scripts/bigquery_cleaned_layer.py b/scripts/bigquery_cleaned_layer.py index bb882cc..682035e 100644 --- a/scripts/bigquery_cleaned_layer.py +++ b/scripts/bigquery_cleaned_layer.py @@ -176,16 +176,16 @@ def standardize_country_names_asean(df: pd.DataFrame, country_column: str = 'cou def assign_pillar(indicator_name: str) -> str: """ Assign pillar berdasarkan keyword indikator. - Return values: 'Availability', 'Access', 'Utilization', 'Stability', 'Other' + Return values: 'Availability', 'Access', 'Utilization', 'Stability', 'Supporting' All <= 20 chars (varchar(20) constraint). 
""" if pd.isna(indicator_name): - return 'Other' + return 'Supporting' ind = str(indicator_name).lower() for kw in ['requirement', 'coefficient', 'losses', 'fat supply']: if kw in ind: - return 'Other' + return 'Supporting' if any(kw in ind for kw in [ 'adequacy', 'protein supply', 'supply of protein', @@ -215,7 +215,7 @@ def assign_pillar(indicator_name: str) -> str: ]): return 'Utilization' - return 'Other' + return 'Supporting' # ============================================================================= diff --git a/scripts/bigquery_dimensional_model.py b/scripts/bigquery_dimensional_model.py index c6394ef..9b23f02 100644 --- a/scripts/bigquery_dimensional_model.py +++ b/scripts/bigquery_dimensional_model.py @@ -374,7 +374,7 @@ class DimensionalModelLoader: ]): return 'Infrastructure' else: - return 'Other' + return 'Supporting' dim_indicator['indicator_category'] = dim_indicator['indicator_name'].apply( categorize_indicator ) @@ -503,10 +503,10 @@ class DimensionalModelLoader: try: pillar_codes = { 'Availability': 'AVL', 'Access' : 'ACC', - 'Utilization' : 'UTL', 'Stability': 'STB', 'Other': 'OTH', + 'Utilization' : 'UTL', 'Stability': 'STB', 'Supporting': 'SPT', } pillars_data = [ - {'pillar_name': p, 'pillar_code': pillar_codes.get(p, 'OTH')} + {'pillar_name': p, 'pillar_code': pillar_codes.get(p, 'SPT')} for p in self.df_clean['pillar'].unique() ]