Hardcode SDG transition year to 2016 (no longer auto-detected from data)
This commit is contained in:
@@ -12,10 +12,11 @@ Filtering Order:
|
|||||||
→ Semua baris tetap ada; label framework ditentukan di Step 6
|
→ Semua baris tetap ada; label framework ditentukan di Step 6
|
||||||
6. Assign framework (MDGs/SDGs) per indicator PER ROW
|
6. Assign framework (MDGs/SDGs) per indicator PER ROW
|
||||||
→ Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' selalu
|
→ Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' selalu
|
||||||
→ Indikator DI SDG_ONLY_KEYWORDS + year >= sdg_transition_year → 'SDGs'
|
→ Indikator DI SDG_ONLY_KEYWORDS + year >= SDG_TRANSITION_YEAR → 'SDGs'
|
||||||
→ Indikator DI SDG_ONLY_KEYWORDS + year < sdg_transition_year → 'MDGs'
|
→ Indikator DI SDG_ONLY_KEYWORDS + year < SDG_TRANSITION_YEAR → 'MDGs'
|
||||||
→ sdg_transition_year = min(actual_start_year) dari semua SDG-only indicators
|
→ SDG_TRANSITION_YEAR = 2016 (HARDCODE — tahun SDGs resmi mulai berlaku, yaitu 1 Januari 2016)
|
||||||
yang lolos filter (= tahun pertama data SDG-only konsisten di semua countries)
|
BUKAN dari actual_start_year data, karena data anaemia/FIES bisa ada
|
||||||
|
sebelum 2016 namun tetap harus dilabeli MDGs pada tahun-tahun tersebut.
|
||||||
7. Verify no gaps (dari actual_start_year per indikator, bukan start_year global)
|
7. Verify no gaps (dari actual_start_year per indikator, bukan start_year global)
|
||||||
8. Calculate norm_value_1_100 per indicator (min-max, direction-aware, global)
|
8. Calculate norm_value_1_100 per indicator (min-max, direction-aware, global)
|
||||||
9. Calculate YoY per indicator per country
|
9. Calculate YoY per indicator per country
|
||||||
@@ -23,12 +24,19 @@ Filtering Order:
|
|||||||
11. Save analytical table
|
11. Save analytical table
|
||||||
|
|
||||||
FRAMEWORK LOGIC:
|
FRAMEWORK LOGIC:
|
||||||
- sdg_transition_year dihitung SATU KALI dari actual_start_year SDG-only indicators
|
- SDG_TRANSITION_YEAR = 2016 (HARDCODE, bukan auto-detect dari data)
|
||||||
- Semua SDG-only indicators menggunakan sdg_transition_year yang SAMA
|
- Semua SDG-only indicators menggunakan SDG_TRANSITION_YEAR yang SAMA
|
||||||
sehingga label berubah serentak di satu titik waktu
|
sehingga label berubah serentak di satu titik waktu
|
||||||
- Baris sebelum sdg_transition_year → 'MDGs' (data tetap ada, tidak dihapus)
|
- SDG-only + year < SDG_TRANSITION_YEAR → 'MDGs' (data tetap ada, tidak dihapus)
|
||||||
- Baris mulai sdg_transition_year → 'SDGs'
|
- SDG-only + year >= SDG_TRANSITION_YEAR → 'SDGs'
|
||||||
- Indikator non-SDG-only → 'MDGs' selalu
|
- Non-SDG-only indicators → 'MDGs' selalu (di semua tahun)
|
||||||
|
|
||||||
|
ALASAN HARDCODE:
|
||||||
|
- SDGs resmi diadopsi PBB pada 25 September 2015 dan mulai berlaku 1 Januari 2016
|
||||||
|
- Indikator FIES dan anaemia punya data sebelum 2016 (dari MDGs era)
|
||||||
|
- Jika sdg_transition_year di-auto-detect dari min(actual_start_year),
|
||||||
|
maka akan = 2013 (karena data ada sejak 2013), sehingga semua tahun
|
||||||
|
berlabel SDGs — yang secara historis tidak tepat.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -58,10 +66,13 @@ from google.cloud import bigquery
|
|||||||
# SDG-ONLY INDICATOR KEYWORDS
|
# SDG-ONLY INDICATOR KEYWORDS
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Hanya indikator yang MURNI BARU di era SDGs yang didaftarkan di sini.
|
# Hanya indikator yang MURNI BARU di era SDGs yang didaftarkan di sini.
|
||||||
# Indikator di set ini → 'SDGs' mulai dari sdg_transition_year.
|
# Indikator di set ini → 'SDGs' mulai dari SDG_TRANSITION_YEAR (2016).
|
||||||
# Semua indikator lain (shared maupun tidak dikenal) → 'MDGs' di semua tahun.
|
# Semua indikator lain (shared maupun tidak dikenal) → 'MDGs' di semua tahun.
|
||||||
|
|
||||||
SDG_ONLY_KEYWORDS = frozenset([
|
SDG_ONLY_KEYWORDS = frozenset([
|
||||||
|
# TARGET 2.1.1
|
||||||
|
"prevalence of undernourishment (percent) (3-year average)",
|
||||||
|
"number of people undernourished (million) (3-year average)",
|
||||||
# TARGET 2.1.2 — FIES (SDGs only)
|
# TARGET 2.1.2 — FIES (SDGs only)
|
||||||
"prevalence of severe food insecurity in the total population (percent) (3-year average)",
|
"prevalence of severe food insecurity in the total population (percent) (3-year average)",
|
||||||
"prevalence of severe food insecurity in the male adult population (percent) (3-year average)",
|
"prevalence of severe food insecurity in the male adult population (percent) (3-year average)",
|
||||||
@@ -80,6 +91,15 @@ SDG_ONLY_KEYWORDS = frozenset([
|
|||||||
"number of women of reproductive age (15-49 years) affected by anemia (million)",
|
"number of women of reproductive age (15-49 years) affected by anemia (million)",
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# SDG TRANSITION YEAR — HARDCODE
|
||||||
|
# =============================================================================
|
||||||
|
# SDGs resmi berlaku mulai 1 Januari 2016 (diadopsi PBB 25 September 2015).
|
||||||
|
# Nilai ini TIDAK boleh dihitung dari data karena indikator FIES/anaemia
|
||||||
|
# punya data historis sebelum 2016 yang harus tetap dilabeli 'MDGs'.
|
||||||
|
|
||||||
|
SDG_TRANSITION_YEAR = 2016
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# THRESHOLD KONDISI (fixed absolute, skala 1-100)
|
# THRESHOLD KONDISI (fixed absolute, skala 1-100)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -119,13 +139,11 @@ class AnalyticalLayerLoader:
|
|||||||
yoy_change, yoy_pct
|
yoy_change, yoy_pct
|
||||||
|
|
||||||
FRAMEWORK LOGIC:
|
FRAMEWORK LOGIC:
|
||||||
|
- SDG_TRANSITION_YEAR = 2016 (HARDCODE — tanggal resmi SDGs berlaku)
|
||||||
- Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' di SEMUA tahun
|
- Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' di SEMUA tahun
|
||||||
- Indikator DI SDG_ONLY_KEYWORDS:
|
- Indikator DI SDG_ONLY_KEYWORDS:
|
||||||
year < sdg_transition_year → 'MDGs' (data tetap ada, tidak dihapus)
|
year < SDG_TRANSITION_YEAR (2016) → 'MDGs' (data tetap ada, tidak dihapus)
|
||||||
year >= sdg_transition_year → 'SDGs'
|
year >= SDG_TRANSITION_YEAR (2016) → 'SDGs'
|
||||||
- sdg_transition_year = min(actual_start_year) dari semua SDG-only indicators
|
|
||||||
yang lolos filter Step 3-5. Semua SDG-only indicators menggunakan
|
|
||||||
sdg_transition_year yang SAMA agar label berubah serentak.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, client: bigquery.Client):
|
def __init__(self, client: bigquery.Client):
|
||||||
@@ -140,12 +158,14 @@ class AnalyticalLayerLoader:
|
|||||||
|
|
||||||
self.selected_country_ids = None
|
self.selected_country_ids = None
|
||||||
self.indicator_max_start_map = {} # indicator_id → max_start_year (dari Step 5)
|
self.indicator_max_start_map = {} # indicator_id → max_start_year (dari Step 5)
|
||||||
self.sdg_transition_year = None # tahun SDGs mulai berlaku (dari Step 6)
|
|
||||||
|
|
||||||
self.start_year = 2013
|
self.start_year = 2013
|
||||||
self.end_year = None
|
self.end_year = None
|
||||||
self.baseline_year = 2023
|
self.baseline_year = 2023
|
||||||
|
|
||||||
|
# SDG_TRANSITION_YEAR diambil dari konstanta modul (HARDCODE = 2016)
|
||||||
|
self.sdg_transition_year = SDG_TRANSITION_YEAR
|
||||||
|
|
||||||
self.pipeline_metadata = {
|
self.pipeline_metadata = {
|
||||||
'source_class' : self.__class__.__name__,
|
'source_class' : self.__class__.__name__,
|
||||||
'start_time' : None,
|
'start_time' : None,
|
||||||
@@ -455,14 +475,14 @@ class AnalyticalLayerLoader:
|
|||||||
# Filter hanya indikator yang valid.
|
# Filter hanya indikator yang valid.
|
||||||
# PENTING: TIDAK menghapus baris year < max_start_year.
|
# PENTING: TIDAK menghapus baris year < max_start_year.
|
||||||
# Semua baris tetap ada — label framework ditentukan di Step 6.
|
# Semua baris tetap ada — label framework ditentukan di Step 6.
|
||||||
# max_start_year disimpan sebagai lookup untuk Step 6 & 7.
|
# max_start_year disimpan sebagai lookup untuk Step 7 (dan untuk logging actual_start di Step 6).
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
original_count = len(self.df_clean)
|
original_count = len(self.df_clean)
|
||||||
self.df_clean = self.df_clean[
|
self.df_clean = self.df_clean[
|
||||||
self.df_clean['indicator_id'].isin(valid_indicators)
|
self.df_clean['indicator_id'].isin(valid_indicators)
|
||||||
].copy()
|
].copy()
|
||||||
|
|
||||||
# Simpan max_start_year per indicator_id untuk Step 6 dan Step 7
|
# Simpan max_start_year per indicator_id untuk Step 7 (juga dibaca Step 6 hanya untuk logging)
|
||||||
self.indicator_max_start_map = (
|
self.indicator_max_start_map = (
|
||||||
indicator_max_start[indicator_max_start['indicator_id'].isin(valid_indicators)]
|
indicator_max_start[indicator_max_start['indicator_id'].isin(valid_indicators)]
|
||||||
.set_index('indicator_id')['max_start_year']
|
.set_index('indicator_id')['max_start_year']
|
||||||
@@ -484,86 +504,79 @@ class AnalyticalLayerLoader:
|
|||||||
# STEP 6: ASSIGN FRAMEWORK PER ROW
|
# STEP 6: ASSIGN FRAMEWORK PER ROW
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
def determine_sdg_start_year(self):
|
def assign_framework(self):
|
||||||
self.logger.info("\n" + "=" * 80)
|
self.logger.info("\n" + "=" * 80)
|
||||||
self.logger.info("STEP 6: ASSIGN FRAMEWORK PER ROW")
|
self.logger.info("STEP 6: ASSIGN FRAMEWORK PER ROW")
|
||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
# Bangun tabel actual_start_year per indikator dari
|
# SDG_TRANSITION_YEAR = 2016 (HARDCODE)
|
||||||
# indicator_max_start_map yang sudah ditetapkan di Step 5.
|
# SDGs diadopsi PBB 25 September 2015, berlaku 1 Januari 2016.
|
||||||
|
#
|
||||||
|
# PENTING — TIDAK dihitung dari data:
|
||||||
|
# Jika auto-detect dari min(actual_start_year SDG-only indicators),
|
||||||
|
# hasilnya = 2013 (karena data FIES/anaemia ada sejak 2013).
|
||||||
|
# Akibatnya year >= 2013 → SDGs → SEMUA tahun berlabel SDGs.
|
||||||
|
# Ini secara historis salah karena SDGs belum berlaku di 2013-2015.
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
indicator_actual_start = pd.DataFrame([
|
self.logger.info(f"\n SDG_TRANSITION_YEAR : {self.sdg_transition_year} (HARDCODE)")
|
||||||
{'indicator_id': ind_id, 'actual_start_year': int(start_yr)}
|
self.logger.info(f" Alasan : SDGs resmi berlaku 1 Januari 2016")
|
||||||
for ind_id, start_yr in self.indicator_max_start_map.items()
|
self.logger.info(f" Bukan auto-detect : data FIES/anaemia ada sejak 2013,")
|
||||||
])
|
self.logger.info(f" tapi tahun 2013-2015 harus tetap MDGs")
|
||||||
|
|
||||||
# Merge indicator_name untuk logging
|
# ----------------------------------------------------------------
|
||||||
indicator_actual_start = indicator_actual_start.merge(
|
# Identifikasi indikator SDG-only berdasarkan SDG_ONLY_KEYWORDS
|
||||||
self.df_clean[['indicator_id', 'indicator_name']].drop_duplicates(),
|
# ----------------------------------------------------------------
|
||||||
on='indicator_id', how='left'
|
indicator_info = (
|
||||||
|
self.df_clean[['indicator_id', 'indicator_name']]
|
||||||
|
.drop_duplicates()
|
||||||
|
.copy()
|
||||||
)
|
)
|
||||||
|
indicator_info['is_sdg_only'] = (
|
||||||
# Tandai mana yang SDG-only
|
indicator_info['indicator_name']
|
||||||
indicator_actual_start['is_sdg_only'] = (
|
.str.lower()
|
||||||
indicator_actual_start['indicator_name']
|
.str.strip()
|
||||||
.str.lower().str.strip()
|
|
||||||
.isin(SDG_ONLY_KEYWORDS)
|
.isin(SDG_ONLY_KEYWORDS)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
sdg_only_ids = set(
|
||||||
|
indicator_info.loc[indicator_info['is_sdg_only'], 'indicator_id']
|
||||||
|
)
|
||||||
|
non_sdg_ids = set(
|
||||||
|
indicator_info.loc[~indicator_info['is_sdg_only'], 'indicator_id']
|
||||||
|
)
|
||||||
|
|
||||||
|
self.logger.info(f"\n SDG-only indicators ({len(sdg_only_ids)}):")
|
||||||
|
for _, row in indicator_info[indicator_info['is_sdg_only']].iterrows():
|
||||||
|
actual_start = self.indicator_max_start_map.get(row['indicator_id'], '?')
|
||||||
|
self.logger.info(
|
||||||
|
f" [SDG-only] id={int(row['indicator_id'])} "
|
||||||
|
f"actual_start={actual_start} | {row['indicator_name']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.logger.info(f"\n Non-SDG-only indicators ({len(non_sdg_ids)}): → MDGs selalu")
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
# sdg_transition_year = min(actual_start_year) dari semua SDG-only
|
# Validasi: pastikan ada SDG-only indicators yang lolos filter
|
||||||
# indicators yang lolos filter.
|
|
||||||
# Ini adalah satu titik waktu di mana semua SDG-only indicators
|
|
||||||
# berubah dari 'MDGs' ke 'SDGs' secara SERENTAK.
|
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
sdg_only_df = indicator_actual_start[indicator_actual_start['is_sdg_only']]
|
if not sdg_only_ids:
|
||||||
if sdg_only_df.empty:
|
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Tidak ada indikator SDG-only (FIES/anaemia) yang lolos filter. "
|
"Tidak ada indikator SDG-only (FIES/anaemia) yang lolos filter. "
|
||||||
"Pastikan indikator FIES dan anaemia ada di data."
|
"Pastikan nama indikator di SDG_ONLY_KEYWORDS cocok dengan data BigQuery."
|
||||||
)
|
)
|
||||||
|
|
||||||
self.sdg_transition_year = int(sdg_only_df['actual_start_year'].min())
|
|
||||||
|
|
||||||
self.logger.info(f"\n SDG-only indicators dan actual_start_year masing-masing:")
|
|
||||||
self.logger.info(f" {'-'*80}")
|
|
||||||
for _, row in sdg_only_df.iterrows():
|
|
||||||
self.logger.info(
|
|
||||||
f" [SDG-only] actual_start={int(row['actual_start_year'])} | "
|
|
||||||
f"{row['indicator_name']}"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.logger.info(
|
|
||||||
f"\n sdg_transition_year = {self.sdg_transition_year} "
|
|
||||||
f"(min actual_start_year dari semua SDG-only indicators)"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.logger.info(f"\n Logika assign framework (PER BARIS):")
|
|
||||||
self.logger.info(f" ──────────────────────────────────────────────────────────")
|
|
||||||
self.logger.info(f" Indikator TIDAK di SDG_ONLY_KEYWORDS:")
|
|
||||||
self.logger.info(f" → 'MDGs' di semua tahun")
|
|
||||||
self.logger.info(f" Indikator DI SDG_ONLY_KEYWORDS:")
|
|
||||||
self.logger.info(f" year < {self.sdg_transition_year} → 'MDGs' (data tetap ada)")
|
|
||||||
self.logger.info(f" year >= {self.sdg_transition_year} → 'SDGs'")
|
|
||||||
self.logger.info(f" ──────────────────────────────────────────────────────────")
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
# Assign framework dengan vectorized operation menggunakan
|
# Assign framework dengan vectorized np.where:
|
||||||
# sdg_transition_year (SATU nilai untuk semua SDG-only indicators)
|
#
|
||||||
|
# Kondisi SDG-only AND year >= SDG_TRANSITION_YEAR → 'SDGs'
|
||||||
|
# Semua kondisi lain (non-SDG-only ATAU year < SDG_TRANSITION_YEAR) → 'MDGs'
|
||||||
|
#
|
||||||
|
# Hasilnya dalam 1 indikator SDG-only (misal anaemia, data mulai 2013):
|
||||||
|
# 2013, 2014, 2015 → 'MDGs' (data tetap ada)
|
||||||
|
# 2016, 2017, ... → 'SDGs'
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
# Tandai apakah setiap baris adalah SDG-only indicator
|
|
||||||
sdg_only_ids = set(
|
|
||||||
indicator_actual_start.loc[
|
|
||||||
indicator_actual_start['is_sdg_only'], 'indicator_id'
|
|
||||||
]
|
|
||||||
)
|
|
||||||
self.df_clean['_is_sdg_only'] = self.df_clean['indicator_id'].isin(sdg_only_ids)
|
self.df_clean['_is_sdg_only'] = self.df_clean['indicator_id'].isin(sdg_only_ids)
|
||||||
|
|
||||||
# Assign framework:
|
|
||||||
# - Bukan SDG-only → 'MDGs'
|
|
||||||
# - SDG-only AND year >= sdg_transition_year → 'SDGs'
|
|
||||||
# - SDG-only AND year < sdg_transition_year → 'MDGs'
|
|
||||||
self.df_clean['framework'] = np.where(
|
self.df_clean['framework'] = np.where(
|
||||||
self.df_clean['_is_sdg_only'] &
|
self.df_clean['_is_sdg_only'] &
|
||||||
(self.df_clean['year'] >= self.sdg_transition_year),
|
(self.df_clean['year'] >= self.sdg_transition_year),
|
||||||
@@ -571,19 +584,26 @@ class AnalyticalLayerLoader:
|
|||||||
'MDGs'
|
'MDGs'
|
||||||
)
|
)
|
||||||
|
|
||||||
# Drop kolom bantu
|
|
||||||
self.df_clean = self.df_clean.drop(columns=['_is_sdg_only'])
|
self.df_clean = self.df_clean.drop(columns=['_is_sdg_only'])
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
# Log verifikasi per indikator
|
# Log verifikasi per indikator — tampilkan split MDGs/SDGs per tahun
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
|
self.logger.info(f"\n Logika assign framework (PER BARIS):")
|
||||||
|
self.logger.info(f" {'─'*72}")
|
||||||
|
self.logger.info(f" Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' di semua tahun")
|
||||||
|
self.logger.info(f" Indikator DI SDG_ONLY_KEYWORDS:")
|
||||||
|
self.logger.info(f" year < {self.sdg_transition_year} → 'MDGs' (data tetap ada, tidak dihapus)")
|
||||||
|
self.logger.info(f" year >= {self.sdg_transition_year} → 'SDGs'")
|
||||||
|
self.logger.info(f" {'─'*72}")
|
||||||
|
|
||||||
self.logger.info(f"\n Verifikasi framework per indikator:")
|
self.logger.info(f"\n Verifikasi framework per indikator:")
|
||||||
self.logger.info(f" {'-'*110}")
|
self.logger.info(f" {'─'*115}")
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
f" {'ID':<5} {'Indicator Name':<52} {'Data From':<12} "
|
f" {'ID':<5} {'Indicator Name':<52} {'Data From':<11} "
|
||||||
f"{'MDGs rows':<12} {'SDGs rows':<12} {'Note'}"
|
f"{'MDGs rows':<11} {'SDGs rows':<11} {'Note'}"
|
||||||
)
|
)
|
||||||
self.logger.info(f" {'-'*110}")
|
self.logger.info(f" {'─'*115}")
|
||||||
|
|
||||||
for ind_id, grp in self.df_clean.groupby('indicator_id'):
|
for ind_id, grp in self.df_clean.groupby('indicator_id'):
|
||||||
ind_name = grp['indicator_name'].iloc[0]
|
ind_name = grp['indicator_name'].iloc[0]
|
||||||
@@ -593,13 +613,17 @@ class AnalyticalLayerLoader:
|
|||||||
data_from = int(grp['year'].min())
|
data_from = int(grp['year'].min())
|
||||||
|
|
||||||
if is_sdg_only:
|
if is_sdg_only:
|
||||||
note = f"SDGs from {self.sdg_transition_year}, MDGs before"
|
mdgs_yrs = sorted(grp[grp['framework'] == 'MDGs']['year'].unique())
|
||||||
|
sdgs_yrs = sorted(grp[grp['framework'] == 'SDGs']['year'].unique())
|
||||||
|
yr_range_mdgs = f"{min(mdgs_yrs)}-{max(mdgs_yrs)}" if mdgs_yrs else "-"
|
||||||
|
yr_range_sdgs = f"{min(sdgs_yrs)}-{max(sdgs_yrs)}" if sdgs_yrs else "-"
|
||||||
|
note = f"MDGs:{yr_range_mdgs} | SDGs:{yr_range_sdgs}"
|
||||||
else:
|
else:
|
||||||
note = "MDGs always"
|
note = "MDGs always"
|
||||||
|
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
f" {int(ind_id):<5} {ind_name[:50]:<52} {data_from:<12} "
|
f" {int(ind_id):<5} {ind_name[:50]:<52} {data_from:<11} "
|
||||||
f"{mdgs_rows:<12} {sdgs_rows:<12} {note}"
|
f"{mdgs_rows:<11} {sdgs_rows:<11} {note}"
|
||||||
)
|
)
|
||||||
|
|
||||||
fw_summary = self.df_clean['framework'].value_counts()
|
fw_summary = self.df_clean['framework'].value_counts()
|
||||||
@@ -978,12 +1002,13 @@ class AnalyticalLayerLoader:
|
|||||||
'end_year' : self.end_year,
|
'end_year' : self.end_year,
|
||||||
'baseline_year' : self.baseline_year,
|
'baseline_year' : self.baseline_year,
|
||||||
'sdg_transition_year' : self.sdg_transition_year,
|
'sdg_transition_year' : self.sdg_transition_year,
|
||||||
|
'sdg_transition_source' : 'HARDCODE — SDGs resmi berlaku 1 Jan 2016',
|
||||||
'fixed_countries' : len(self.selected_country_ids),
|
'fixed_countries' : len(self.selected_country_ids),
|
||||||
'norm_scale' : '1-100 per indicator global minmax direction-aware',
|
'norm_scale' : '1-100 per indicator global minmax direction-aware',
|
||||||
'framework_logic' : (
|
'framework_logic' : (
|
||||||
'sdg_transition_year = min(actual_start_year) dari SDG-only indicators; '
|
f'SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE); '
|
||||||
'SDG-only year >= sdg_transition_year → SDGs; '
|
'SDG-only + year >= SDG_TRANSITION_YEAR → SDGs; '
|
||||||
'SDG-only year < sdg_transition_year → MDGs (data tetap ada); '
|
'SDG-only + year < SDG_TRANSITION_YEAR → MDGs (data tetap ada); '
|
||||||
'non-SDG-only → MDGs selalu'
|
'non-SDG-only → MDGs selalu'
|
||||||
),
|
),
|
||||||
'sdg_only_keywords_count': len(SDG_ONLY_KEYWORDS),
|
'sdg_only_keywords_count': len(SDG_ONLY_KEYWORDS),
|
||||||
@@ -1022,8 +1047,8 @@ class AnalyticalLayerLoader:
|
|||||||
self.logger.info("Kolom baru: norm_value_1_100 (min-max 1-100, direction-aware)")
|
self.logger.info("Kolom baru: norm_value_1_100 (min-max 1-100, direction-aware)")
|
||||||
self.logger.info(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
self.logger.info(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
"Framework: SDG-only indicators → SDGs mulai sdg_transition_year, "
|
f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). "
|
||||||
"MDGs sebelumnya (data tetap ada). Non-SDG-only → MDGs selalu."
|
"SDG-only + year >= 2016 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu."
|
||||||
)
|
)
|
||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
|
|
||||||
@@ -1032,7 +1057,7 @@ class AnalyticalLayerLoader:
|
|||||||
self.filter_complete_indicators_per_country()
|
self.filter_complete_indicators_per_country()
|
||||||
self.select_countries_with_all_pillars()
|
self.select_countries_with_all_pillars()
|
||||||
self.filter_indicators_consistent_across_fixed_countries()
|
self.filter_indicators_consistent_across_fixed_countries()
|
||||||
self.determine_sdg_start_year()
|
self.assign_framework()
|
||||||
self.verify_no_gaps()
|
self.verify_no_gaps()
|
||||||
self.calculate_norm_value()
|
self.calculate_norm_value()
|
||||||
self.calculate_yoy()
|
self.calculate_yoy()
|
||||||
@@ -1047,7 +1072,7 @@ class AnalyticalLayerLoader:
|
|||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
self.logger.info(f" Duration : {duration:.2f}s")
|
self.logger.info(f" Duration : {duration:.2f}s")
|
||||||
self.logger.info(f" Year Range : {self.start_year}-{self.end_year}")
|
self.logger.info(f" Year Range : {self.start_year}-{self.end_year}")
|
||||||
self.logger.info(f" SDG Transition Year: {self.sdg_transition_year}")
|
self.logger.info(f" SDG Transition Year: {self.sdg_transition_year} (HARDCODE)")
|
||||||
self.logger.info(f" Countries : {len(self.selected_country_ids)}")
|
self.logger.info(f" Countries : {len(self.selected_country_ids)}")
|
||||||
self.logger.info(f" Indicators : {self.df_clean['indicator_id'].nunique()}")
|
self.logger.info(f" Indicators : {self.df_clean['indicator_id'].nunique()}")
|
||||||
self.logger.info(f" Rows Loaded : {self.pipeline_metadata['rows_loaded']:,}")
|
self.logger.info(f" Rows Loaded : {self.pipeline_metadata['rows_loaded']:,}")
|
||||||
@@ -1076,8 +1101,8 @@ if __name__ == "__main__":
|
|||||||
print(f"Norm: min-max 1-100 per indicator, direction-aware")
|
print(f"Norm: min-max 1-100 per indicator, direction-aware")
|
||||||
print(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
print(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
||||||
print(
|
print(
|
||||||
"Framework: SDG-only → SDGs mulai sdg_transition_year, MDGs sebelumnya. "
|
f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). "
|
||||||
"Non-SDG-only → MDGs selalu."
|
"SDG-only + year >= 2016 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu."
|
||||||
)
|
)
|
||||||
print("=" * 80)
|
print("=" * 80)
|
||||||
|
|
||||||
@@ -1088,6 +1113,6 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
print("\n" + "=" * 80)
|
print("\n" + "=" * 80)
|
||||||
print("[OK] COMPLETED")
|
print("[OK] COMPLETED")
|
||||||
print(f" SDG Transition Year : {loader.sdg_transition_year}")
|
print(f" SDG Transition Year : {loader.sdg_transition_year} (HARDCODE)")
|
||||||
print(f" Rows Loaded : {loader.pipeline_metadata['rows_loaded']:,}")
|
print(f" Rows Loaded : {loader.pipeline_metadata['rows_loaded']:,}")
|
||||||
print("=" * 80)
|
print("=" * 80)
|
||||||
Reference in New Issue
Block a user