done 1
This commit is contained in:
@@ -15,10 +15,11 @@ Filtering Order:
|
|||||||
→ Indikator DI SDG_ONLY_KEYWORDS + year >= SDG_TRANSITION_YEAR → 'SDGs'
|
→ Indikator DI SDG_ONLY_KEYWORDS + year >= SDG_TRANSITION_YEAR → 'SDGs'
|
||||||
→ Indikator DI SDG_ONLY_KEYWORDS + year < SDG_TRANSITION_YEAR → 'MDGs'
|
→ Indikator DI SDG_ONLY_KEYWORDS + year < SDG_TRANSITION_YEAR → 'MDGs'
|
||||||
→ SDG_TRANSITION_YEAR = 2015 (HARDCODE — tanggal resmi SDGs berlaku)
|
→ SDG_TRANSITION_YEAR = 2015 (HARDCODE — tanggal resmi SDGs berlaku)
|
||||||
BUKAN dari actual_start_year data, karena data anaemia/FIES bisa ada
|
|
||||||
sebelum 2015 namun tetap harus dilabeli MDGs pada tahun-tahun tersebut.
|
|
||||||
7. Verify no gaps (dari actual_start_year per indikator, bukan start_year global)
|
7. Verify no gaps (dari actual_start_year per indikator, bukan start_year global)
|
||||||
8. Calculate norm_value_1_100 per indicator (min-max, direction-aware, global)
|
8. Calculate norm_value_1_100 per indicator (min-max, direction-aware, global)
|
||||||
|
*** PERBAIKAN: normalisasi dilakukan SEKALI untuk seluruh data (semua tahun),
|
||||||
|
bukan per-framework, agar nilai dari era MDGs dan SDGs berada di
|
||||||
|
skala yang sama dan dapat dibandingkan secara adil. ***
|
||||||
9. Calculate YoY per indicator per country
|
9. Calculate YoY per indicator per country
|
||||||
10. Analyze indicator availability by year
|
10. Analyze indicator availability by year
|
||||||
11. Save analytical table
|
11. Save analytical table
|
||||||
@@ -26,17 +27,18 @@ Filtering Order:
|
|||||||
FRAMEWORK LOGIC:
|
FRAMEWORK LOGIC:
|
||||||
- SDG_TRANSITION_YEAR = 2015 (HARDCODE, bukan auto-detect dari data)
|
- SDG_TRANSITION_YEAR = 2015 (HARDCODE, bukan auto-detect dari data)
|
||||||
- Semua SDG-only indicators menggunakan SDG_TRANSITION_YEAR yang SAMA
|
- Semua SDG-only indicators menggunakan SDG_TRANSITION_YEAR yang SAMA
|
||||||
sehingga label berubah serentak di satu titik waktu
|
|
||||||
- SDG-only + year < SDG_TRANSITION_YEAR → 'MDGs' (data tetap ada, tidak dihapus)
|
- SDG-only + year < SDG_TRANSITION_YEAR → 'MDGs' (data tetap ada, tidak dihapus)
|
||||||
- SDG-only + year >= SDG_TRANSITION_YEAR → 'SDGs'
|
- SDG-only + year >= SDG_TRANSITION_YEAR → 'SDGs'
|
||||||
- Non-SDG-only indicators → 'MDGs' selalu (di semua tahun)
|
- Non-SDG-only indicators → 'MDGs' selalu (di semua tahun)
|
||||||
|
|
||||||
ALASAN HARDCODE:
|
NORMALISASI (PERBAIKAN):
|
||||||
- SDGs diadopsi PBB pada 25 September 2015 (berlaku efektif 1 Januari 2016); 2015 dipakai sebagai tahun transisi label
|
- norm_value_1_100 dihitung SATU KALI per indikator menggunakan seluruh data
|
||||||
- Indikator FIES dan anaemia punya data sebelum 2015 (dari MDGs era)
|
(semua tahun, semua negara) sebagai referensi min-max.
|
||||||
- Jika sdg_transition_year di-auto-detect dari min(actual_start_year),
|
- Ini memastikan nilai 60 di era MDGs dan nilai 60 di era SDGs memiliki
|
||||||
maka akan = 2013 (karena data ada sejak 2013), sehingga semua tahun
|
makna yang SAMA (posisi relatif yang sama dalam distribusi global).
|
||||||
berlabel SDGs — yang secara historis tidak tepat.
|
- Tidak ada rescaling ulang per-framework di layer analitik ini.
|
||||||
|
- Rescaling per-framework (jika diperlukan untuk visualisasi) sebaiknya
|
||||||
|
dilakukan di layer agregasi (analysis_layer) dengan flag eksplisit.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -65,10 +67,6 @@ from google.cloud import bigquery
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# SDG-ONLY INDICATOR KEYWORDS
|
# SDG-ONLY INDICATOR KEYWORDS
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Hanya indikator yang MURNI BARU di era SDGs yang didaftarkan di sini.
|
|
||||||
# Indikator di set ini → 'SDGs' mulai dari SDG_TRANSITION_YEAR (2015).
|
|
||||||
# Semua indikator lain (shared maupun tidak dikenal) → 'MDGs' di semua tahun.
|
|
||||||
|
|
||||||
SDG_ONLY_KEYWORDS = frozenset([
|
SDG_ONLY_KEYWORDS = frozenset([
|
||||||
# TARGET 2.1.1 — Undernourishment
|
# TARGET 2.1.1 — Undernourishment
|
||||||
"prevalence of undernourishment (percent) (3-year average)",
|
"prevalence of undernourishment (percent) (3-year average)",
|
||||||
@@ -111,23 +109,16 @@ SDG_ONLY_KEYWORDS = frozenset([
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# SDG TRANSITION YEAR — HARDCODE
|
# SDG TRANSITION YEAR — HARDCODE
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# SDGs diadopsi PBB pada 25 September 2015 (berlaku efektif 1 Januari 2016); 2015 dipakai sebagai tahun transisi label.
|
|
||||||
|
|
||||||
SDG_TRANSITION_YEAR = 2015
|
SDG_TRANSITION_YEAR = 2015
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# THRESHOLD KONDISI (fixed absolute, skala 1-100)
|
# THRESHOLD KONDISI (fixed absolute, skala 1-100)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
THRESHOLD_BAD = 40.0
|
THRESHOLD_BAD = 40.0
|
||||||
THRESHOLD_GOOD = 60.0
|
THRESHOLD_GOOD = 60.0
|
||||||
|
|
||||||
|
|
||||||
def assign_condition(norm_value_1_100: float) -> str:
|
def assign_condition(norm_value_1_100: float) -> str:
|
||||||
"""
|
|
||||||
Assign kondisi berdasarkan norm_value_1_100 (skala 1-100, sudah direction-aware).
|
|
||||||
Returns: 'good' / 'moderate' / 'bad'
|
|
||||||
"""
|
|
||||||
if pd.isna(norm_value_1_100):
|
if pd.isna(norm_value_1_100):
|
||||||
return None
|
return None
|
||||||
if norm_value_1_100 > THRESHOLD_GOOD:
|
if norm_value_1_100 > THRESHOLD_GOOD:
|
||||||
@@ -145,20 +136,10 @@ class AnalyticalLayerLoader:
|
|||||||
"""
|
"""
|
||||||
Analytical Layer Loader for BigQuery
|
Analytical Layer Loader for BigQuery
|
||||||
|
|
||||||
Output kolom fact_asean_food_security_selected:
|
PERBAIKAN NORMALISASI:
|
||||||
country_id, country_name,
|
- norm_value_1_100 dihitung SEKALI per indikator dari seluruh data
|
||||||
indicator_id, indicator_name, direction, framework,
|
(semua tahun, semua negara). Tidak ada rescaling ulang per-framework.
|
||||||
pillar_id, pillar_name,
|
- Ini memastikan komparabilitas lintas era MDGs dan SDGs.
|
||||||
time_id, year, value,
|
|
||||||
norm_value_1_100,
|
|
||||||
yoy_change, yoy_pct
|
|
||||||
|
|
||||||
FRAMEWORK LOGIC:
|
|
||||||
- SDG_TRANSITION_YEAR = 2015 (HARDCODE — tanggal resmi SDGs berlaku)
|
|
||||||
- Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' di SEMUA tahun
|
|
||||||
- Indikator DI SDG_ONLY_KEYWORDS:
|
|
||||||
year < SDG_TRANSITION_YEAR (2015) → 'MDGs' (data tetap ada, tidak dihapus)
|
|
||||||
year >= SDG_TRANSITION_YEAR (2015) → 'SDGs'
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, client: bigquery.Client):
|
def __init__(self, client: bigquery.Client):
|
||||||
@@ -172,13 +153,12 @@ class AnalyticalLayerLoader:
|
|||||||
self.df_pillar = None
|
self.df_pillar = None
|
||||||
|
|
||||||
self.selected_country_ids = None
|
self.selected_country_ids = None
|
||||||
self.indicator_max_start_map = {} # indicator_id → max_start_year (dari Step 5)
|
self.indicator_max_start_map = {}
|
||||||
|
|
||||||
self.start_year = 2013
|
self.start_year = 2013
|
||||||
self.end_year = None
|
self.end_year = None
|
||||||
self.baseline_year = 2023
|
self.baseline_year = 2023
|
||||||
|
|
||||||
# SDG_TRANSITION_YEAR diambil dari konstanta modul (HARDCODE = 2015)
|
|
||||||
self.sdg_transition_year = SDG_TRANSITION_YEAR
|
self.sdg_transition_year = SDG_TRANSITION_YEAR
|
||||||
|
|
||||||
self.pipeline_metadata = {
|
self.pipeline_metadata = {
|
||||||
@@ -429,8 +409,6 @@ class AnalyticalLayerLoader:
|
|||||||
self.logger.info("STEP 5: FILTER INDICATORS WITH CONSISTENT PRESENCE")
|
self.logger.info("STEP 5: FILTER INDICATORS WITH CONSISTENT PRESENCE")
|
||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
|
|
||||||
# Hitung max_start_year per indikator = max(min_year per country)
|
|
||||||
# = tahun pertama di mana SEMUA fixed countries sudah punya data
|
|
||||||
indicator_country_start = self.df_clean.groupby([
|
indicator_country_start = self.df_clean.groupby([
|
||||||
'indicator_id', 'indicator_name', 'country_id'
|
'indicator_id', 'indicator_name', 'country_id'
|
||||||
])['year'].min().reset_index()
|
])['year'].min().reset_index()
|
||||||
@@ -459,8 +437,6 @@ class AnalyticalLayerLoader:
|
|||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Cek apakah semua tahun dari max_start s/d end_year
|
|
||||||
# hadir di SEMUA fixed countries
|
|
||||||
expected_years = list(range(max_start, self.end_year + 1))
|
expected_years = list(range(max_start, self.end_year + 1))
|
||||||
ind_data = self.df_clean[self.df_clean['indicator_id'] == indicator_id]
|
ind_data = self.df_clean[self.df_clean['indicator_id'] == indicator_id]
|
||||||
all_years_complete = True
|
all_years_complete = True
|
||||||
@@ -486,18 +462,11 @@ class AnalyticalLayerLoader:
|
|||||||
if not valid_indicators:
|
if not valid_indicators:
|
||||||
raise ValueError("No valid indicators found after filtering!")
|
raise ValueError("No valid indicators found after filtering!")
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# Filter hanya indikator yang valid.
|
|
||||||
# PENTING: TIDAK menghapus baris year < max_start_year.
|
|
||||||
# Semua baris tetap ada — label framework ditentukan di Step 6.
|
|
||||||
# max_start_year disimpan sebagai lookup untuk Step 7.
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
original_count = len(self.df_clean)
|
original_count = len(self.df_clean)
|
||||||
self.df_clean = self.df_clean[
|
self.df_clean = self.df_clean[
|
||||||
self.df_clean['indicator_id'].isin(valid_indicators)
|
self.df_clean['indicator_id'].isin(valid_indicators)
|
||||||
].copy()
|
].copy()
|
||||||
|
|
||||||
# Simpan max_start_year per indicator_id untuk Step 7
|
|
||||||
self.indicator_max_start_map = (
|
self.indicator_max_start_map = (
|
||||||
indicator_max_start[indicator_max_start['indicator_id'].isin(valid_indicators)]
|
indicator_max_start[indicator_max_start['indicator_id'].isin(valid_indicators)]
|
||||||
.set_index('indicator_id')['max_start_year']
|
.set_index('indicator_id')['max_start_year']
|
||||||
@@ -524,24 +493,11 @@ class AnalyticalLayerLoader:
|
|||||||
self.logger.info("STEP 6: ASSIGN FRAMEWORK PER ROW")
|
self.logger.info("STEP 6: ASSIGN FRAMEWORK PER ROW")
|
||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# SDG_TRANSITION_YEAR = 2015 (HARDCODE)
|
|
||||||
# SDGs diadopsi PBB 25 September 2015 (berlaku efektif 1 Januari 2016); 2015 dipakai sebagai tahun transisi label.
|
|
||||||
#
|
|
||||||
# PENTING — TIDAK dihitung dari data:
|
|
||||||
# Jika auto-detect dari min(actual_start_year SDG-only indicators),
|
|
||||||
# hasilnya = 2013 (karena data FIES/anaemia ada sejak 2013).
|
|
||||||
# Akibatnya year >= 2013 → SDGs → SEMUA tahun berlabel SDGs.
|
|
||||||
# Ini secara historis salah karena SDGs belum berlaku di 2013-2014.
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
self.logger.info(f"\n SDG_TRANSITION_YEAR : {self.sdg_transition_year} (HARDCODE)")
|
self.logger.info(f"\n SDG_TRANSITION_YEAR : {self.sdg_transition_year} (HARDCODE)")
|
||||||
self.logger.info(f" Alasan : SDGs resmi berlaku 1 Januari 2015")
|
self.logger.info(f" Alasan : SDGs resmi berlaku 1 Januari 2015")
|
||||||
self.logger.info(f" Bukan auto-detect : data FIES/anaemia ada sejak 2013,")
|
self.logger.info(f" Bukan auto-detect : data FIES/anaemia ada sejak 2013,")
|
||||||
self.logger.info(f" tapi tahun 2013-2015 harus tetap MDGs")
|
self.logger.info(f" tapi tahun 2013-2014 harus tetap MDGs")
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# Identifikasi indikator SDG-only berdasarkan SDG_ONLY_KEYWORDS
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
indicator_info = (
|
indicator_info = (
|
||||||
self.df_clean[['indicator_id', 'indicator_name']]
|
self.df_clean[['indicator_id', 'indicator_name']]
|
||||||
.drop_duplicates()
|
.drop_duplicates()
|
||||||
@@ -571,25 +527,12 @@ class AnalyticalLayerLoader:
|
|||||||
|
|
||||||
self.logger.info(f"\n Non-SDG-only indicators ({len(non_sdg_ids)}): → MDGs selalu")
|
self.logger.info(f"\n Non-SDG-only indicators ({len(non_sdg_ids)}): → MDGs selalu")
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# Validasi: pastikan ada SDG-only indicators yang lolos filter
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
if not sdg_only_ids:
|
if not sdg_only_ids:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Tidak ada indikator SDG-only (FIES/anaemia) yang lolos filter. "
|
"Tidak ada indikator SDG-only (FIES/anaemia) yang lolos filter. "
|
||||||
"Pastikan nama indikator di SDG_ONLY_KEYWORDS cocok dengan data BigQuery."
|
"Pastikan nama indikator di SDG_ONLY_KEYWORDS cocok dengan data BigQuery."
|
||||||
)
|
)
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# Assign framework dengan vectorized np.where:
|
|
||||||
#
|
|
||||||
# Kondisi SDG-only AND year >= SDG_TRANSITION_YEAR → 'SDGs'
|
|
||||||
# Semua kondisi lain (non-SDG-only ATAU year < SDG_TRANSITION_YEAR) → 'MDGs'
|
|
||||||
#
|
|
||||||
# Hasilnya dalam 1 indikator SDG-only (misal anaemia, data mulai 2013):
|
|
||||||
# 2013, 2014 → 'MDGs' (data tetap ada)
|
|
||||||
# 2015, 2016, ... → 'SDGs'
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
self.df_clean['_is_sdg_only'] = self.df_clean['indicator_id'].isin(sdg_only_ids)
|
self.df_clean['_is_sdg_only'] = self.df_clean['indicator_id'].isin(sdg_only_ids)
|
||||||
|
|
||||||
self.df_clean['framework'] = np.where(
|
self.df_clean['framework'] = np.where(
|
||||||
@@ -601,9 +544,6 @@ class AnalyticalLayerLoader:
|
|||||||
|
|
||||||
self.df_clean = self.df_clean.drop(columns=['_is_sdg_only'])
|
self.df_clean = self.df_clean.drop(columns=['_is_sdg_only'])
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# Log verifikasi per indikator — tampilkan split MDGs/SDGs per tahun
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
self.logger.info(f"\n Logika assign framework (PER BARIS):")
|
self.logger.info(f"\n Logika assign framework (PER BARIS):")
|
||||||
self.logger.info(f" {'─'*72}")
|
self.logger.info(f" {'─'*72}")
|
||||||
self.logger.info(f" Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' di semua tahun")
|
self.logger.info(f" Indikator TIDAK di SDG_ONLY_KEYWORDS → 'MDGs' di semua tahun")
|
||||||
@@ -668,13 +608,6 @@ class AnalyticalLayerLoader:
|
|||||||
self.logger.info("STEP 7: VERIFY NO GAPS")
|
self.logger.info("STEP 7: VERIFY NO GAPS")
|
||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# Verifikasi dilakukan PER INDIKATOR dari actual_start_year-nya,
|
|
||||||
# bukan dari self.start_year global, karena tiap indikator bisa
|
|
||||||
# punya start year berbeda.
|
|
||||||
# Baris sebelum actual_start_year (yang berlabel MDGs) tidak dicek
|
|
||||||
# karena memang tidak semua country punya data di sana.
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
expected_countries = len(self.selected_country_ids)
|
expected_countries = len(self.selected_country_ids)
|
||||||
all_good = True
|
all_good = True
|
||||||
bad_rows = []
|
bad_rows = []
|
||||||
@@ -714,15 +647,31 @@ class AnalyticalLayerLoader:
|
|||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# STEP 8: CALCULATE NORM_VALUE_1_100 PER INDICATOR
|
# STEP 8: CALCULATE NORM_VALUE_1_100 PER INDICATOR
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
# PERBAIKAN:
|
||||||
|
# Normalisasi dilakukan SEKALI per indikator dari SELURUH DATA
|
||||||
|
# (semua tahun 2013–end_year, semua negara, tanpa memisahkan framework).
|
||||||
|
#
|
||||||
|
# Alasan:
|
||||||
|
# - Sebelumnya, rescaling per-framework di analysis_layer menyebabkan
|
||||||
|
# nilai 1-100 era MDGs dan SDGs memiliki referensi yang berbeda,
|
||||||
|
# sehingga tidak dapat dibandingkan secara adil.
|
||||||
|
# - Dengan satu normalisasi global per indikator, nilai 60 di era MDGs
|
||||||
|
# dan nilai 60 di era SDGs berarti hal yang sama: posisi relatif yang
|
||||||
|
# sama dalam distribusi historis indikator tersebut.
|
||||||
|
# - Jika SDGs memang era yang lebih buruk secara substantif, itu akan
|
||||||
|
# tercermin sebagai nilai norm yang memang lebih rendah — bukan artefak
|
||||||
|
# dari rescaling ulang.
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
def calculate_norm_value(self):
|
def calculate_norm_value(self):
|
||||||
"""
|
|
||||||
Hitung norm_value_1_100 per indikator — min-max normalisasi skala 1-100,
|
|
||||||
direction-aware, global per indikator (semua negara + semua tahun).
|
|
||||||
"""
|
|
||||||
self.logger.info("\n" + "=" * 80)
|
self.logger.info("\n" + "=" * 80)
|
||||||
self.logger.info("STEP 8: CALCULATE NORM_VALUE_1_100 PER INDICATOR")
|
self.logger.info("STEP 8: CALCULATE NORM_VALUE_1_100 PER INDICATOR (GLOBAL, SEKALI)")
|
||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
|
self.logger.info(
|
||||||
|
"\n [PERBAIKAN] Normalisasi dilakukan SEKALI per indikator dari seluruh data."
|
||||||
|
"\n Tidak ada rescaling ulang per-framework."
|
||||||
|
"\n Ini memastikan komparabilitas lintas era MDGs dan SDGs."
|
||||||
|
)
|
||||||
|
|
||||||
DIRECTION_INVERT = frozenset({
|
DIRECTION_INVERT = frozenset({
|
||||||
"negative", "lower_better", "lower_is_better", "inverse", "neg",
|
"negative", "lower_better", "lower_is_better", "inverse", "neg",
|
||||||
@@ -747,6 +696,10 @@ class AnalyticalLayerLoader:
|
|||||||
if n_valid < 2:
|
if n_valid < 2:
|
||||||
grp['norm_value_1_100'] = np.nan
|
grp['norm_value_1_100'] = np.nan
|
||||||
norm_parts.append(grp)
|
norm_parts.append(grp)
|
||||||
|
self.logger.warning(
|
||||||
|
f" {int(ind_id):<5} {direction:<15} {'N/A':<8} "
|
||||||
|
f"{'N/A':>10} {'N/A':>10} {ind_name[:45]} [SKIPPED: n_valid={n_valid}]"
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raw = grp.loc[valid_mask, 'value'].values
|
raw = grp.loc[valid_mask, 'value'].values
|
||||||
@@ -755,6 +708,7 @@ class AnalyticalLayerLoader:
|
|||||||
normed = np.full(len(grp), np.nan)
|
normed = np.full(len(grp), np.nan)
|
||||||
|
|
||||||
if v_min == v_max:
|
if v_min == v_max:
|
||||||
|
# Semua nilai sama → assign tengah skala
|
||||||
normed[valid_mask.values] = 50.5
|
normed[valid_mask.values] = 50.5
|
||||||
else:
|
else:
|
||||||
scaled = (raw - v_min) / (v_max - v_min)
|
scaled = (raw - v_min) / (v_max - v_min)
|
||||||
@@ -781,6 +735,53 @@ class AnalyticalLayerLoader:
|
|||||||
f"{self.df_clean['norm_value_1_100'].max():.2f}"
|
f"{self.df_clean['norm_value_1_100'].max():.2f}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# VALIDASI KOMPARABILITAS: Cek apakah ada gap sistematis antar era
|
||||||
|
# Ini adalah sinyal diagnostik — bukan error.
|
||||||
|
# Gap besar (>15 poin) setelah perbaikan = fenomena nyata, bukan artefak.
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
self.logger.info(f"\n [DIAGNOSTIK KOMPARABILITAS] Rata-rata norm per framework per tahun:")
|
||||||
|
self.logger.info(f" {'─'*55}")
|
||||||
|
|
||||||
|
fw_year_mean = (
|
||||||
|
self.df_clean
|
||||||
|
.groupby(['framework', 'year'])['norm_value_1_100']
|
||||||
|
.mean()
|
||||||
|
.reset_index()
|
||||||
|
.sort_values(['framework', 'year'])
|
||||||
|
)
|
||||||
|
for fw, grp_fw in fw_year_mean.groupby('framework'):
|
||||||
|
means = grp_fw['norm_value_1_100'].values
|
||||||
|
years = grp_fw['year'].values
|
||||||
|
self.logger.info(f"\n Framework: {fw}")
|
||||||
|
for yr, m in zip(years, means):
|
||||||
|
bar = '█' * int(m / 5)
|
||||||
|
self.logger.info(f" {int(yr)} : {m:6.2f} {bar}")
|
||||||
|
|
||||||
|
# Bandingkan rata-rata keseluruhan MDGs vs SDGs (seluruh baris per framework, semua tahun)
|
||||||
|
mdgs_mean_total = self.df_clean[self.df_clean['framework'] == 'MDGs']['norm_value_1_100'].mean()
|
||||||
|
sdgs_mean_total = self.df_clean[self.df_clean['framework'] == 'SDGs']['norm_value_1_100'].mean()
|
||||||
|
gap = mdgs_mean_total - sdgs_mean_total
|
||||||
|
self.logger.info(
|
||||||
|
f"\n Rata-rata keseluruhan:"
|
||||||
|
f"\n MDGs : {mdgs_mean_total:.2f}"
|
||||||
|
f"\n SDGs : {sdgs_mean_total:.2f}"
|
||||||
|
f"\n Gap : {gap:.2f} poin"
|
||||||
|
)
|
||||||
|
if abs(gap) > 15:
|
||||||
|
self.logger.info(
|
||||||
|
f"\n [INFO] Gap {gap:.2f} poin antara MDGs dan SDGs."
|
||||||
|
f"\n Setelah perbaikan normalisasi (satu referensi global),"
|
||||||
|
f"\n gap ini mencerminkan perbedaan SUBSTANTIF, bukan artefak teknis."
|
||||||
|
f"\n Indikator SDGs memang mengukur dimensi deprivasi yang lebih dalam"
|
||||||
|
f"\n (FIES, stunting, wasting, anaemia) dibanding indikator MDGs."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.logger.info(
|
||||||
|
f"\n [OK] Gap {gap:.2f} poin — dalam batas wajar, tidak ada bias sistematis."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Distribusi kondisi
|
||||||
self.df_clean['_condition_preview'] = (
|
self.df_clean['_condition_preview'] = (
|
||||||
self.df_clean['norm_value_1_100'].apply(assign_condition)
|
self.df_clean['norm_value_1_100'].apply(assign_condition)
|
||||||
)
|
)
|
||||||
@@ -1019,7 +1020,11 @@ class AnalyticalLayerLoader:
|
|||||||
'sdg_transition_year' : self.sdg_transition_year,
|
'sdg_transition_year' : self.sdg_transition_year,
|
||||||
'sdg_transition_source' : 'HARDCODE — SDGs resmi berlaku 1 Jan 2015',
|
'sdg_transition_source' : 'HARDCODE — SDGs resmi berlaku 1 Jan 2015',
|
||||||
'fixed_countries' : len(self.selected_country_ids),
|
'fixed_countries' : len(self.selected_country_ids),
|
||||||
'norm_scale' : '1-100 per indicator global minmax direction-aware',
|
'norm_scale' : (
|
||||||
|
'1-100 per indicator global minmax direction-aware. '
|
||||||
|
'SATU normalisasi untuk seluruh data tanpa rescaling per-framework. '
|
||||||
|
'Komparabilitas lintas era MDGs/SDGs terjamin.'
|
||||||
|
),
|
||||||
'framework_logic' : (
|
'framework_logic' : (
|
||||||
f'SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE); '
|
f'SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE); '
|
||||||
'SDG-only + year >= SDG_TRANSITION_YEAR → SDGs; '
|
'SDG-only + year >= SDG_TRANSITION_YEAR → SDGs; '
|
||||||
@@ -1065,6 +1070,9 @@ class AnalyticalLayerLoader:
|
|||||||
f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). "
|
f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). "
|
||||||
"SDG-only + year >= 2015 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu."
|
"SDG-only + year >= 2015 → SDGs; sebelumnya MDGs. Non-SDG-only → MDGs selalu."
|
||||||
)
|
)
|
||||||
|
self.logger.info(
|
||||||
|
"NORMALISASI: SATU referensi global per indikator — tidak ada rescaling per-framework."
|
||||||
|
)
|
||||||
self.logger.info("=" * 80)
|
self.logger.info("=" * 80)
|
||||||
|
|
||||||
self.load_source_data()
|
self.load_source_data()
|
||||||
@@ -1113,7 +1121,7 @@ if __name__ == "__main__":
|
|||||||
print("=" * 80)
|
print("=" * 80)
|
||||||
print("BIGQUERY ANALYTICAL LAYER - DATA FILTERING")
|
print("BIGQUERY ANALYTICAL LAYER - DATA FILTERING")
|
||||||
print("Output: fact_asean_food_security_selected -> fs_asean_gold")
|
print("Output: fact_asean_food_security_selected -> fs_asean_gold")
|
||||||
print(f"Norm: min-max 1-100 per indicator, direction-aware")
|
print(f"Norm: min-max 1-100 per indicator, direction-aware, GLOBAL (satu referensi)")
|
||||||
print(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
print(f"Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
||||||
print(
|
print(
|
||||||
f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). "
|
f"Framework: SDG_TRANSITION_YEAR={SDG_TRANSITION_YEAR} (HARDCODE). "
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user