salah file
This commit is contained in:
@@ -2,19 +2,32 @@
|
|||||||
BIGQUERY ANALYSIS LAYER - FOOD SECURITY AGGREGATION
|
BIGQUERY ANALYSIS LAYER - FOOD SECURITY AGGREGATION
|
||||||
Semua agregasi pakai norm_value dari _get_norm_value_df()
|
Semua agregasi pakai norm_value dari _get_norm_value_df()
|
||||||
|
|
||||||
UPDATED:
|
PERBAIKAN (vs versi sebelumnya):
|
||||||
- _classify_indicators() membaca kolom 'framework' langsung dari
|
─────────────────────────────────────────────────────────────────────────────
|
||||||
fact_asean_food_security_selected (sudah di-assign di analytical_layer
|
1. NORMALIZE_FRAMEWORKS_JOINTLY dihapus.
|
||||||
berdasarkan SDG_INDICATOR_KEYWORDS + actual_start_year).
|
Setelah perbaikan di analytical_layer, norm_value_1_100 sudah dihitung
|
||||||
- Kolom 'condition' (good/moderate/bad) ditambahkan ke semua tabel agregasi:
|
SEKALI per indikator dari seluruh data (semua tahun, semua negara).
|
||||||
* agg_pillar_composite
|
Tidak ada lagi rescaling ulang per-framework di layer ini.
|
||||||
* agg_pillar_by_country
|
Semua framework (MDGs, SDGs, Total) menggunakan norm_value yang SAMA
|
||||||
* agg_framework_by_country
|
sebagai basis, sehingga skor mereka berada pada skala yang setara.
|
||||||
* agg_framework_asean
|
|
||||||
Threshold fixed absolute (skala 1-100, direction-aware):
|
2. _get_norm_value_df() DISEDERHANAKAN.
|
||||||
bad : score < 40
|
Fungsi ini sekarang hanya membaca kolom norm_value_1_100 yang sudah ada
|
||||||
moderate : 40 <= score <= 60
|
di fact_asean_food_security_selected (hasil dari analytical_layer),
|
||||||
good : score > 60
|
kemudian memetakan ke skala 0-1 untuk keperluan agregasi internal.
|
||||||
|
TIDAK ada lagi normalisasi ulang per indikator di sini.
|
||||||
|
|
||||||
|
3. global_minmax() TETAP DIGUNAKAN untuk mengubah rata-rata norm (0-1) menjadi
|
||||||
|
skor 1-100 di level agregasi (pillar / country / asean).
|
||||||
|
Ini adalah rescaling level AGREGAT (bukan level indikator), sehingga masih
|
||||||
|
valid dan tidak menimbulkan bias komparabilitas.
|
||||||
|
|
||||||
|
4. Framework MDGs dan SDGs sekarang comparable:
|
||||||
|
- Jika skor SDGs < skor MDGs → memang karena indikator SDGs mengukur
|
||||||
|
dimensi deprivasi yang lebih dalam (substantif), bukan artefak teknis.
|
||||||
|
- Log diagnostik ditambahkan untuk memverifikasi ini.
|
||||||
|
|
||||||
|
5. Kolom 'condition' (good/moderate/bad) TETAP dengan threshold yang sama.
|
||||||
|
|
||||||
Simpan 6 tabel ke fs_asean_gold (layer='gold'):
|
Simpan 6 tabel ke fs_asean_gold (layer='gold'):
|
||||||
- agg_pillar_composite
|
- agg_pillar_composite
|
||||||
@@ -57,10 +70,7 @@ DIRECTION_POSITIVE_KEYWORDS = frozenset({
|
|||||||
"positive", "higher_better", "higher_is_better",
|
"positive", "higher_better", "higher_is_better",
|
||||||
})
|
})
|
||||||
|
|
||||||
NORMALIZE_FRAMEWORKS_JOINTLY = False
|
|
||||||
|
|
||||||
# Threshold kondisi — fixed absolute, skala 1-100
|
# Threshold kondisi — fixed absolute, skala 1-100
|
||||||
# Konsisten dengan THRESHOLD_BAD / THRESHOLD_GOOD di analytical_layer
|
|
||||||
THRESHOLD_BAD = 40.0
|
THRESHOLD_BAD = 40.0
|
||||||
THRESHOLD_GOOD = 60.0
|
THRESHOLD_GOOD = 60.0
|
||||||
|
|
||||||
@@ -118,6 +128,11 @@ def _should_invert(direction: str, logger=None, context: str = "") -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def global_minmax(series: pd.Series, lo: float = 1.0, hi: float = 100.0) -> pd.Series:
|
def global_minmax(series: pd.Series, lo: float = 1.0, hi: float = 100.0) -> pd.Series:
|
||||||
|
"""
|
||||||
|
Rescale series ke rentang [lo, hi].
|
||||||
|
Digunakan untuk mengubah norm agregat (0-1) menjadi skor 1-100 di level
|
||||||
|
pillar / country / asean. Bukan untuk normalisasi indikator mentah.
|
||||||
|
"""
|
||||||
values = series.dropna().values
|
values = series.dropna().values
|
||||||
if len(values) == 0:
|
if len(values) == 0:
|
||||||
return pd.Series(np.nan, index=series.index)
|
return pd.Series(np.nan, index=series.index)
|
||||||
@@ -172,16 +187,11 @@ def check_and_dedup(
|
|||||||
|
|
||||||
|
|
||||||
def add_condition_column(df: pd.DataFrame, score_col: str) -> pd.DataFrame:
|
def add_condition_column(df: pd.DataFrame, score_col: str) -> pd.DataFrame:
|
||||||
"""
|
|
||||||
Tambahkan kolom 'condition' berdasarkan score_col.
|
|
||||||
Threshold: bad < 40, moderate 40-60, good > 60 (skala 1-100).
|
|
||||||
"""
|
|
||||||
df['condition'] = df[score_col].apply(assign_condition)
|
df['condition'] = df[score_col].apply(assign_condition)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
def log_condition_summary(df: pd.DataFrame, context: str, logger) -> None:
|
def log_condition_summary(df: pd.DataFrame, context: str, logger) -> None:
|
||||||
"""Log distribusi kondisi untuk verifikasi."""
|
|
||||||
dist = df['condition'].value_counts()
|
dist = df['condition'].value_counts()
|
||||||
logger.info(
|
logger.info(
|
||||||
f" Condition distribution ({context}): " +
|
f" Condition distribution ({context}): " +
|
||||||
@@ -190,7 +200,7 @@ def log_condition_summary(df: pd.DataFrame, context: str, logger) -> None:
|
|||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# NARRATIVE BUILDER FUNCTIONS
|
# NARRATIVE BUILDER FUNCTIONS (tidak berubah)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
def _fmt_score(score) -> str:
|
def _fmt_score(score) -> str:
|
||||||
@@ -426,6 +436,8 @@ class FoodSecurityAggregator:
|
|||||||
"indicator_id", "indicator_name", "direction", "framework",
|
"indicator_id", "indicator_name", "direction", "framework",
|
||||||
"pillar_id", "pillar_name",
|
"pillar_id", "pillar_name",
|
||||||
"time_id", "year", "value",
|
"time_id", "year", "value",
|
||||||
|
# PERBAIKAN: norm_value_1_100 wajib ada (hasil analytical_layer)
|
||||||
|
"norm_value_1_100",
|
||||||
}
|
}
|
||||||
missing_cols = required_cols - set(self.df.columns)
|
missing_cols = required_cols - set(self.df.columns)
|
||||||
if missing_cols:
|
if missing_cols:
|
||||||
@@ -434,12 +446,13 @@ class FoodSecurityAggregator:
|
|||||||
f"Pastikan pipeline dijalankan berurutan:\n"
|
f"Pastikan pipeline dijalankan berurutan:\n"
|
||||||
f" 1. bigquery_cleaned_layer.py\n"
|
f" 1. bigquery_cleaned_layer.py\n"
|
||||||
f" 2. bigquery_dimensional_model.py\n"
|
f" 2. bigquery_dimensional_model.py\n"
|
||||||
f" 3. bigquery_analytical_layer.py\n"
|
f" 3. bigquery_analytical_layer.py ← harus dijalankan dulu\n"
|
||||||
f" 4. bigquery_analysis_layer.py (file ini)"
|
f" 4. bigquery_analysis_layer.py (file ini)"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.df["direction"] = self.df["direction"].fillna("positive")
|
self.df["direction"] = self.df["direction"].fillna("positive")
|
||||||
self.df["framework"] = self.df["framework"].fillna("MDGs")
|
self.df["framework"] = self.df["framework"].fillna("MDGs")
|
||||||
|
self.df["norm_value_1_100"] = self.df["norm_value_1_100"].astype(float)
|
||||||
|
|
||||||
dir_dist = self.df.drop_duplicates("indicator_id")["direction"].value_counts()
|
dir_dist = self.df.drop_duplicates("indicator_id")["direction"].value_counts()
|
||||||
self.logger.info(f"\n Distribusi direction per indikator:")
|
self.logger.info(f"\n Distribusi direction per indikator:")
|
||||||
@@ -458,6 +471,45 @@ class FoodSecurityAggregator:
|
|||||||
f"Tahun: {int(self.df['year'].min())}-{int(self.df['year'].max())}"
|
f"Tahun: {int(self.df['year'].min())}-{int(self.df['year'].max())}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Diagnostik: cek komparabilitas norm antar framework
|
||||||
|
self._log_norm_comparability_diagnostics()
|
||||||
|
|
||||||
|
def _log_norm_comparability_diagnostics(self, gap_threshold: float = 15.0) -> None:
    """
    Log diagnostics on how norm_value_1_100 compares across frameworks.

    Writes one summary line (mean, median, std, min, max) for every value of
    the 'framework' column in self.df, then reports the gap between the MDGs
    mean and the SDGs mean of norm_value_1_100.

    Args:
        gap_threshold: Absolute MDGs-SDGs gap (in points) above which the gap
            is logged with the "[INFO]" explanation instead of "[OK]".
            Defaults to 15.0, the value that used to be hard-coded here.

    Returns:
        None. The only effect is log output on self.logger.
    """
    self.logger.info(f"\n [DIAGNOSTIK] Komparabilitas norm_value_1_100 antar framework:")
    self.logger.info(f" {'─'*60}")

    # Group once and reuse the grouping for both the table and the gap.
    by_framework = self.df.groupby('framework')['norm_value_1_100']

    fw_stats = by_framework.agg(['mean', 'median', 'std', 'min', 'max']).round(2)
    for fw, row in fw_stats.iterrows():
        self.logger.info(
            f" {fw:<8} mean={row['mean']:>6.2f} median={row['median']:>6.2f} "
            f"std={row['std']:>5.2f} range=[{row['min']:.2f},{row['max']:.2f}]"
        )

    # Unrounded means; a framework that is absent from the data yields NaN.
    fw_means = by_framework.mean()
    missing = float("nan")
    gap = fw_means.get('MDGs', missing) - fw_means.get('SDGs', missing)

    # A NaN gap must not fall through to the "[OK]" branch: abs(nan) > x is
    # always False, which would report a non-existent gap as acceptable.
    if pd.isna(gap):
        self.logger.warning(
            "\n [WARN] Gap MDGs-SDGs tidak dapat dihitung "
            "(data MDGs atau SDGs tidak tersedia)."
        )
        return

    if abs(gap) > gap_threshold:
        self.logger.info(
            f"\n [INFO] Gap MDGs-SDGs = {gap:.2f} poin."
            f"\n Ini adalah perbedaan SUBSTANTIF (bukan artefak normalisasi):"
            f"\n Indikator SDGs mengukur deprivasi yang lebih dalam"
            f"\n (FIES, stunting, wasting, anaemia) vs indikator MDGs."
            f"\n Gap ini valid untuk dilaporkan sebagai temuan analisis."
        )
    else:
        self.logger.info(
            f"\n [OK] Gap MDGs-SDGs = {gap:.2f} poin — dalam batas wajar."
        )
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# STEP 1b: Klasifikasi indikator
|
# STEP 1b: Klasifikasi indikator
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -474,8 +526,6 @@ class FoodSecurityAggregator:
|
|||||||
self.df[self.df["framework"] == "SDGs"]["indicator_id"].unique().tolist()
|
self.df[self.df["framework"] == "SDGs"]["indicator_id"].unique().tolist()
|
||||||
)
|
)
|
||||||
|
|
||||||
# sdgs_start_year: ambil dari proxy SDGs-only (FIES/anaemia)
|
|
||||||
# Konsisten dengan cara analytical_layer mendeteksinya
|
|
||||||
_PROXY_KW = frozenset(['food insecurity', 'anemia', 'anaemia'])
|
_PROXY_KW = frozenset(['food insecurity', 'anemia', 'anaemia'])
|
||||||
proxy_mask = (
|
proxy_mask = (
|
||||||
(self.df["framework"] == "SDGs") &
|
(self.df["framework"] == "SDGs") &
|
||||||
@@ -492,7 +542,6 @@ class FoodSecurityAggregator:
|
|||||||
f"(dari proxy FIES/anaemia di tabel)"
|
f"(dari proxy FIES/anaemia di tabel)"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Fallback: min year dari semua SDGs rows
|
|
||||||
sdgs_rows = self.df[self.df["framework"] == "SDGs"]
|
sdgs_rows = self.df[self.df["framework"] == "SDGs"]
|
||||||
if not sdgs_rows.empty:
|
if not sdgs_rows.empty:
|
||||||
self.sdgs_start_year = int(sdgs_rows["year"].min())
|
self.sdgs_start_year = int(sdgs_rows["year"].min())
|
||||||
@@ -520,39 +569,48 @@ class FoodSecurityAggregator:
|
|||||||
self.logger.info(f" [{int(row['indicator_id'])}] {row['indicator_name']}")
|
self.logger.info(f" [{int(row['indicator_id'])}] {row['indicator_name']}")
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# CORE HELPER: normalisasi 0-1 per indikator (untuk composite score)
|
# CORE HELPER: _get_norm_value_df()
|
||||||
|
# =========================================================================
|
||||||
|
# PERBAIKAN:
|
||||||
|
# Fungsi ini TIDAK lagi melakukan normalisasi ulang per indikator.
|
||||||
|
# Kolom norm_value_1_100 sudah dihitung sekali di analytical_layer
|
||||||
|
# dengan referensi global (semua tahun, semua negara, per indikator).
|
||||||
|
#
|
||||||
|
# Yang dilakukan di sini hanya:
|
||||||
|
# 1. Membaca norm_value_1_100 dari df
|
||||||
|
# 2. Mengubah skala 1-100 → 0-1 (untuk keperluan rata-rata agregat)
|
||||||
|
# dengan rumus linear: norm_0_1 = (norm_1_100 - 1) / 99
|
||||||
|
#
|
||||||
|
# Rescaling agregat (0-1 → 1-100) tetap dilakukan via global_minmax()
|
||||||
|
# di masing-masing fungsi calc_* untuk menghasilkan skor level pillar/country/asean.
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _get_norm_value_df(self) -> pd.DataFrame:
|
def _get_norm_value_df(self) -> pd.DataFrame:
|
||||||
norm_parts = []
|
"""
|
||||||
for ind_id, grp in self.df.groupby("indicator_id"):
|
Mengembalikan df dengan kolom 'norm_value' (skala 0-1) yang diturunkan
|
||||||
grp = grp.copy()
|
dari norm_value_1_100 (sudah ada di source, dihitung di analytical_layer).
|
||||||
direction = str(grp["direction"].iloc[0])
|
|
||||||
do_invert = _should_invert(direction, self.logger, context=f"indicator_id={ind_id}")
|
|
||||||
valid_mask = grp["value"].notna()
|
|
||||||
n_valid = valid_mask.sum()
|
|
||||||
|
|
||||||
if n_valid < 2:
|
Transformasi: norm_value = (norm_value_1_100 - 1) / 99
|
||||||
grp["norm_value"] = np.nan
|
Ini adalah transformasi LINEAR — tidak mengubah urutan relatif antar indikator,
|
||||||
norm_parts.append(grp)
|
negara, atau tahun. Komparabilitas lintas framework tetap terjaga.
|
||||||
continue
|
"""
|
||||||
|
df = self.df.copy()
|
||||||
|
|
||||||
raw = grp.loc[valid_mask, "value"].values
|
# Konversi 1-100 → 0-1 secara linear
|
||||||
v_min, v_max = raw.min(), raw.max()
|
df["norm_value"] = np.where(
|
||||||
normed = np.full(len(grp), np.nan)
|
df["norm_value_1_100"].notna(),
|
||||||
|
(df["norm_value_1_100"] - 1.0) / 99.0,
|
||||||
|
np.nan
|
||||||
|
)
|
||||||
|
|
||||||
if v_min == v_max:
|
n_null = df["norm_value"].isna().sum()
|
||||||
normed[valid_mask.values] = 0.5
|
n_valid = df["norm_value"].notna().sum()
|
||||||
else:
|
self.logger.debug(
|
||||||
normed[valid_mask.values] = (raw - v_min) / (v_max - v_min)
|
f" _get_norm_value_df: {n_valid:,} valid | {n_null:,} null "
|
||||||
|
f"(dari norm_value_1_100 analytical_layer)"
|
||||||
|
)
|
||||||
|
|
||||||
if do_invert:
|
return df
|
||||||
normed = np.where(np.isnan(normed), np.nan, 1.0 - normed)
|
|
||||||
|
|
||||||
grp["norm_value"] = normed
|
|
||||||
norm_parts.append(grp)
|
|
||||||
|
|
||||||
return pd.concat(norm_parts, ignore_index=True)
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# STEP 2: agg_pillar_composite
|
# STEP 2: agg_pillar_composite
|
||||||
@@ -674,6 +732,16 @@ class FoodSecurityAggregator:
|
|||||||
# =========================================================================
|
# =========================================================================
|
||||||
# STEP 4: agg_framework_by_country
|
# STEP 4: agg_framework_by_country
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
# PERBAIKAN:
|
||||||
|
# - Flag NORMALIZE_FRAMEWORKS_JOINTLY dihapus.
|
||||||
|
# - Tidak ada lagi rescaling ulang per-framework di sini.
|
||||||
|
# - Semua framework (Total, MDGs, SDGs) menggunakan norm_value yang SAMA
|
||||||
|
# sebagai basis (sudah comparable dari analytical_layer).
|
||||||
|
# - global_minmax() hanya digunakan SEKALI untuk mengubah norm agregat
|
||||||
|
# (rata-rata norm_value per country-framework-year) menjadi skor 1-100
|
||||||
|
# di level country-framework, menggunakan SATU POOL DATA BERSAMA.
|
||||||
|
# - Dengan ini, perbandingan skor MDGs vs SDGs per negara adalah valid.
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
def _calc_country_composite_inmemory(self) -> pd.DataFrame:
|
def _calc_country_composite_inmemory(self) -> pd.DataFrame:
|
||||||
df_normed = self._get_norm_value_df()
|
df_normed = self._get_norm_value_df()
|
||||||
@@ -707,12 +775,16 @@ class FoodSecurityAggregator:
|
|||||||
self.logger.info("\n" + "=" * 70)
|
self.logger.info("\n" + "=" * 70)
|
||||||
self.logger.info(f"STEP 4: {table_name}")
|
self.logger.info(f"STEP 4: {table_name}")
|
||||||
self.logger.info("=" * 70)
|
self.logger.info("=" * 70)
|
||||||
|
self.logger.info(
|
||||||
|
" [PERBAIKAN] Semua framework di-aggregate dari norm_value yang SAMA."
|
||||||
|
"\n Tidak ada rescaling per-framework. Skor MDGs dan SDGs comparable."
|
||||||
|
)
|
||||||
|
|
||||||
country_composite = self._calc_country_composite_inmemory()
|
country_composite = self._calc_country_composite_inmemory()
|
||||||
df_normed = self._get_norm_value_df()
|
df_normed = self._get_norm_value_df()
|
||||||
parts = []
|
parts = []
|
||||||
|
|
||||||
# Layer TOTAL
|
# ── Layer TOTAL ───────────────────────────────────────────────────────
|
||||||
agg_total = (
|
agg_total = (
|
||||||
country_composite[[
|
country_composite[[
|
||||||
"country_id", "country_name", "year",
|
"country_id", "country_name", "year",
|
||||||
@@ -727,8 +799,10 @@ class FoodSecurityAggregator:
|
|||||||
agg_total["framework"] = "Total"
|
agg_total["framework"] = "Total"
|
||||||
parts.append(agg_total)
|
parts.append(agg_total)
|
||||||
|
|
||||||
# Layer MDGs pre-SDGs
|
# ── Layer MDGs pre-SDGs (tahun sebelum sdgs_start_year) ──────────────
|
||||||
pre_sdgs_rows = country_composite[country_composite["year"] < self.sdgs_start_year].copy()
|
pre_sdgs_rows = country_composite[
|
||||||
|
country_composite["year"] < self.sdgs_start_year
|
||||||
|
].copy()
|
||||||
if not pre_sdgs_rows.empty:
|
if not pre_sdgs_rows.empty:
|
||||||
mdgs_pre = (
|
mdgs_pre = (
|
||||||
pre_sdgs_rows[[
|
pre_sdgs_rows[[
|
||||||
@@ -744,7 +818,7 @@ class FoodSecurityAggregator:
|
|||||||
mdgs_pre["framework"] = "MDGs"
|
mdgs_pre["framework"] = "MDGs"
|
||||||
parts.append(mdgs_pre)
|
parts.append(mdgs_pre)
|
||||||
|
|
||||||
# Layer MDGs mixed (setelah SDGs masuk)
|
# ── Layer MDGs mixed (setelah SDGs masuk, hanya indikator MDGs) ──────
|
||||||
if self.mdgs_indicator_ids:
|
if self.mdgs_indicator_ids:
|
||||||
df_mdgs_mixed = df_normed[
|
df_mdgs_mixed = df_normed[
|
||||||
(df_normed["indicator_id"].isin(self.mdgs_indicator_ids)) &
|
(df_normed["indicator_id"].isin(self.mdgs_indicator_ids)) &
|
||||||
@@ -754,15 +828,17 @@ class FoodSecurityAggregator:
|
|||||||
agg_mdgs_mixed = (
|
agg_mdgs_mixed = (
|
||||||
df_mdgs_mixed
|
df_mdgs_mixed
|
||||||
.groupby(["country_id", "country_name", "year"])
|
.groupby(["country_id", "country_name", "year"])
|
||||||
.agg(framework_norm=("norm_value", "mean"), n_indicators=("indicator_id", "nunique"))
|
.agg(
|
||||||
|
framework_norm=("norm_value", "mean"),
|
||||||
|
n_indicators =("indicator_id", "nunique")
|
||||||
|
)
|
||||||
.reset_index()
|
.reset_index()
|
||||||
)
|
)
|
||||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
# PERBAIKAN: rescale dari POOL GABUNGAN bersama SDGs (lihat bawah)
|
||||||
agg_mdgs_mixed["framework_score_1_100"] = global_minmax(agg_mdgs_mixed["framework_norm"])
|
|
||||||
agg_mdgs_mixed["framework"] = "MDGs"
|
agg_mdgs_mixed["framework"] = "MDGs"
|
||||||
parts.append(agg_mdgs_mixed)
|
parts.append(agg_mdgs_mixed)
|
||||||
|
|
||||||
# Layer SDGs
|
# ── Layer SDGs (hanya indikator SDGs, mulai sdgs_start_year) ─────────
|
||||||
if self.sdgs_indicator_ids:
|
if self.sdgs_indicator_ids:
|
||||||
df_sdgs = df_normed[
|
df_sdgs = df_normed[
|
||||||
(df_normed["indicator_id"].isin(self.sdgs_indicator_ids)) &
|
(df_normed["indicator_id"].isin(self.sdgs_indicator_ids)) &
|
||||||
@@ -772,22 +848,40 @@ class FoodSecurityAggregator:
|
|||||||
agg_sdgs = (
|
agg_sdgs = (
|
||||||
df_sdgs
|
df_sdgs
|
||||||
.groupby(["country_id", "country_name", "year"])
|
.groupby(["country_id", "country_name", "year"])
|
||||||
.agg(framework_norm=("norm_value", "mean"), n_indicators=("indicator_id", "nunique"))
|
.agg(
|
||||||
|
framework_norm=("norm_value", "mean"),
|
||||||
|
n_indicators =("indicator_id", "nunique")
|
||||||
|
)
|
||||||
.reset_index()
|
.reset_index()
|
||||||
)
|
)
|
||||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
|
||||||
agg_sdgs["framework_score_1_100"] = global_minmax(agg_sdgs["framework_norm"])
|
|
||||||
agg_sdgs["framework"] = "SDGs"
|
agg_sdgs["framework"] = "SDGs"
|
||||||
parts.append(agg_sdgs)
|
parts.append(agg_sdgs)
|
||||||
|
|
||||||
df = pd.concat(parts, ignore_index=True)
|
df = pd.concat(parts, ignore_index=True)
|
||||||
|
|
||||||
if NORMALIZE_FRAMEWORKS_JOINTLY:
|
# PERBAIKAN: Rescale framework_score_1_100 dari SATU POOL BERSAMA
|
||||||
mixed_mask = (df["framework"].isin(["MDGs", "SDGs"])) & (df["year"] >= self.sdgs_start_year)
|
# untuk semua framework (MDGs mixed + SDGs) sekaligus.
|
||||||
if mixed_mask.any():
|
# Ini memastikan skor 60 di MDGs dan skor 60 di SDGs memiliki makna
|
||||||
df.loc[mixed_mask, "framework_score_1_100"] = global_minmax(df.loc[mixed_mask, "framework_norm"])
|
# yang sama: posisi relatif yang sama dalam distribusi gabungan.
|
||||||
|
mixed_mask = df["framework"].isin(["MDGs", "SDGs"])
|
||||||
|
mixed_pre_mask = (df["framework"] == "MDGs") & (df["year"] < self.sdgs_start_year)
|
||||||
|
|
||||||
|
# Rescale pre-SDGs MDGs dari pool Total (sudah dihitung)
|
||||||
|
# → sudah ada di agg_total (framework_score_1_100 = dari country_composite)
|
||||||
|
|
||||||
|
# Rescale MDGs mixed + SDGs dari SATU POOL BERSAMA
|
||||||
|
post_sdgs_mask = mixed_mask & ~mixed_pre_mask & df["framework_norm"].notna()
|
||||||
|
if post_sdgs_mask.any():
|
||||||
|
df.loc[post_sdgs_mask, "framework_score_1_100"] = global_minmax(
|
||||||
|
df.loc[post_sdgs_mask, "framework_norm"]
|
||||||
|
)
|
||||||
|
|
||||||
df = check_and_dedup(df, ["country_id", "framework", "year"], context=table_name, logger=self.logger)
|
df = check_and_dedup(df, ["country_id", "framework", "year"], context=table_name, logger=self.logger)
|
||||||
|
|
||||||
|
# Pastikan kolom framework_score_1_100 ada untuk semua baris
|
||||||
|
if "framework_score_1_100" not in df.columns:
|
||||||
|
df["framework_score_1_100"] = np.nan
|
||||||
|
|
||||||
df["rank_in_framework_year"] = (
|
df["rank_in_framework_year"] = (
|
||||||
df.groupby(["framework", "year"])["framework_score_1_100"]
|
df.groupby(["framework", "year"])["framework_score_1_100"]
|
||||||
.rank(method="min", ascending=False)
|
.rank(method="min", ascending=False)
|
||||||
@@ -797,6 +891,9 @@ class FoodSecurityAggregator:
|
|||||||
df = add_condition_column(df, "framework_score_1_100")
|
df = add_condition_column(df, "framework_score_1_100")
|
||||||
log_condition_summary(df, table_name, self.logger)
|
log_condition_summary(df, table_name, self.logger)
|
||||||
|
|
||||||
|
# Log diagnostik: bandingkan skor MDGs vs SDGs
|
||||||
|
self._log_framework_score_diagnostics(df, table_name)
|
||||||
|
|
||||||
df["country_id"] = df["country_id"].astype(int)
|
df["country_id"] = df["country_id"].astype(int)
|
||||||
df["year"] = df["year"].astype(int)
|
df["year"] = df["year"].astype(int)
|
||||||
df["n_indicators"] = safe_int(df["n_indicators"], col_name="n_indicators", logger=self.logger)
|
df["n_indicators"] = safe_int(df["n_indicators"], col_name="n_indicators", logger=self.logger)
|
||||||
@@ -828,6 +925,9 @@ class FoodSecurityAggregator:
|
|||||||
# =========================================================================
|
# =========================================================================
|
||||||
# STEP 5: agg_framework_asean
|
# STEP 5: agg_framework_asean
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
# PERBAIKAN: Sama dengan framework_by_country — tidak ada rescaling terpisah
|
||||||
|
# per framework. MDGs mixed dan SDGs di-rescale dari satu pool bersama.
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
def calc_framework_asean(self) -> pd.DataFrame:
|
def calc_framework_asean(self) -> pd.DataFrame:
|
||||||
table_name = "agg_framework_asean"
|
table_name = "agg_framework_asean"
|
||||||
@@ -835,6 +935,10 @@ class FoodSecurityAggregator:
|
|||||||
self.logger.info("\n" + "=" * 70)
|
self.logger.info("\n" + "=" * 70)
|
||||||
self.logger.info(f"STEP 5: {table_name}")
|
self.logger.info(f"STEP 5: {table_name}")
|
||||||
self.logger.info("=" * 70)
|
self.logger.info("=" * 70)
|
||||||
|
self.logger.info(
|
||||||
|
" [PERBAIKAN] MDGs mixed + SDGs di-rescale dari SATU POOL BERSAMA."
|
||||||
|
"\n Skor ASEAN MDGs dan SDGs sekarang comparable."
|
||||||
|
)
|
||||||
|
|
||||||
df_normed = self._get_norm_value_df()
|
df_normed = self._get_norm_value_df()
|
||||||
country_composite = self._calc_country_composite_inmemory()
|
country_composite = self._calc_country_composite_inmemory()
|
||||||
@@ -847,14 +951,18 @@ class FoodSecurityAggregator:
|
|||||||
)
|
)
|
||||||
asean_overall = (
|
asean_overall = (
|
||||||
country_norm.groupby("year")
|
country_norm.groupby("year")
|
||||||
.agg(asean_norm=("country_norm", "mean"), std_norm=("country_norm", "std"), n_countries=("country_norm", "count"))
|
.agg(
|
||||||
|
asean_norm =("country_norm", "mean"),
|
||||||
|
std_norm =("country_norm", "std"),
|
||||||
|
n_countries =("country_norm", "count")
|
||||||
|
)
|
||||||
.reset_index()
|
.reset_index()
|
||||||
)
|
)
|
||||||
asean_overall["asean_score_1_100"] = global_minmax(asean_overall["asean_norm"])
|
asean_overall["asean_score_1_100"] = global_minmax(asean_overall["asean_norm"])
|
||||||
|
|
||||||
parts = []
|
parts = []
|
||||||
|
|
||||||
# Layer TOTAL
|
# ── Layer TOTAL ───────────────────────────────────────────────────────
|
||||||
total_cols = asean_overall[["year", "asean_score_1_100", "asean_norm", "std_norm", "n_countries"]].copy()
|
total_cols = asean_overall[["year", "asean_score_1_100", "asean_norm", "std_norm", "n_countries"]].copy()
|
||||||
total_cols = total_cols.rename(columns={
|
total_cols = total_cols.rename(columns={
|
||||||
"asean_score_1_100": "framework_score_1_100",
|
"asean_score_1_100": "framework_score_1_100",
|
||||||
@@ -866,7 +974,7 @@ class FoodSecurityAggregator:
|
|||||||
total_cols["framework"] = "Total"
|
total_cols["framework"] = "Total"
|
||||||
parts.append(total_cols)
|
parts.append(total_cols)
|
||||||
|
|
||||||
# Layer MDGs pre-SDGs
|
# ── Layer MDGs pre-SDGs ───────────────────────────────────────────────
|
||||||
pre_sdgs = asean_overall[asean_overall["year"] < self.sdgs_start_year].copy()
|
pre_sdgs = asean_overall[asean_overall["year"] < self.sdgs_start_year].copy()
|
||||||
if not pre_sdgs.empty:
|
if not pre_sdgs.empty:
|
||||||
mdgs_pre = pre_sdgs[["year", "asean_score_1_100", "asean_norm", "std_norm", "n_countries"]].copy()
|
mdgs_pre = pre_sdgs[["year", "asean_score_1_100", "asean_norm", "std_norm", "n_countries"]].copy()
|
||||||
@@ -884,7 +992,9 @@ class FoodSecurityAggregator:
|
|||||||
mdgs_pre["framework"] = "MDGs"
|
mdgs_pre["framework"] = "MDGs"
|
||||||
parts.append(mdgs_pre)
|
parts.append(mdgs_pre)
|
||||||
|
|
||||||
# Layer MDGs mixed
|
# ── Siapkan MDGs mixed dan SDGs untuk rescaling BERSAMA ───────────────
|
||||||
|
mixed_parts = []
|
||||||
|
|
||||||
if self.mdgs_indicator_ids:
|
if self.mdgs_indicator_ids:
|
||||||
df_mdgs_mixed = df_normed[
|
df_mdgs_mixed = df_normed[
|
||||||
(df_normed["indicator_id"].isin(self.mdgs_indicator_ids)) &
|
(df_normed["indicator_id"].isin(self.mdgs_indicator_ids)) &
|
||||||
@@ -902,12 +1012,9 @@ class FoodSecurityAggregator:
|
|||||||
).reset_index()
|
).reset_index()
|
||||||
n_ind_mdgs = df_mdgs_mixed.groupby("year")["indicator_id"].nunique().reset_index().rename(columns={"indicator_id": "n_indicators"})
|
n_ind_mdgs = df_mdgs_mixed.groupby("year")["indicator_id"].nunique().reset_index().rename(columns={"indicator_id": "n_indicators"})
|
||||||
asean_mdgs = asean_mdgs.merge(n_ind_mdgs, on="year", how="left")
|
asean_mdgs = asean_mdgs.merge(n_ind_mdgs, on="year", how="left")
|
||||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
|
||||||
asean_mdgs["framework_score_1_100"] = global_minmax(asean_mdgs["framework_norm"])
|
|
||||||
asean_mdgs["framework"] = "MDGs"
|
asean_mdgs["framework"] = "MDGs"
|
||||||
parts.append(asean_mdgs)
|
mixed_parts.append(asean_mdgs)
|
||||||
|
|
||||||
# Layer SDGs
|
|
||||||
if self.sdgs_indicator_ids:
|
if self.sdgs_indicator_ids:
|
||||||
df_sdgs = df_normed[
|
df_sdgs = df_normed[
|
||||||
(df_normed["indicator_id"].isin(self.sdgs_indicator_ids)) &
|
(df_normed["indicator_id"].isin(self.sdgs_indicator_ids)) &
|
||||||
@@ -925,23 +1032,25 @@ class FoodSecurityAggregator:
|
|||||||
).reset_index()
|
).reset_index()
|
||||||
n_ind_sdgs = df_sdgs.groupby("year")["indicator_id"].nunique().reset_index().rename(columns={"indicator_id": "n_indicators"})
|
n_ind_sdgs = df_sdgs.groupby("year")["indicator_id"].nunique().reset_index().rename(columns={"indicator_id": "n_indicators"})
|
||||||
asean_sdgs = asean_sdgs.merge(n_ind_sdgs, on="year", how="left")
|
asean_sdgs = asean_sdgs.merge(n_ind_sdgs, on="year", how="left")
|
||||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
|
||||||
asean_sdgs["framework_score_1_100"] = global_minmax(asean_sdgs["framework_norm"])
|
|
||||||
asean_sdgs["framework"] = "SDGs"
|
asean_sdgs["framework"] = "SDGs"
|
||||||
parts.append(asean_sdgs)
|
mixed_parts.append(asean_sdgs)
|
||||||
|
|
||||||
|
# PERBAIKAN: Rescale MDGs mixed + SDGs dari SATU POOL BERSAMA
|
||||||
|
if mixed_parts:
|
||||||
|
df_mixed = pd.concat(mixed_parts, ignore_index=True)
|
||||||
|
df_mixed["framework_score_1_100"] = global_minmax(df_mixed["framework_norm"])
|
||||||
|
parts.append(df_mixed)
|
||||||
|
|
||||||
df = pd.concat(parts, ignore_index=True)
|
df = pd.concat(parts, ignore_index=True)
|
||||||
|
|
||||||
if NORMALIZE_FRAMEWORKS_JOINTLY:
|
|
||||||
mixed_mask = (df["framework"].isin(["MDGs", "SDGs"])) & (df["year"] >= self.sdgs_start_year)
|
|
||||||
if mixed_mask.any():
|
|
||||||
df.loc[mixed_mask, "framework_score_1_100"] = global_minmax(df.loc[mixed_mask, "framework_norm"])
|
|
||||||
|
|
||||||
df = check_and_dedup(df, ["framework", "year"], context=table_name, logger=self.logger)
|
df = check_and_dedup(df, ["framework", "year"], context=table_name, logger=self.logger)
|
||||||
df = add_yoy(df, ["framework"], "framework_score_1_100")
|
df = add_yoy(df, ["framework"], "framework_score_1_100")
|
||||||
df = add_condition_column(df, "framework_score_1_100")
|
df = add_condition_column(df, "framework_score_1_100")
|
||||||
log_condition_summary(df, table_name, self.logger)
|
log_condition_summary(df, table_name, self.logger)
|
||||||
|
|
||||||
|
# Log diagnostik: bandingkan skor ASEAN MDGs vs SDGs
|
||||||
|
self._log_framework_score_diagnostics(df, table_name)
|
||||||
|
|
||||||
df["year"] = df["year"].astype(int)
|
df["year"] = df["year"].astype(int)
|
||||||
df["n_indicators"] = safe_int(df["n_indicators"], col_name="n_indicators", logger=self.logger)
|
df["n_indicators"] = safe_int(df["n_indicators"], col_name="n_indicators", logger=self.logger)
|
||||||
df["n_countries_with_data"] = safe_int(df["n_countries_with_data"], col_name="n_countries_with_data", logger=self.logger)
|
df["n_countries_with_data"] = safe_int(df["n_countries_with_data"], col_name="n_countries_with_data", logger=self.logger)
|
||||||
@@ -1164,9 +1273,34 @@ class FoodSecurityAggregator:
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# HELPERS
|
# DIAGNOSTIK & VALIDASI
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
|
def _log_framework_score_diagnostics(
    self,
    df: pd.DataFrame,
    context: str,
    gap_threshold: float = 10.0,
) -> None:
    """
    Log the mean, minimum and maximum framework score for each framework.

    Groups df by its 'framework' column, summarises 'framework_score_1_100'
    per group, and, when both "MDGs" and "SDGs" are present, logs the gap
    between their mean scores.

    Args:
        df: Frame holding 'framework' and 'framework_score_1_100' columns.
        context: Label included in the log header (callers pass a table name).
        gap_threshold: Absolute MDGs-SDGs gap (in points) above which the gap
            is labelled "SUBSTANTIF". Defaults to 10.0, the value that used
            to be hard-coded here.

    Returns:
        None. The only effect is log output on self.logger.
    """
    self.logger.info(f"\n [DIAGNOSTIK] Rata-rata skor per framework ({context}):")

    fw_means = (
        df.groupby("framework")["framework_score_1_100"]
        .agg(['mean', 'min', 'max'])
        .round(2)
    )
    for fw, row in fw_means.iterrows():
        self.logger.info(
            f" {fw:<8} mean={row['mean']:>6.2f} "
            f"range=[{row['min']:.2f}, {row['max']:.2f}]"
        )

    # The gap is only meaningful when both frameworks appear in df.
    if "MDGs" not in fw_means.index or "SDGs" not in fw_means.index:
        return

    gap = fw_means.loc["MDGs", "mean"] - fw_means.loc["SDGs", "mean"]

    # A framework can be present with every score missing, which makes the
    # gap NaN. abs(nan) > x is always False, so without this guard the gap
    # would be reported as acceptable.
    if pd.isna(gap):
        self.logger.warning(
            "\n Gap MDGs-SDGs tidak dapat dihitung "
            "(skor MDGs atau SDGs seluruhnya kosong)."
        )
        return

    if abs(gap) > gap_threshold:
        verdict = " → SUBSTANTIF (indikator SDGs mengukur deprivasi lebih dalam)"
    else:
        verdict = " → dalam batas wajar"
    self.logger.info(f"\n Gap MDGs-SDGs = {gap:.2f} poin" + verdict)
|
||||||
|
|
||||||
def _validate_mdgs_equals_total(self, df: pd.DataFrame, level: str = ""):
|
def _validate_mdgs_equals_total(self, df: pd.DataFrame, level: str = ""):
|
||||||
self.logger.info(f"\n Validasi MDGs < {self.sdgs_start_year} == Total [{level}]:")
|
self.logger.info(f"\n Validasi MDGs < {self.sdgs_start_year} == Total [{level}]:")
|
||||||
group_by = ["year"] if level.startswith("asean") else ["country_id", "year"]
|
group_by = ["year"] if level.startswith("asean") else ["country_id", "year"]
|
||||||
@@ -1202,6 +1336,10 @@ class FoodSecurityAggregator:
|
|||||||
self.logger.info("\n" + "=" * 70)
|
self.logger.info("\n" + "=" * 70)
|
||||||
self.logger.info("FOOD SECURITY AGGREGATION — 6 TABLES -> fs_asean_gold")
|
self.logger.info("FOOD SECURITY AGGREGATION — 6 TABLES -> fs_asean_gold")
|
||||||
self.logger.info(f" Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
self.logger.info(f" Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
||||||
|
self.logger.info(
|
||||||
|
" NORMALISASI: norm_value dari analytical_layer (satu referensi global)."
|
||||||
|
"\n Tidak ada rescaling per-framework. MDGs dan SDGs comparable."
|
||||||
|
)
|
||||||
self.logger.info("=" * 70)
|
self.logger.info("=" * 70)
|
||||||
|
|
||||||
self.load_data()
|
self.load_data()
|
||||||
@@ -1250,6 +1388,8 @@ if __name__ == "__main__":
|
|||||||
print("=" * 70)
|
print("=" * 70)
|
||||||
print("FOOD SECURITY AGGREGATION -> fs_asean_gold")
|
print("FOOD SECURITY AGGREGATION -> fs_asean_gold")
|
||||||
print(f"Condition threshold: bad<{THRESHOLD_BAD}, moderate {THRESHOLD_BAD}-{THRESHOLD_GOOD}, good>{THRESHOLD_GOOD}")
|
print(f"Condition threshold: bad<{THRESHOLD_BAD}, moderate {THRESHOLD_BAD}-{THRESHOLD_GOOD}, good>{THRESHOLD_GOOD}")
|
||||||
|
print("NORMALISASI: satu referensi global per indikator (dari analytical_layer).")
|
||||||
|
print("Tidak ada rescaling per-framework. MDGs dan SDGs comparable.")
|
||||||
print("=" * 70)
|
print("=" * 70)
|
||||||
|
|
||||||
logger = setup_logging()
|
logger = setup_logging()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user