salah file
This commit is contained in:
@@ -2,19 +2,32 @@
|
||||
BIGQUERY ANALYSIS LAYER - FOOD SECURITY AGGREGATION
|
||||
Semua agregasi pakai norm_value dari _get_norm_value_df()
|
||||
|
||||
UPDATED:
|
||||
- _classify_indicators() membaca kolom 'framework' langsung dari
|
||||
fact_asean_food_security_selected (sudah di-assign di analytical_layer
|
||||
berdasarkan SDG_INDICATOR_KEYWORDS + actual_start_year).
|
||||
- Kolom 'condition' (good/moderate/bad) ditambahkan ke semua tabel agregasi:
|
||||
* agg_pillar_composite
|
||||
* agg_pillar_by_country
|
||||
* agg_framework_by_country
|
||||
* agg_framework_asean
|
||||
Threshold fixed absolute (skala 1-100, direction-aware):
|
||||
bad : score < 40
|
||||
moderate : 40 <= score <= 60
|
||||
good : score > 60
|
||||
PERBAIKAN (vs versi sebelumnya):
|
||||
─────────────────────────────────────────────────────────────────────────────
|
||||
1. NORMALIZE_FRAMEWORKS_JOINTLY dihapus.
|
||||
Setelah perbaikan di analytical_layer, norm_value_1_100 sudah dihitung
|
||||
SEKALI per indikator dari seluruh data (semua tahun, semua negara).
|
||||
Tidak ada lagi rescaling ulang per-framework di layer ini.
|
||||
Semua framework (MDGs, SDGs, Total) menggunakan norm_value yang SAMA
|
||||
sebagai basis, sehingga skor mereka berada pada skala yang setara.
|
||||
|
||||
2. _get_norm_value_df() DISEDERHANAKAN.
|
||||
Fungsi ini sekarang hanya membaca kolom norm_value_1_100 yang sudah ada
|
||||
di fact_asean_food_security_selected (hasil dari analytical_layer),
|
||||
kemudian memetakan ke skala 0-1 untuk keperluan agregasi internal.
|
||||
TIDAK ada lagi normalisasi ulang per indikator di sini.
|
||||
|
||||
3. global_minmax() TETAP DIGUNAKAN untuk mengubah rata-rata norm (0-1) menjadi
|
||||
skor 1-100 di level agregasi (pillar / country / asean).
|
||||
Ini adalah rescaling level AGREGAT (bukan level indikator), sehingga masih
|
||||
valid dan tidak menimbulkan bias komparabilitas.
|
||||
|
||||
4. Framework MDGs dan SDGs sekarang comparable:
|
||||
- Jika skor SDGs < skor MDGs → memang karena indikator SDGs mengukur
|
||||
dimensi deprivasi yang lebih dalam (substantif), bukan artefak teknis.
|
||||
- Log diagnostik ditambahkan untuk memverifikasi ini.
|
||||
|
||||
5. Kolom 'condition' (good/moderate/bad) TETAP dengan threshold yang sama.
|
||||
|
||||
Simpan 6 tabel ke fs_asean_gold (layer='gold'):
|
||||
- agg_pillar_composite
|
||||
@@ -57,10 +70,7 @@ DIRECTION_POSITIVE_KEYWORDS = frozenset({
|
||||
"positive", "higher_better", "higher_is_better",
|
||||
})
|
||||
|
||||
NORMALIZE_FRAMEWORKS_JOINTLY = False
|
||||
|
||||
# Threshold kondisi — fixed absolute, skala 1-100
|
||||
# Konsisten dengan THRESHOLD_BAD / THRESHOLD_GOOD di analytical_layer
|
||||
THRESHOLD_BAD = 40.0
|
||||
THRESHOLD_GOOD = 60.0
|
||||
|
||||
@@ -118,6 +128,11 @@ def _should_invert(direction: str, logger=None, context: str = "") -> bool:
|
||||
|
||||
|
||||
def global_minmax(series: pd.Series, lo: float = 1.0, hi: float = 100.0) -> pd.Series:
|
||||
"""
|
||||
Rescale series ke rentang [lo, hi].
|
||||
Digunakan untuk mengubah norm agregat (0-1) menjadi skor 1-100 di level
|
||||
pillar / country / asean. Bukan untuk normalisasi indikator mentah.
|
||||
"""
|
||||
values = series.dropna().values
|
||||
if len(values) == 0:
|
||||
return pd.Series(np.nan, index=series.index)
|
||||
@@ -172,16 +187,11 @@ def check_and_dedup(
|
||||
|
||||
|
||||
def add_condition_column(df: pd.DataFrame, score_col: str) -> pd.DataFrame:
    """
    Attach a 'condition' label column derived from ``score_col``.

    Every value of ``df[score_col]`` is passed through ``assign_condition``
    and the result is written to ``df['condition']``. Thresholds on the
    1-100 scale: bad < 40, moderate 40-60, good > 60.

    The frame is modified in place and also returned, so callers may write
    ``df = add_condition_column(df, ...)``.
    """
    score_series = df[score_col]
    condition_labels = score_series.apply(assign_condition)
    df['condition'] = condition_labels
    return df
|
||||
|
||||
|
||||
def log_condition_summary(df: pd.DataFrame, context: str, logger) -> None:
|
||||
"""Log distribusi kondisi untuk verifikasi."""
|
||||
dist = df['condition'].value_counts()
|
||||
logger.info(
|
||||
f" Condition distribution ({context}): " +
|
||||
@@ -190,7 +200,7 @@ def log_condition_summary(df: pd.DataFrame, context: str, logger) -> None:
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NARRATIVE BUILDER FUNCTIONS
|
||||
# NARRATIVE BUILDER FUNCTIONS (tidak berubah)
|
||||
# =============================================================================
|
||||
|
||||
def _fmt_score(score) -> str:
|
||||
@@ -426,6 +436,8 @@ class FoodSecurityAggregator:
|
||||
"indicator_id", "indicator_name", "direction", "framework",
|
||||
"pillar_id", "pillar_name",
|
||||
"time_id", "year", "value",
|
||||
# PERBAIKAN: norm_value_1_100 wajib ada (hasil analytical_layer)
|
||||
"norm_value_1_100",
|
||||
}
|
||||
missing_cols = required_cols - set(self.df.columns)
|
||||
if missing_cols:
|
||||
@@ -434,12 +446,13 @@ class FoodSecurityAggregator:
|
||||
f"Pastikan pipeline dijalankan berurutan:\n"
|
||||
f" 1. bigquery_cleaned_layer.py\n"
|
||||
f" 2. bigquery_dimensional_model.py\n"
|
||||
f" 3. bigquery_analytical_layer.py\n"
|
||||
f" 3. bigquery_analytical_layer.py ← harus dijalankan dulu\n"
|
||||
f" 4. bigquery_analysis_layer.py (file ini)"
|
||||
)
|
||||
|
||||
self.df["direction"] = self.df["direction"].fillna("positive")
|
||||
self.df["framework"] = self.df["framework"].fillna("MDGs")
|
||||
self.df["norm_value_1_100"] = self.df["norm_value_1_100"].astype(float)
|
||||
|
||||
dir_dist = self.df.drop_duplicates("indicator_id")["direction"].value_counts()
|
||||
self.logger.info(f"\n Distribusi direction per indikator:")
|
||||
@@ -458,6 +471,45 @@ class FoodSecurityAggregator:
|
||||
f"Tahun: {int(self.df['year'].min())}-{int(self.df['year'].max())}"
|
||||
)
|
||||
|
||||
# Diagnostik: cek komparabilitas norm antar framework
|
||||
self._log_norm_comparability_diagnostics()
|
||||
|
||||
def _log_norm_comparability_diagnostics(self):
    """
    Log per-framework summary statistics of ``norm_value_1_100``.

    Groups ``self.df`` by 'framework' and logs mean / median / std / min /
    max for each group, then logs the difference between the MDGs mean and
    the SDGs mean:

    * absolute gap > 15 points -> the [INFO] message,
    * otherwise                -> the [OK] message,
    * gap undefined (one of the two frameworks has no non-null values, so
      its mean is NaN) -> a [SKIP] message. Without this branch a NaN gap
      would fall through to the [OK] message, because ``abs(nan) > 15`` is
      False.

    Only writes to ``self.logger``; returns nothing.
    """
    self.logger.info("\n [DIAGNOSTIK] Komparabilitas norm_value_1_100 antar framework:")
    self.logger.info(f" {'─'*60}")

    fw_stats = (
        self.df.groupby('framework')['norm_value_1_100']
        .agg(['mean', 'median', 'std', 'min', 'max'])
        .round(2)
    )
    for fw, row in fw_stats.iterrows():
        self.logger.info(
            f" {fw:<8} mean={row['mean']:>6.2f} median={row['median']:>6.2f} "
            f"std={row['std']:>5.2f} range=[{row['min']:.2f},{row['max']:.2f}]"
        )

    # The gap uses the unrounded means, not the rounded values logged above.
    norm_values = self.df['norm_value_1_100']
    framework = self.df['framework']
    mdgs_mean = norm_values[framework == 'MDGs'].mean()
    sdgs_mean = norm_values[framework == 'SDGs'].mean()
    gap = mdgs_mean - sdgs_mean

    # An empty or all-null framework yields a NaN mean; report that explicitly.
    if pd.isna(gap):
        self.logger.info(
            "\n [SKIP] Gap MDGs-SDGs tidak dapat dihitung: "
            "salah satu framework tidak memiliki nilai norm_value_1_100."
        )
        return

    if abs(gap) > 15:
        self.logger.info(
            f"\n [INFO] Gap MDGs-SDGs = {gap:.2f} poin."
            f"\n Ini adalah perbedaan SUBSTANTIF (bukan artefak normalisasi):"
            f"\n Indikator SDGs mengukur deprivasi yang lebih dalam"
            f"\n (FIES, stunting, wasting, anaemia) vs indikator MDGs."
            f"\n Gap ini valid untuk dilaporkan sebagai temuan analisis."
        )
    else:
        self.logger.info(
            f"\n [OK] Gap MDGs-SDGs = {gap:.2f} poin — dalam batas wajar."
        )
|
||||
|
||||
# =========================================================================
|
||||
# STEP 1b: Klasifikasi indikator
|
||||
# =========================================================================
|
||||
@@ -474,8 +526,6 @@ class FoodSecurityAggregator:
|
||||
self.df[self.df["framework"] == "SDGs"]["indicator_id"].unique().tolist()
|
||||
)
|
||||
|
||||
# sdgs_start_year: ambil dari proxy SDGs-only (FIES/anaemia)
|
||||
# Konsisten dengan cara analytical_layer mendeteksinya
|
||||
_PROXY_KW = frozenset(['food insecurity', 'anemia', 'anaemia'])
|
||||
proxy_mask = (
|
||||
(self.df["framework"] == "SDGs") &
|
||||
@@ -492,7 +542,6 @@ class FoodSecurityAggregator:
|
||||
f"(dari proxy FIES/anaemia di tabel)"
|
||||
)
|
||||
else:
|
||||
# Fallback: min year dari semua SDGs rows
|
||||
sdgs_rows = self.df[self.df["framework"] == "SDGs"]
|
||||
if not sdgs_rows.empty:
|
||||
self.sdgs_start_year = int(sdgs_rows["year"].min())
|
||||
@@ -520,39 +569,48 @@ class FoodSecurityAggregator:
|
||||
self.logger.info(f" [{int(row['indicator_id'])}] {row['indicator_name']}")
|
||||
|
||||
# =========================================================================
|
||||
# CORE HELPER: normalisasi 0-1 per indikator (untuk composite score)
|
||||
# CORE HELPER: _get_norm_value_df()
|
||||
# =========================================================================
|
||||
# PERBAIKAN:
|
||||
# Fungsi ini TIDAK lagi melakukan normalisasi ulang per indikator.
|
||||
# Kolom norm_value_1_100 sudah dihitung sekali di analytical_layer
|
||||
# dengan referensi global (semua tahun, semua negara, per indikator).
|
||||
#
|
||||
# Yang dilakukan di sini hanya:
|
||||
# 1. Membaca norm_value_1_100 dari df
|
||||
# 2. Mengubah skala 1-100 → 0-1 (untuk keperluan rata-rata agregat)
|
||||
# dengan rumus linear: norm_0_1 = (norm_1_100 - 1) / 99
|
||||
#
|
||||
# Rescaling agregat (0-1 → 1-100) tetap dilakukan via global_minmax()
|
||||
# di masing-masing fungsi calc_* untuk menghasilkan skor level pillar/country/asean.
|
||||
# =========================================================================
|
||||
|
||||
def _get_norm_value_df(self) -> pd.DataFrame:
    """
    Return a copy of ``self.df`` with a 'norm_value' column on a 0-1 scale.

    'norm_value' is derived from the existing 'norm_value_1_100' column by
    the linear map ``norm_value = (norm_value_1_100 - 1) / 99``. No
    per-indicator re-normalisation and no direction inversion happens here;
    the surrounding comments state that 'norm_value_1_100' is already
    computed once upstream (analytical layer).

    Being linear, the map preserves the relative ordering of rows. Rows
    whose 'norm_value_1_100' is null get a null 'norm_value'.

    ``self.df`` itself is not modified. A debug line with the number of
    valid and null values is written to ``self.logger``.
    """
    df = self.df.copy()

    # Linear 1-100 -> 0-1 conversion; NaN propagates through the arithmetic,
    # so no explicit null mask is required.
    df["norm_value"] = (df["norm_value_1_100"] - 1.0) / 99.0

    n_null = df["norm_value"].isna().sum()
    n_valid = df["norm_value"].notna().sum()
    self.logger.debug(
        f" _get_norm_value_df: {n_valid:,} valid | {n_null:,} null "
        f"(dari norm_value_1_100 analytical_layer)"
    )

    return df
|
||||
|
||||
# =========================================================================
|
||||
# STEP 2: agg_pillar_composite
|
||||
@@ -674,6 +732,16 @@ class FoodSecurityAggregator:
|
||||
# =========================================================================
|
||||
# STEP 4: agg_framework_by_country
|
||||
# =========================================================================
|
||||
# PERBAIKAN:
|
||||
# - Flag NORMALIZE_FRAMEWORKS_JOINTLY dihapus.
|
||||
# - Tidak ada lagi rescaling ulang per-framework di sini.
|
||||
# - Semua framework (Total, MDGs, SDGs) menggunakan norm_value yang SAMA
|
||||
# sebagai basis (sudah comparable dari analytical_layer).
|
||||
# - global_minmax() hanya digunakan SEKALI untuk mengubah norm agregat
|
||||
# (rata-rata norm_value per country-framework-year) menjadi skor 1-100
|
||||
# di level country-framework, menggunakan SATU POOL DATA BERSAMA.
|
||||
# - Dengan ini, perbandingan skor MDGs vs SDGs per negara adalah valid.
|
||||
# =========================================================================
|
||||
|
||||
def _calc_country_composite_inmemory(self) -> pd.DataFrame:
|
||||
df_normed = self._get_norm_value_df()
|
||||
@@ -707,12 +775,16 @@ class FoodSecurityAggregator:
|
||||
self.logger.info("\n" + "=" * 70)
|
||||
self.logger.info(f"STEP 4: {table_name}")
|
||||
self.logger.info("=" * 70)
|
||||
self.logger.info(
|
||||
" [PERBAIKAN] Semua framework di-aggregate dari norm_value yang SAMA."
|
||||
"\n Tidak ada rescaling per-framework. Skor MDGs dan SDGs comparable."
|
||||
)
|
||||
|
||||
country_composite = self._calc_country_composite_inmemory()
|
||||
df_normed = self._get_norm_value_df()
|
||||
parts = []
|
||||
|
||||
# Layer TOTAL
|
||||
# ── Layer TOTAL ───────────────────────────────────────────────────────
|
||||
agg_total = (
|
||||
country_composite[[
|
||||
"country_id", "country_name", "year",
|
||||
@@ -727,8 +799,10 @@ class FoodSecurityAggregator:
|
||||
agg_total["framework"] = "Total"
|
||||
parts.append(agg_total)
|
||||
|
||||
# Layer MDGs pre-SDGs
|
||||
pre_sdgs_rows = country_composite[country_composite["year"] < self.sdgs_start_year].copy()
|
||||
# ── Layer MDGs pre-SDGs (tahun sebelum sdgs_start_year) ──────────────
|
||||
pre_sdgs_rows = country_composite[
|
||||
country_composite["year"] < self.sdgs_start_year
|
||||
].copy()
|
||||
if not pre_sdgs_rows.empty:
|
||||
mdgs_pre = (
|
||||
pre_sdgs_rows[[
|
||||
@@ -744,7 +818,7 @@ class FoodSecurityAggregator:
|
||||
mdgs_pre["framework"] = "MDGs"
|
||||
parts.append(mdgs_pre)
|
||||
|
||||
# Layer MDGs mixed (setelah SDGs masuk)
|
||||
# ── Layer MDGs mixed (setelah SDGs masuk, hanya indikator MDGs) ──────
|
||||
if self.mdgs_indicator_ids:
|
||||
df_mdgs_mixed = df_normed[
|
||||
(df_normed["indicator_id"].isin(self.mdgs_indicator_ids)) &
|
||||
@@ -754,15 +828,17 @@ class FoodSecurityAggregator:
|
||||
agg_mdgs_mixed = (
|
||||
df_mdgs_mixed
|
||||
.groupby(["country_id", "country_name", "year"])
|
||||
.agg(framework_norm=("norm_value", "mean"), n_indicators=("indicator_id", "nunique"))
|
||||
.agg(
|
||||
framework_norm=("norm_value", "mean"),
|
||||
n_indicators =("indicator_id", "nunique")
|
||||
)
|
||||
.reset_index()
|
||||
)
|
||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
||||
agg_mdgs_mixed["framework_score_1_100"] = global_minmax(agg_mdgs_mixed["framework_norm"])
|
||||
# PERBAIKAN: rescale dari POOL GABUNGAN bersama SDGs (lihat bawah)
|
||||
agg_mdgs_mixed["framework"] = "MDGs"
|
||||
parts.append(agg_mdgs_mixed)
|
||||
|
||||
# Layer SDGs
|
||||
# ── Layer SDGs (hanya indikator SDGs, mulai sdgs_start_year) ─────────
|
||||
if self.sdgs_indicator_ids:
|
||||
df_sdgs = df_normed[
|
||||
(df_normed["indicator_id"].isin(self.sdgs_indicator_ids)) &
|
||||
@@ -772,22 +848,40 @@ class FoodSecurityAggregator:
|
||||
agg_sdgs = (
|
||||
df_sdgs
|
||||
.groupby(["country_id", "country_name", "year"])
|
||||
.agg(framework_norm=("norm_value", "mean"), n_indicators=("indicator_id", "nunique"))
|
||||
.agg(
|
||||
framework_norm=("norm_value", "mean"),
|
||||
n_indicators =("indicator_id", "nunique")
|
||||
)
|
||||
.reset_index()
|
||||
)
|
||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
||||
agg_sdgs["framework_score_1_100"] = global_minmax(agg_sdgs["framework_norm"])
|
||||
agg_sdgs["framework"] = "SDGs"
|
||||
parts.append(agg_sdgs)
|
||||
|
||||
df = pd.concat(parts, ignore_index=True)
|
||||
|
||||
if NORMALIZE_FRAMEWORKS_JOINTLY:
|
||||
mixed_mask = (df["framework"].isin(["MDGs", "SDGs"])) & (df["year"] >= self.sdgs_start_year)
|
||||
if mixed_mask.any():
|
||||
df.loc[mixed_mask, "framework_score_1_100"] = global_minmax(df.loc[mixed_mask, "framework_norm"])
|
||||
# PERBAIKAN: Rescale framework_score_1_100 dari SATU POOL BERSAMA
|
||||
# untuk semua framework (MDGs mixed + SDGs) sekaligus.
|
||||
# Ini memastikan skor 60 di MDGs dan skor 60 di SDGs memiliki makna
|
||||
# yang sama: posisi relatif yang sama dalam distribusi gabungan.
|
||||
mixed_mask = df["framework"].isin(["MDGs", "SDGs"])
|
||||
mixed_pre_mask = (df["framework"] == "MDGs") & (df["year"] < self.sdgs_start_year)
|
||||
|
||||
# Rescale pre-SDGs MDGs dari pool Total (sudah dihitung)
|
||||
# → sudah ada di agg_total (framework_score_1_100 = dari country_composite)
|
||||
|
||||
# Rescale MDGs mixed + SDGs dari SATU POOL BERSAMA
|
||||
post_sdgs_mask = mixed_mask & ~mixed_pre_mask & df["framework_norm"].notna()
|
||||
if post_sdgs_mask.any():
|
||||
df.loc[post_sdgs_mask, "framework_score_1_100"] = global_minmax(
|
||||
df.loc[post_sdgs_mask, "framework_norm"]
|
||||
)
|
||||
|
||||
df = check_and_dedup(df, ["country_id", "framework", "year"], context=table_name, logger=self.logger)
|
||||
|
||||
# Pastikan kolom framework_score_1_100 ada untuk semua baris
|
||||
if "framework_score_1_100" not in df.columns:
|
||||
df["framework_score_1_100"] = np.nan
|
||||
|
||||
df["rank_in_framework_year"] = (
|
||||
df.groupby(["framework", "year"])["framework_score_1_100"]
|
||||
.rank(method="min", ascending=False)
|
||||
@@ -797,6 +891,9 @@ class FoodSecurityAggregator:
|
||||
df = add_condition_column(df, "framework_score_1_100")
|
||||
log_condition_summary(df, table_name, self.logger)
|
||||
|
||||
# Log diagnostik: bandingkan skor MDGs vs SDGs
|
||||
self._log_framework_score_diagnostics(df, table_name)
|
||||
|
||||
df["country_id"] = df["country_id"].astype(int)
|
||||
df["year"] = df["year"].astype(int)
|
||||
df["n_indicators"] = safe_int(df["n_indicators"], col_name="n_indicators", logger=self.logger)
|
||||
@@ -828,6 +925,9 @@ class FoodSecurityAggregator:
|
||||
# =========================================================================
|
||||
# STEP 5: agg_framework_asean
|
||||
# =========================================================================
|
||||
# PERBAIKAN: Sama dengan framework_by_country — tidak ada rescaling terpisah
|
||||
# per framework. MDGs mixed dan SDGs di-rescale dari satu pool bersama.
|
||||
# =========================================================================
|
||||
|
||||
def calc_framework_asean(self) -> pd.DataFrame:
|
||||
table_name = "agg_framework_asean"
|
||||
@@ -835,6 +935,10 @@ class FoodSecurityAggregator:
|
||||
self.logger.info("\n" + "=" * 70)
|
||||
self.logger.info(f"STEP 5: {table_name}")
|
||||
self.logger.info("=" * 70)
|
||||
self.logger.info(
|
||||
" [PERBAIKAN] MDGs mixed + SDGs di-rescale dari SATU POOL BERSAMA."
|
||||
"\n Skor ASEAN MDGs dan SDGs sekarang comparable."
|
||||
)
|
||||
|
||||
df_normed = self._get_norm_value_df()
|
||||
country_composite = self._calc_country_composite_inmemory()
|
||||
@@ -847,14 +951,18 @@ class FoodSecurityAggregator:
|
||||
)
|
||||
asean_overall = (
|
||||
country_norm.groupby("year")
|
||||
.agg(asean_norm=("country_norm", "mean"), std_norm=("country_norm", "std"), n_countries=("country_norm", "count"))
|
||||
.agg(
|
||||
asean_norm =("country_norm", "mean"),
|
||||
std_norm =("country_norm", "std"),
|
||||
n_countries =("country_norm", "count")
|
||||
)
|
||||
.reset_index()
|
||||
)
|
||||
asean_overall["asean_score_1_100"] = global_minmax(asean_overall["asean_norm"])
|
||||
|
||||
parts = []
|
||||
|
||||
# Layer TOTAL
|
||||
# ── Layer TOTAL ───────────────────────────────────────────────────────
|
||||
total_cols = asean_overall[["year", "asean_score_1_100", "asean_norm", "std_norm", "n_countries"]].copy()
|
||||
total_cols = total_cols.rename(columns={
|
||||
"asean_score_1_100": "framework_score_1_100",
|
||||
@@ -866,7 +974,7 @@ class FoodSecurityAggregator:
|
||||
total_cols["framework"] = "Total"
|
||||
parts.append(total_cols)
|
||||
|
||||
# Layer MDGs pre-SDGs
|
||||
# ── Layer MDGs pre-SDGs ───────────────────────────────────────────────
|
||||
pre_sdgs = asean_overall[asean_overall["year"] < self.sdgs_start_year].copy()
|
||||
if not pre_sdgs.empty:
|
||||
mdgs_pre = pre_sdgs[["year", "asean_score_1_100", "asean_norm", "std_norm", "n_countries"]].copy()
|
||||
@@ -884,7 +992,9 @@ class FoodSecurityAggregator:
|
||||
mdgs_pre["framework"] = "MDGs"
|
||||
parts.append(mdgs_pre)
|
||||
|
||||
# Layer MDGs mixed
|
||||
# ── Siapkan MDGs mixed dan SDGs untuk rescaling BERSAMA ───────────────
|
||||
mixed_parts = []
|
||||
|
||||
if self.mdgs_indicator_ids:
|
||||
df_mdgs_mixed = df_normed[
|
||||
(df_normed["indicator_id"].isin(self.mdgs_indicator_ids)) &
|
||||
@@ -902,12 +1012,9 @@ class FoodSecurityAggregator:
|
||||
).reset_index()
|
||||
n_ind_mdgs = df_mdgs_mixed.groupby("year")["indicator_id"].nunique().reset_index().rename(columns={"indicator_id": "n_indicators"})
|
||||
asean_mdgs = asean_mdgs.merge(n_ind_mdgs, on="year", how="left")
|
||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
||||
asean_mdgs["framework_score_1_100"] = global_minmax(asean_mdgs["framework_norm"])
|
||||
asean_mdgs["framework"] = "MDGs"
|
||||
parts.append(asean_mdgs)
|
||||
mixed_parts.append(asean_mdgs)
|
||||
|
||||
# Layer SDGs
|
||||
if self.sdgs_indicator_ids:
|
||||
df_sdgs = df_normed[
|
||||
(df_normed["indicator_id"].isin(self.sdgs_indicator_ids)) &
|
||||
@@ -925,23 +1032,25 @@ class FoodSecurityAggregator:
|
||||
).reset_index()
|
||||
n_ind_sdgs = df_sdgs.groupby("year")["indicator_id"].nunique().reset_index().rename(columns={"indicator_id": "n_indicators"})
|
||||
asean_sdgs = asean_sdgs.merge(n_ind_sdgs, on="year", how="left")
|
||||
if not NORMALIZE_FRAMEWORKS_JOINTLY:
|
||||
asean_sdgs["framework_score_1_100"] = global_minmax(asean_sdgs["framework_norm"])
|
||||
asean_sdgs["framework"] = "SDGs"
|
||||
parts.append(asean_sdgs)
|
||||
mixed_parts.append(asean_sdgs)
|
||||
|
||||
# PERBAIKAN: Rescale MDGs mixed + SDGs dari SATU POOL BERSAMA
|
||||
if mixed_parts:
|
||||
df_mixed = pd.concat(mixed_parts, ignore_index=True)
|
||||
df_mixed["framework_score_1_100"] = global_minmax(df_mixed["framework_norm"])
|
||||
parts.append(df_mixed)
|
||||
|
||||
df = pd.concat(parts, ignore_index=True)
|
||||
|
||||
if NORMALIZE_FRAMEWORKS_JOINTLY:
|
||||
mixed_mask = (df["framework"].isin(["MDGs", "SDGs"])) & (df["year"] >= self.sdgs_start_year)
|
||||
if mixed_mask.any():
|
||||
df.loc[mixed_mask, "framework_score_1_100"] = global_minmax(df.loc[mixed_mask, "framework_norm"])
|
||||
|
||||
df = check_and_dedup(df, ["framework", "year"], context=table_name, logger=self.logger)
|
||||
df = add_yoy(df, ["framework"], "framework_score_1_100")
|
||||
df = add_condition_column(df, "framework_score_1_100")
|
||||
log_condition_summary(df, table_name, self.logger)
|
||||
|
||||
# Log diagnostik: bandingkan skor ASEAN MDGs vs SDGs
|
||||
self._log_framework_score_diagnostics(df, table_name)
|
||||
|
||||
df["year"] = df["year"].astype(int)
|
||||
df["n_indicators"] = safe_int(df["n_indicators"], col_name="n_indicators", logger=self.logger)
|
||||
df["n_countries_with_data"] = safe_int(df["n_countries_with_data"], col_name="n_countries_with_data", logger=self.logger)
|
||||
@@ -1164,9 +1273,34 @@ class FoodSecurityAggregator:
|
||||
return df
|
||||
|
||||
# =========================================================================
|
||||
# HELPERS
|
||||
# DIAGNOSTIK & VALIDASI
|
||||
# =========================================================================
|
||||
|
||||
def _log_framework_score_diagnostics(self, df: pd.DataFrame, context: str):
    """
    Log mean / min / max of ``framework_score_1_100`` for each framework.

    Args:
        df: frame with at least the columns 'framework' and
            'framework_score_1_100'.
        context: label inserted into the log header (callers pass the
            table name).

    If both 'MDGs' and 'SDGs' are present, the gap between their (rounded)
    mean scores is logged too. It is labelled as substantive when its
    absolute value exceeds 10 points and as within the normal range
    otherwise. If either mean is NaN (all scores of that framework are
    null) the gap is undefined and a [SKIP] message is logged instead;
    without this branch a NaN gap would be reported as within the normal
    range, because ``abs(nan) > 10`` is False.

    Only writes to ``self.logger``; returns nothing.
    """
    self.logger.info(f"\n [DIAGNOSTIK] Rata-rata skor per framework ({context}):")
    fw_means = (
        df.groupby("framework")["framework_score_1_100"]
        .agg(['mean', 'min', 'max'])
        .round(2)
    )
    for fw, row in fw_means.iterrows():
        self.logger.info(
            f" {fw:<8} mean={row['mean']:>6.2f} "
            f"range=[{row['min']:.2f}, {row['max']:.2f}]"
        )

    # Guard clause: the comparison needs both frameworks.
    if "MDGs" not in fw_means.index or "SDGs" not in fw_means.index:
        return

    gap = fw_means.loc["MDGs", "mean"] - fw_means.loc["SDGs", "mean"]

    # An all-null framework yields a NaN mean; report that explicitly.
    if pd.isna(gap):
        self.logger.info(
            "\n [SKIP] Gap MDGs-SDGs tidak dapat dihitung: "
            "rata-rata skor salah satu framework kosong (NaN)."
        )
        return

    if abs(gap) > 10:
        verdict = " → SUBSTANTIF (indikator SDGs mengukur deprivasi lebih dalam)"
    else:
        verdict = " → dalam batas wajar"
    self.logger.info(f"\n Gap MDGs-SDGs = {gap:.2f} poin" + verdict)
|
||||
|
||||
def _validate_mdgs_equals_total(self, df: pd.DataFrame, level: str = ""):
|
||||
self.logger.info(f"\n Validasi MDGs < {self.sdgs_start_year} == Total [{level}]:")
|
||||
group_by = ["year"] if level.startswith("asean") else ["country_id", "year"]
|
||||
@@ -1202,6 +1336,10 @@ class FoodSecurityAggregator:
|
||||
self.logger.info("\n" + "=" * 70)
|
||||
self.logger.info("FOOD SECURITY AGGREGATION — 6 TABLES -> fs_asean_gold")
|
||||
self.logger.info(f" Condition threshold: bad<{THRESHOLD_BAD}, good>{THRESHOLD_GOOD}")
|
||||
self.logger.info(
|
||||
" NORMALISASI: norm_value dari analytical_layer (satu referensi global)."
|
||||
"\n Tidak ada rescaling per-framework. MDGs dan SDGs comparable."
|
||||
)
|
||||
self.logger.info("=" * 70)
|
||||
|
||||
self.load_data()
|
||||
@@ -1250,6 +1388,8 @@ if __name__ == "__main__":
|
||||
print("=" * 70)
|
||||
print("FOOD SECURITY AGGREGATION -> fs_asean_gold")
|
||||
print(f"Condition threshold: bad<{THRESHOLD_BAD}, moderate {THRESHOLD_BAD}-{THRESHOLD_GOOD}, good>{THRESHOLD_GOOD}")
|
||||
print("NORMALISASI: satu referensi global per indikator (dari analytical_layer).")
|
||||
print("Tidak ada rescaling per-framework. MDGs dan SDGs comparable.")
|
||||
print("=" * 70)
|
||||
|
||||
logger = setup_logging()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user