New narrative text

This commit is contained in:
Debby
2026-04-22 16:02:05 +07:00
parent 40528766bd
commit f9d013f8e6
2 changed files with 206 additions and 464 deletions

View File

@@ -219,113 +219,58 @@ def _build_overview_narrative(
most_declined_country,
most_declined_delta,
) -> str:
# Sentence 1: indicator breakdown
parts_ind = []
"""
Narrative format (no em-dash):
In {year}, ASEAN scored {score} ({performance}) across {n_total} indicators
({n_mdg} MDGs, {n_sdg} SDGs). Score {increased/decreased} by {delta} pts from
{prev_year} ({prev_score}). {top_country} led the region; {bottom_country} ranked
last. Biggest gain: {country}; biggest drop: {country}.
"""
# Sentence 1: score + performance + indicators
ind_parts = []
if n_mdg > 0:
parts_ind.append(f"{n_mdg} MDG indicator{'s' if n_mdg > 1 else ''}")
ind_parts.append(f"**{n_mdg} MDGs**")
if n_sdg > 0:
parts_ind.append(f"{n_sdg} SDG indicator{'s' if n_sdg > 1 else ''}")
ind_parts.append(f"**{n_sdg} SDGs**")
ind_detail = f" ({', '.join(ind_parts)})" if ind_parts else ""
if parts_ind:
ind_detail = " and ".join(parts_ind)
sent1 = (
f"In {year}, the ASEAN food security assessment incorporated a total of "
f"{n_total_ind} indicator{'s' if n_total_ind != 1 else ''}, "
f"consisting of {ind_detail}."
)
else:
sent1 = (
f"In {year}, the ASEAN food security assessment incorporated "
f"{n_total_ind} indicator{'s' if n_total_ind != 1 else ''}."
)
# Sentence 2: score + performance status + YoY
status_phrase = (
f"classified as \"{performance_status}\" performance "
f"(threshold: {PERFORMANCE_THRESHOLD:.0f})"
sent1 = (
f"In **{year}**, ASEAN scored **{_fmt_score(score)}** (*{performance_status}*) "
f"across **{n_total_ind} indicators**{ind_detail}."
)
# Sentence 2: YoY
if yoy_val is not None and prev_score is not None:
direction_word = "increasing" if yoy_val >= 0 else "decreasing"
pct_clause = ""
if yoy_pct is not None:
abs_pct = abs(yoy_pct)
trend_word = "improvement" if yoy_val >= 0 else "decline"
pct_clause = f", representing a {abs_pct:.2f}% {trend_word} year-over-year"
status_change = ""
if prev_performance_status not in ("N/A", None) and prev_performance_status != performance_status:
status_change = (
f" This marks a shift from \"{prev_performance_status}\" in {prev_year} "
f"to \"{performance_status}\" in {year}."
)
direction_word = "increased" if yoy_val >= 0 else "decreased"
sent2 = (
f"The ASEAN overall score (Total framework) reached {_fmt_score(score)}, "
f"{status_phrase}, {direction_word} by {abs(yoy_val):.2f} points compared to "
f"{prev_year} ({_fmt_score(prev_score)}, \"{prev_performance_status}\"){pct_clause}.{status_change}"
f"Score {direction_word} by **{abs(yoy_val):.2f} pts** "
f"from {prev_year} ({_fmt_score(prev_score)}, *{prev_performance_status}*)."
)
else:
sent2 = (
f"The ASEAN overall score (Total framework) reached {_fmt_score(score)} in {year}, "
f"{status_phrase}. No prior-year data is available for year-over-year comparison."
)
sent2 = "No prior-year data available for comparison."
# Sentence 3: country ranking
sent3 = ""
if ranking_list:
first = ranking_list[0]
last = ranking_list[-1]
middle = ranking_list[1:-1]
first = ranking_list[0]
last = ranking_list[-1]
if len(ranking_list) == 1:
sent3 = (
f"In terms of country performance, {first['country_name']} was the only "
f"country assessed, scoring {_fmt_score(first['score'])} in {year}."
)
elif len(ranking_list) == 2:
sent3 = (
f"In terms of country performance, {first['country_name']} led the region "
f"with a score of {_fmt_score(first['score'])}, while "
f"{last['country_name']} recorded the lowest score of "
f"{_fmt_score(last['score'])} in {year}."
)
sent3 = f"**{first['country_name']}** was the only country assessed ({_fmt_score(first['score'])})."
else:
middle_parts = [
f"{c['country_name']} ({_fmt_score(c['score'])})" for c in middle
]
if len(middle_parts) == 1:
middle_str = middle_parts[0]
else:
middle_str = ", ".join(middle_parts[:-1]) + f", and {middle_parts[-1]}"
sent3 = (
f"In terms of country performance, {first['country_name']} led the region "
f"with a score of {_fmt_score(first['score'])}, followed by {middle_str}. "
f"At the other end, {last['country_name']} recorded the lowest score "
f"of {_fmt_score(last['score'])} in {year}."
f"**{first['country_name']}** led the region ({_fmt_score(first['score'])}); "
f"**{last['country_name']}** ranked last ({_fmt_score(last['score'])})."
)
# Sentence 4: most improved / declined country
# Sentence 4: most improved / declined
sent4_parts = []
if most_improved_country and most_improved_delta is not None:
sent4_parts.append(
f"the most notable improvement was seen in {most_improved_country}, "
f"which gained {_fmt_delta(most_improved_delta)} points from the previous year"
)
sent4_parts.append(f"Biggest gain: **{most_improved_country}** ({_fmt_delta(most_improved_delta)} pts)")
if most_declined_country and most_declined_delta is not None:
if most_declined_delta < 0:
sent4_parts.append(
f"while {most_declined_country} experienced the largest decline "
f"of {_fmt_delta(most_declined_delta)} points"
)
else:
sent4_parts.append(
f"while {most_declined_country} recorded the smallest gain "
f"of {_fmt_delta(most_declined_delta)} points"
)
sent4 = ""
if sent4_parts:
sent4 = ", ".join(sent4_parts) + "."
sent4_parts.append(f"biggest drop: **{most_declined_country}** ({_fmt_delta(most_declined_delta)} pts)")
sent4 = ("; ".join(sent4_parts) + ".") if sent4_parts else ""
if sent4:
sent4 = sent4[0].upper() + sent4[1:]
return " ".join(s for s in [sent1, sent2, sent3, sent4] if s)
@@ -351,70 +296,55 @@ def _build_pillar_narrative(
most_declined_pillar,
most_declined_delta,
) -> str:
"""
Narrative format (no em-dash):
In {year}, {pillar} ranked {rank}/{n} with score {score}, {up/down} {delta} pts YoY.
Top country: {top_country}; bottom: {bot_country}.
Strongest pillar: {pillar}; weakest: {pillar}.
"""
rank_suffix = {1: "st", 2: "nd", 3: "rd"}.get(rank_in_year, "th")
# Sentence 1: rank + score + YoY
if yoy_val is not None:
direction_word = "up" if yoy_val >= 0 else "down"
yoy_clause = f", {direction_word} **{abs(yoy_val):.2f} pts** YoY"
else:
yoy_clause = ", no prior-year data"
sent1 = (
f"In {year}, the {pillar_name} pillar scored {_fmt_score(pillar_score)}, "
f"ranking {rank_in_year}{rank_suffix} out of {n_pillars} pillars assessed across ASEAN."
f"In **{year}**, **{pillar_name}** ranked **{rank_in_year}{rank_suffix}/{n_pillars}** "
f"with score **{_fmt_score(pillar_score)}**{yoy_clause}."
)
# Sentence 2: top / bottom country
sent2 = ""
if strongest_pillar and weakest_pillar:
if strongest_pillar == pillar_name:
sent2 = (
f"This made {pillar_name} the strongest performing pillar in {year}, "
f"compared to the weakest pillar, {weakest_pillar}, "
f"which scored {_fmt_score(weakest_score)}."
)
elif weakest_pillar == pillar_name:
sent2 = (
f"This made {pillar_name} the weakest performing pillar in {year}, "
f"compared to the strongest pillar, {strongest_pillar}, "
f"which scored {_fmt_score(strongest_score)}."
)
else:
sent2 = (
f"Across all pillars in {year}, {strongest_pillar} was the strongest "
f"(score: {_fmt_score(strongest_score)}), while {weakest_pillar} "
f"was the weakest (score: {_fmt_score(weakest_score)})."
)
sent3 = ""
if top_country and bot_country:
if top_country != bot_country:
sent3 = (
f"Within the {pillar_name} pillar, {top_country} led with a score of "
f"{_fmt_score(top_country_score)}, while {bot_country} recorded the lowest "
f"score of {_fmt_score(bot_country_score)}."
sent2 = (
f"Top country: **{top_country}** ({_fmt_score(top_country_score)}); "
f"bottom: **{bot_country}** ({_fmt_score(bot_country_score)})."
)
else:
sent3 = (
f"Within the {pillar_name} pillar, {top_country} was the only country "
f"with available data, scoring {_fmt_score(top_country_score)}."
)
sent2 = f"**{top_country}** was the only country with data ({_fmt_score(top_country_score)})."
if yoy_val is not None:
direction_word = "improved" if yoy_val >= 0 else "declined"
sent4 = (
f"Compared to the previous year, the {pillar_name} pillar "
f"{direction_word} by {abs(yoy_val):.2f} points"
)
else:
sent4 = (
f"No prior-year data is available to calculate year-over-year change "
f"for the {pillar_name} pillar in {year}"
# Sentence 3: strongest / weakest pillar
sent3 = ""
if strongest_pillar and weakest_pillar:
sent3 = (
f"Strongest pillar: **{strongest_pillar}** ({_fmt_score(strongest_score)}); "
f"weakest: **{weakest_pillar}** ({_fmt_score(weakest_score)})."
)
if (most_improved_pillar and most_improved_delta is not None
and most_declined_pillar and most_declined_delta is not None
and most_improved_pillar != most_declined_pillar):
sent4 += (
f". Across all pillars, {most_improved_pillar} showed the greatest improvement "
f"({_fmt_delta(most_improved_delta)} pts), while {most_declined_pillar} "
f"recorded the largest decline ({_fmt_delta(most_declined_delta)} pts)"
)
sent4 += "."
sent4 = sent4[0].upper() + sent4[1:]
# Sentence 4: most improved / declined pillar
sent4_parts = []
if most_improved_pillar and most_improved_delta is not None:
sent4_parts.append(f"Best gain: **{most_improved_pillar}** ({_fmt_delta(most_improved_delta)} pts)")
if most_declined_pillar and most_declined_delta is not None:
sent4_parts.append(f"largest drop: **{most_declined_pillar}** ({_fmt_delta(most_declined_delta)} pts)")
sent4 = ("; ".join(sent4_parts) + ".") if sent4_parts else ""
if sent4:
sent4 = sent4[0].upper() + sent4[1:]
return " ".join(s for s in [sent1, sent2, sent3, sent4] if s)
@@ -610,10 +540,6 @@ class FoodSecurityAggregator:
# =========================================================================
# METADATA BUILDER
# Conforms to the signature: save_etl_metadata(client, metadata: dict)
# and to the etl_metadata schema: source_class, table_name, execution_timestamp,
# duration_seconds, rows_fetched, rows_transformed, rows_loaded,
# completeness_pct, config_snapshot, validation_metrics
# =========================================================================
def _build_etl_metadata(
@@ -1419,50 +1345,7 @@ class FoodSecurityAggregator:
status = "OK (identik)" if max_diff < 0.01 else f"MISMATCH! max_diff={max_diff:.6f}"
self.logger.info(f" -> {status} (n_checked={len(check)})")
def _build_etl_metadata(
self,
table_name: str,
rows_loaded: int,
start_time: datetime,
end_time: datetime,
status: str,
error_msg: str = None,
) -> dict:
"""
Susun dict metadata sesuai signature save_etl_metadata(client, metadata: dict)
dan kolom skema etl_metadata di bigquery_helpers.py:
source_class, table_name, execution_timestamp, duration_seconds,
rows_fetched, rows_transformed, rows_loaded, completeness_pct,
config_snapshot, validation_metrics
"""
duration = (end_time - start_time).total_seconds() if (start_time and end_time) else 0.0
return {
"source_class" : "FoodSecurityAggregator",
"table_name" : table_name,
"execution_timestamp": start_time or end_time,
"duration_seconds" : round(duration, 4),
"rows_fetched" : rows_loaded,
"rows_transformed" : rows_loaded,
"rows_loaded" : rows_loaded,
"completeness_pct" : 100.0 if status == "success" else 0.0,
"config_snapshot" : json.dumps({
"layer" : "gold",
"write_disposition" : "WRITE_TRUNCATE",
"normalize_frameworks_jointly": NORMALIZE_FRAMEWORKS_JOINTLY,
"performance_threshold" : PERFORMANCE_THRESHOLD,
"status" : status,
}),
"validation_metrics" : json.dumps({
"status" : status,
"error_msg": error_msg or "",
}),
}
def _finalize(self, table_name: str, rows_loaded: int):
"""
Tandai tabel sukses. Catat ke etl_logs dan etl_metadata.
Pemanggilan: save_etl_metadata(client, metadata_dict)
"""
end_time = datetime.now()
start_time = self.load_metadata[table_name].get("start_time")
@@ -1486,7 +1369,6 @@ class FoodSecurityAggregator:
)
)
except Exception as meta_err:
# Error metadata tidak boleh menghentikan pipeline
self.logger.warning(
f" [METADATA WARNING] Gagal simpan etl_metadata untuk {table_name}: {meta_err}"
)
@@ -1494,10 +1376,6 @@ class FoodSecurityAggregator:
self.logger.info(f" [OK] {table_name}: {rows_loaded:,} rows -> [Gold] fs_asean_gold")
def _fail(self, table_name: str, error: Exception):
"""
Tandai tabel gagal. Catat ke etl_logs dan etl_metadata.
Pemanggilan: save_etl_metadata(client, metadata_dict)
"""
end_time = datetime.now()
start_time = self.load_metadata[table_name].get("start_time")
error_msg = str(error)
@@ -1579,10 +1457,6 @@ class FoodSecurityAggregator:
# =============================================================================
def run_aggregation():
"""
Airflow task: Hitung semua agregasi dari fact_asean_food_security_selected.
Dipanggil setelah analytical_layer_to_gold selesai.
"""
from scripts.bigquery_config import get_bigquery_client
client = get_bigquery_client()
agg = FoodSecurityAggregator(client)