Replace sklearn MinMaxScaler with pure NumPy min-max scaling

This commit is contained in:
Debby
2026-03-15 00:15:53 +07:00
parent 4b617a1e8f
commit a4ff15677e
2 changed files with 10 additions and 861 deletions

View File

@@ -24,7 +24,6 @@ from scripts.bigquery_helpers import (
save_etl_metadata,
)
from google.cloud import bigquery
from sklearn.preprocessing import MinMaxScaler
# =============================================================================
@@ -87,12 +86,10 @@ def global_minmax(series: pd.Series, lo: float = 1.0, hi: float = 100.0) -> pd.S
v_min, v_max = values.min(), values.max()
if v_min == v_max:
return pd.Series((lo + hi) / 2.0, index=series.index)
scaler = MinMaxScaler(feature_range=(lo, hi))
result = np.full(len(series), np.nan)
result = np.full(len(series), np.nan)
not_nan = series.notna()
result[not_nan.values] = scaler.fit_transform(
series[not_nan].values.reshape(-1, 1)
).flatten()
raw = series[not_nan].values
result[not_nan.values] = lo + (raw - v_min) / (v_max - v_min) * (hi - lo)
return pd.Series(result, index=series.index)
@@ -274,11 +271,13 @@ class FoodSecurityAggregator:
norm_parts.append(grp)
continue
scaler = MinMaxScaler(feature_range=(0, 1))
raw = grp.loc[valid_mask, "value"].values
v_min, v_max = raw.min(), raw.max()
normed = np.full(len(grp), np.nan)
normed[valid_mask.values] = scaler.fit_transform(
grp.loc[valid_mask, ["value"]]
).flatten()
if v_min == v_max:
normed[valid_mask.values] = 0.5
else:
normed[valid_mask.values] = (raw - v_min) / (v_max - v_min)
if do_invert:
normed = np.where(np.isnan(normed), np.nan, 1.0 - normed)
@@ -757,7 +756,7 @@ if __name__ == "__main__":
_sys.stderr = io.TextIOWrapper(_sys.stderr.buffer, encoding="utf-8", errors="replace")
print("=" * 70)
print("FOOD SECURITY AGGREGATION v8.0 — 4 TABLES -> fs_asean_gold")
print("FOOD SECURITY AGGREGATION 4 TABLES -> fs_asean_gold")
print(f" NORMALIZE_FRAMEWORKS_JOINTLY = {NORMALIZE_FRAMEWORKS_JOINTLY}")
print("=" * 70)