Replace sklearn MinMaxScaler with pure NumPy min-max scaling
This commit is contained in:
@@ -24,7 +24,6 @@ from scripts.bigquery_helpers import (
|
||||
save_etl_metadata,
|
||||
)
|
||||
from google.cloud import bigquery
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -87,12 +86,10 @@ def global_minmax(series: pd.Series, lo: float = 1.0, hi: float = 100.0) -> pd.S
|
||||
v_min, v_max = values.min(), values.max()
|
||||
if v_min == v_max:
|
||||
return pd.Series((lo + hi) / 2.0, index=series.index)
|
||||
scaler = MinMaxScaler(feature_range=(lo, hi))
|
||||
result = np.full(len(series), np.nan)
|
||||
result = np.full(len(series), np.nan)
|
||||
not_nan = series.notna()
|
||||
result[not_nan.values] = scaler.fit_transform(
|
||||
series[not_nan].values.reshape(-1, 1)
|
||||
).flatten()
|
||||
raw = series[not_nan].values
|
||||
result[not_nan.values] = lo + (raw - v_min) / (v_max - v_min) * (hi - lo)
|
||||
return pd.Series(result, index=series.index)
|
||||
|
||||
|
||||
@@ -274,11 +271,13 @@ class FoodSecurityAggregator:
|
||||
norm_parts.append(grp)
|
||||
continue
|
||||
|
||||
scaler = MinMaxScaler(feature_range=(0, 1))
|
||||
raw = grp.loc[valid_mask, "value"].values
|
||||
v_min, v_max = raw.min(), raw.max()
|
||||
normed = np.full(len(grp), np.nan)
|
||||
normed[valid_mask.values] = scaler.fit_transform(
|
||||
grp.loc[valid_mask, ["value"]]
|
||||
).flatten()
|
||||
if v_min == v_max:
|
||||
normed[valid_mask.values] = 0.5
|
||||
else:
|
||||
normed[valid_mask.values] = (raw - v_min) / (v_max - v_min)
|
||||
|
||||
if do_invert:
|
||||
normed = np.where(np.isnan(normed), np.nan, 1.0 - normed)
|
||||
@@ -757,7 +756,7 @@ if __name__ == "__main__":
|
||||
_sys.stderr = io.TextIOWrapper(_sys.stderr.buffer, encoding="utf-8", errors="replace")
|
||||
|
||||
print("=" * 70)
|
||||
print("FOOD SECURITY AGGREGATION v8.0 — 4 TABLES -> fs_asean_gold")
|
||||
print("FOOD SECURITY AGGREGATION 4 TABLES -> fs_asean_gold")
|
||||
print(f" NORMALIZE_FRAMEWORKS_JOINTLY = {NORMALIZE_FRAMEWORKS_JOINTLY}")
|
||||
print("=" * 70)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user