Files
airflow-coolify/scripts/test_data.py
2026-03-07 17:17:27 +07:00

37 lines
1.3 KiB
Python

import requests
import zipfile
import io
import pandas as pd
from scripts.bigquery_config import get_bigquery_client
from scripts.bigquery_helpers import load_to_bigquery, read_from_bigquery
def run_fao_test():
print("--- MEMULAI TEST LOAD FAO KE BIGQUERY ---")
# 1. Extract
url = "https://bulks-faostat.fao.org/production/Food_Security_Data_E_All_Data_(Normalized).zip"
response = requests.get(url, timeout=120)
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
csv_name = [f for f in z.namelist() if f.endswith('.csv')][0]
df = pd.read_csv(z.open(csv_name), encoding='latin-1')
# Ambil 5 baris teratas
df_top5 = df.head(5)
print("HASIL 5 DATA TERATAS:")
print("====================================================")
print(df_top5.to_string(index=False))
print("====================================================")
# 2. Load ke BigQuery
client = get_bigquery_client()
load_to_bigquery(client, df_top5, "raw_fao_test", layer="bronze", write_disposition="WRITE_TRUNCATE")
print(f"Total data yang berhasil di-load: {len(df_top5)} baris.")
# 3. Verify
df_check = read_from_bigquery(client, "raw_fao_test", layer="bronze")
print(f"Verify dari BigQuery: {len(df_check)} baris tersimpan.")
if __name__ == "__main__":
run_fao_test()