# ETL demo script: pulls public university data (Indonesia) via HTTP,
# previews it with pandas, and logs the result (Airflow task simulation).
import requests
|
|
import pandas as pd
|
|
|
|
def run_scraping_logic():
    """Run a dummy ETL pass: fetch Indonesian university data and log a preview.

    Extract: GET the public hipolabs universities API.
    Transform: load the JSON payload into a pandas DataFrame.
    Load (simulated): print the top-5 rows and a row count to the task log
    (intended for an Airflow task log).

    Returns:
        bool: True on success, False if any step failed (the error is logged,
        not re-raised, so an orchestrator can decide how to handle failure).
    """
    print("--- MEMULAI PROSES PENARIKAN DATA DUMMY ---")

    # Public API URL (university data)
    url = "http://universities.hipolabs.com/search?country=Indonesia"

    try:
        # 1. Extract
        response = requests.get(url, timeout=10)
        # Fail fast on HTTP 4xx/5xx instead of parsing an error body as data.
        response.raise_for_status()
        data = response.json()

        # 2. Transform (use pandas)
        df = pd.DataFrame(data)

        # Keep only the top 5 rows for the log preview
        preview_data = df[['name', 'web_pages']].head(5)

        # 3. Load (simulated: print to the Airflow log)
        print("HASIL PENARIKAN DATA (5 Teratas):")
        print("====================================================")
        print(preview_data.to_string(index=False))
        print("====================================================")
        print(f"Total data yang berhasil ditarik: {len(df)} baris.")

        return True
    except Exception as e:
        # Deliberate best-effort boundary: log the error and signal failure
        # via the return value rather than crashing the task process.
        print(f"TERJADI KESALAHAN: {str(e)}")
        return False