import requests
import pandas as pd


def run_scraping_logic():
    """Fetch Indonesian university data from the hipolabs public API and log a preview.

    ETL-style demo task (intended to run inside an Airflow worker):
    extracts JSON from the API, transforms it with pandas, and "loads"
    by printing the top 5 rows to the task log.

    Returns:
        bool: True when the data was fetched and printed successfully,
        False when the HTTP request, JSON parsing, or transform fails.
    """
    print("--- MEMULAI PROSES PENARIKAN DATA DUMMY ---")

    # Public API (university data).
    url = "http://universities.hipolabs.com/search?country=Indonesia"

    try:
        # 1. Extract
        response = requests.get(url, timeout=10)
        # Fail fast on 4xx/5xx instead of trying to parse an HTML
        # error page as JSON further down.
        response.raise_for_status()
        data = response.json()

        # 2. Transform (with pandas)
        df = pd.DataFrame(data)

        # Guard: an empty payload yields a DataFrame with no columns,
        # so the column selection below would raise KeyError.
        if df.empty:
            print("TIDAK ADA DATA YANG DITARIK (payload kosong).")
            return False

        # Keep only the top 5 rows for the log preview.
        preview_data = df[['name', 'web_pages']].head(5)

        # 3. Load (simulated: print to the Airflow log)
        print("HASIL PENARIKAN DATA (5 Teratas):")
        print("====================================================")
        print(preview_data.to_string(index=False))
        print("====================================================")
        print(f"Total data yang berhasil ditarik: {len(df)} baris.")

        return True

    except Exception as e:
        # Top-level boundary: log the failure and signal it to the
        # caller via the return value instead of crashing the task.
        print(f"TERJADI KESALAHAN: {str(e)}")
        return False