From ddb9b71f3fa66037a2cfd31f94d5fbb7a1cbf285 Mon Sep 17 00:00:00 2001
From: aa_humaaan <adityasharma9136@gmail.com>
Date: Tue, 3 Mar 2026 21:25:03 +0530
Subject: [PATCH] chore: remove redundant scraper.py (replaced by
 fetch-github-metadata.js)

---
 scraper/scraper.py | 83 ----------------------------------------------
 1 file changed, 83 deletions(-)
 delete mode 100644 scraper/scraper.py

diff --git a/scraper/scraper.py b/scraper/scraper.py
deleted file mode 100644
index b71e65a..0000000
--- a/scraper/scraper.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import json
-import urllib.request
-import urllib.error
-import os
-import time
-
-# Paths
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-DATA_FILE = os.path.join(BASE_DIR, 'data', 'tools.json')
-
-def load_data():
-    with open(DATA_FILE, 'r') as f:
-        return json.load(f)
-
-def save_data(data):
-    with open(DATA_FILE, 'w') as f:
-        json.dump(data, f, indent=2)
-
-def fetch_github_stats(repo_name):
-    print(f"Fetching stats for {repo_name}...")
-    url = f"https://api.github.com/repos/{repo_name}"
-    req = urllib.request.Request(url)
-    # GitHub requires a User-Agent
-    req.add_header('User-Agent', 'Python-Urllib-Scraper')
-    
-    # Use GITHUB_TOKEN if available to avoid rate limits
-    token = os.environ.get('GITHUB_TOKEN')
-    if token:
-        req.add_header('Authorization', f'token {token}')
-    
-    try:
-        with urllib.request.urlopen(req) as response:
-            if response.status == 200:
-                data = json.loads(response.read().decode())
-                return {
-                    "stars": data.get("stargazers_count", 0),
-                    "description": data.get("description", ""),
-                    "last_commit": data.get("pushed_at", ""),
-                    "language": data.get("language", ""),
-                    "license": data.get("license", {}).get("name", "Unknown") if data.get("license") else "None"
-                }
-            else:
-                print(f"Error fetching {repo_name}: {response.status}")
-                return None
-    except urllib.error.HTTPError as e:
-        print(f"HTTP Error fetching {repo_name}: {e.code}")
-        # If rate limited, print a warning
-        if e.code == 403:
-            print("Warning: API Rate limited. Use GITHUB_TOKEN for higher limits.")
-        return None
-    except Exception as e:
-        print(f"Error fetching {repo_name}: {e}")
-        return None
-
-def main():
-    tools = load_data()
-    updated = False
-    
-    for tool in tools:
-        if tool.get("is_open_source") and tool.get("github_repo"):
-            stats = fetch_github_stats(tool["github_repo"])
-            if stats:
-                tool.update(stats)
-                updated = True
-            time.sleep(0.5) # Be nice to API, slightly faster with token
-    
-    # Save to root data folder
-    with open(DATA_FILE, 'w') as f:
-        json.dump(tools, f, indent=2)
-    
-    # Save to web data folder if it exists
-    web_data_path = os.path.join(BASE_DIR, 'web', 'data', 'tools.json')
-    if os.path.exists(os.path.dirname(web_data_path)):
-        with open(web_data_path, 'w') as f:
-            json.dump(tools, f, indent=2)
-    
-    if updated:
-        print("Data updated successfully!")
-    else:
-        print("No updates found.")
-
-if __name__ == "__main__":
-    main()