mirror of
https://github.com/altstackHQ/altstack-data.git
synced 2026-04-17 21:53:12 +02:00
chore: remove redundant scraper.py (replaced by fetch-github-metadata.js)
This commit is contained in:
@@ -1,83 +0,0 @@
|
||||
import json
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import os
|
||||
import time
|
||||
|
||||
# Paths: the script lives one level below the repo root, and the data
# file sits in <repo root>/data/tools.json.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(_SCRIPT_DIR)
DATA_FILE = os.path.join(BASE_DIR, 'data', 'tools.json')
|
||||
|
||||
def load_data():
    """Load the tools list from the JSON data file.

    Returns:
        The parsed content of DATA_FILE (expected to be a list of
        tool dicts — see main()).
    """
    # JSON is UTF-8 by spec; be explicit rather than relying on the
    # platform/locale default encoding.
    with open(DATA_FILE, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||||
|
||||
def save_data(data):
    """Write *data* to the JSON data file, pretty-printed.

    Args:
        data: Any JSON-serializable object (here: the tools list).
    """
    # Explicit UTF-8 keeps the output stable across platforms/locales.
    with open(DATA_FILE, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
|
||||
|
||||
def fetch_github_stats(repo_name):
    """Fetch repository metadata from the GitHub REST API.

    Args:
        repo_name: "owner/repo" slug, e.g. "python/cpython".

    Returns:
        A dict with keys "stars", "description", "last_commit",
        "language" and "license" on success, or None on any error.
    """
    print(f"Fetching stats for {repo_name}...")
    url = f"https://api.github.com/repos/{repo_name}"
    req = urllib.request.Request(url)
    # GitHub requires a User-Agent
    req.add_header('User-Agent', 'Python-Urllib-Scraper')

    # Use GITHUB_TOKEN if available to avoid rate limits
    token = os.environ.get('GITHUB_TOKEN')
    if token:
        req.add_header('Authorization', f'token {token}')

    try:
        with urllib.request.urlopen(req) as response:
            # urlopen raises HTTPError for non-2xx responses, so the
            # else branch below is defensive rather than the normal path.
            if response.status == 200:
                data = json.loads(response.read().decode())
                # "license" can be present but null in the API payload;
                # look it up once and branch on the result.
                license_info = data.get("license")
                return {
                    "stars": data.get("stargazers_count", 0),
                    "description": data.get("description", ""),
                    "last_commit": data.get("pushed_at", ""),
                    "language": data.get("language", ""),
                    "license": license_info.get("name", "Unknown") if license_info else "None",
                }
            else:
                print(f"Error fetching {repo_name}: {response.status}")
                return None
    except urllib.error.HTTPError as e:
        print(f"HTTP Error fetching {repo_name}: {e.code}")
        # If rate limited, print a warning
        if e.code == 403:
            print("Warning: API Rate limited. Use GITHUB_TOKEN for higher limits.")
        return None
    except Exception as e:
        # Broad catch is deliberate: one unreachable repo must not abort
        # the whole scrape; the caller treats None as "skip".
        print(f"Error fetching {repo_name}: {e}")
        return None
|
||||
|
||||
def main():
    """Refresh GitHub stats for every open-source tool and persist the data.

    Iterates the tools list, updates each entry that has a GitHub repo
    with fresh API stats, then writes the result to the root data file
    and (if present) the web data folder.
    """
    tools = load_data()
    updated = False

    for tool in tools:
        if tool.get("is_open_source") and tool.get("github_repo"):
            stats = fetch_github_stats(tool["github_repo"])
            if stats:
                tool.update(stats)
                updated = True
            time.sleep(0.5)  # Be nice to API, slightly faster with token

    # Save to root data folder — reuse save_data() instead of
    # duplicating the open/json.dump logic inline.
    save_data(tools)

    # Save to web data folder if it exists
    web_data_path = os.path.join(BASE_DIR, 'web', 'data', 'tools.json')
    if os.path.exists(os.path.dirname(web_data_path)):
        with open(web_data_path, 'w', encoding='utf-8') as f:
            json.dump(tools, f, indent=2)

    if updated:
        print("Data updated successfully!")
    else:
        print("No updates found.")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user