From ddb9b71f3fa66037a2cfd31f94d5fbb7a1cbf285 Mon Sep 17 00:00:00 2001 From: aa_humaaan Date: Tue, 3 Mar 2026 21:25:03 +0530 Subject: [PATCH] chore: remove redundant scraper.py (replaced by fetch-github-metadata.js) --- scraper/scraper.py | 83 ---------------------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 scraper/scraper.py diff --git a/scraper/scraper.py b/scraper/scraper.py deleted file mode 100644 index b71e65a..0000000 --- a/scraper/scraper.py +++ /dev/null @@ -1,83 +0,0 @@ -import json -import urllib.request -import urllib.error -import os -import time - -# Paths -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -DATA_FILE = os.path.join(BASE_DIR, 'data', 'tools.json') - -def load_data(): - with open(DATA_FILE, 'r') as f: - return json.load(f) - -def save_data(data): - with open(DATA_FILE, 'w') as f: - json.dump(data, f, indent=2) - -def fetch_github_stats(repo_name): - print(f"Fetching stats for {repo_name}...") - url = f"https://api.github.com/repos/{repo_name}" - req = urllib.request.Request(url) - # GitHub requires a User-Agent - req.add_header('User-Agent', 'Python-Urllib-Scraper') - - # Use GITHUB_TOKEN if available to avoid rate limits - token = os.environ.get('GITHUB_TOKEN') - if token: - req.add_header('Authorization', f'token {token}') - - try: - with urllib.request.urlopen(req) as response: - if response.status == 200: - data = json.loads(response.read().decode()) - return { - "stars": data.get("stargazers_count", 0), - "description": data.get("description", ""), - "last_commit": data.get("pushed_at", ""), - "language": data.get("language", ""), - "license": data.get("license", {}).get("name", "Unknown") if data.get("license") else "None" - } - else: - print(f"Error fetching {repo_name}: {response.status}") - return None - except urllib.error.HTTPError as e: - print(f"HTTP Error fetching {repo_name}: {e.code}") - # If rate limited, print a warning - if e.code == 403: - print("Warning: API Rate limited. Use GITHUB_TOKEN for higher limits.") - return None - except Exception as e: - print(f"Error fetching {repo_name}: {e}") - return None - -def main(): - tools = load_data() - updated = False - - for tool in tools: - if tool.get("is_open_source") and tool.get("github_repo"): - stats = fetch_github_stats(tool["github_repo"]) - if stats: - tool.update(stats) - updated = True - time.sleep(0.5) # Be nice to API, slightly faster with token - - # Save to root data folder - with open(DATA_FILE, 'w') as f: - json.dump(tools, f, indent=2) - - # Save to web data folder if it exists - web_data_path = os.path.join(BASE_DIR, 'web', 'data', 'tools.json') - if os.path.exists(os.path.dirname(web_data_path)): - with open(web_data_path, 'w') as f: - json.dump(tools, f, indent=2) - - if updated: - print("Data updated successfully!") - else: - print("No updates found.") - -if __name__ == "__main__": - main()