chore: remove redundant scraper.py (replaced by fetch-github-metadata.js)

This commit is contained in:
aa_humaaan
2026-03-03 21:25:03 +05:30
parent 42b2eb3764
commit ddb9b71f3f

View File

@@ -1,83 +0,0 @@
import json
import urllib.request
import urllib.error
import os
import time
# Paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_FILE = os.path.join(BASE_DIR, 'data', 'tools.json')
def load_data():
with open(DATA_FILE, 'r') as f:
return json.load(f)
def save_data(data):
with open(DATA_FILE, 'w') as f:
json.dump(data, f, indent=2)
def fetch_github_stats(repo_name):
print(f"Fetching stats for {repo_name}...")
url = f"https://api.github.com/repos/{repo_name}"
req = urllib.request.Request(url)
# GitHub requires a User-Agent
req.add_header('User-Agent', 'Python-Urllib-Scraper')
# Use GITHUB_TOKEN if available to avoid rate limits
token = os.environ.get('GITHUB_TOKEN')
if token:
req.add_header('Authorization', f'token {token}')
try:
with urllib.request.urlopen(req) as response:
if response.status == 200:
data = json.loads(response.read().decode())
return {
"stars": data.get("stargazers_count", 0),
"description": data.get("description", ""),
"last_commit": data.get("pushed_at", ""),
"language": data.get("language", ""),
"license": data.get("license", {}).get("name", "Unknown") if data.get("license") else "None"
}
else:
print(f"Error fetching {repo_name}: {response.status}")
return None
except urllib.error.HTTPError as e:
print(f"HTTP Error fetching {repo_name}: {e.code}")
# If rate limited, print a warning
if e.code == 403:
print("Warning: API Rate limited. Use GITHUB_TOKEN for higher limits.")
return None
except Exception as e:
print(f"Error fetching {repo_name}: {e}")
return None
def main():
tools = load_data()
updated = False
for tool in tools:
if tool.get("is_open_source") and tool.get("github_repo"):
stats = fetch_github_stats(tool["github_repo"])
if stats:
tool.update(stats)
updated = True
time.sleep(0.5) # Be nice to API, slightly faster with token
# Save to root data folder
with open(DATA_FILE, 'w') as f:
json.dump(tools, f, indent=2)
# Save to web data folder if it exists
web_data_path = os.path.join(BASE_DIR, 'web', 'data', 'tools.json')
if os.path.exists(os.path.dirname(web_data_path)):
with open(web_data_path, 'w') as f:
json.dump(tools, f, indent=2)
if updated:
print("Data updated successfully!")
else:
print("No updates found.")
if __name__ == "__main__":
main()