mirror of
https://github.com/mustbeperfect/definitive-opensource.git
synced 2026-04-18 02:54:13 +02:00
Moved util scripts to dedicated scripts dir
This commit is contained in:
433
scripts/utils/contributing_autofill.py
Normal file
433
scripts/utils/contributing_autofill.py
Normal file
@@ -0,0 +1,433 @@
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Sequence, Set, Tuple
|
||||
import requests
|
||||
|
||||
# Repository root: two directory levels above scripts/utils/.
ROOT = Path(__file__).resolve().parents[2]
# Static reference data consumed by load_reference_data().
TAGS_FILE = ROOT / "source" / "data" / "static" / "tags.json"
PLATFORMS_FILE = ROOT / "source" / "data" / "static" / "platforms.json"
CATEGORIES_FILE = ROOT / "source" / "data" / "static" / "categories.json"
# Default output file that new entries are appended to.
APPLICATIONS_FILE = ROOT / "source" / "data" / "dynamic" / "applications.json"
# User-Agent header value sent with every GitHub API request.
USER_AGENT = "definitive-opensource-contributing-autofill"
# Pinned GitHub REST API version (X-GitHub-Api-Version header).
GITHUB_API_VERSION = "2022-11-28"

# Matches "owner/repo", optionally prefixed with "github.com:" or
# "github.com/", with an optional ".git" suffix and trailing "#..."/"?...".
REPO_PATTERN = re.compile(
    r"(?:github\.com[:/])?(?P<owner>[\w\-.]+)/(?P<repo>[\w\-.]+?)(?:\.git)?(?:[#?].*)?$",
    re.IGNORECASE,
)

# Keyword -> id maps, presumably meant to drive keyword_hits()-based
# inference; all three are empty and unreferenced in the visible code,
# so automatic inference is effectively disabled — TODO confirm intent.
PLATFORM_KEYWORDS: Dict[str, str] = {}

TAG_KEYWORDS: Dict[str, str] = {}

CATEGORY_KEYWORDS: Dict[str, str] = {}
|
||||
|
||||
|
||||
@dataclass
class ReferenceData:
    """Valid ids and display labels loaded from the static data files."""

    tag_ids: Set[str]  # every valid tag id (attributes + properties)
    platform_ids: Set[str]  # every valid platform id
    category_ids: Set[str]  # every valid category/subcategory id
    tag_labels: Dict[str, str]  # tag id -> human-readable label
    platform_labels: Dict[str, str]  # platform id -> display name
    category_labels: Dict[str, str]  # category id -> name ("Sub (Parent)" for subcategories)
|
||||
|
||||
|
||||
class DuplicateRepositoryError(RuntimeError):
    """Raised when attempting to append an application that already exists.

    The message names the duplicate repo_url and the applications file it
    was found in; raised by append_application().
    """
|
||||
|
||||
|
||||
def load_reference_data() -> ReferenceData:
    """Load valid tag/platform/category ids and their labels from disk.

    Reads the three static JSON files (tags, platforms, categories) and
    returns them bundled as a ReferenceData instance.
    """
    tags_data = json.loads(TAGS_FILE.read_text(encoding="utf-8"))
    tag_labels: Dict[str, str] = {}
    # Attribute tags: prefer the description, prefixed with the emoji if any.
    for attribute in tags_data.get("attributes", []):
        text = attribute.get("description") or attribute.get("name") or attribute["id"]
        icon = attribute.get("emoji")
        tag_labels[attribute["id"]] = f"{icon} {text}" if icon else text
    # Property tags: prefer the name over the description.
    for prop in tags_data.get("properties", []):
        tag_labels[prop["id"]] = prop.get("name") or prop.get("description") or prop["id"]

    platforms_data = json.loads(PLATFORMS_FILE.read_text(encoding="utf-8"))
    platform_labels = {
        item["id"]: item.get("name") or item["id"]
        for item in platforms_data.get("platforms", [])
    }

    categories_data = json.loads(CATEGORIES_FILE.read_text(encoding="utf-8"))
    category_labels: Dict[str, str] = {
        item["id"]: item.get("name") or item["id"]
        for item in categories_data.get("categories", [])
    }
    # Subcategories are labeled "Name (Parent)" when the parent is known.
    for sub in categories_data.get("subcategories", []):
        parent_id = sub.get("parent")
        parent_name = category_labels.get(parent_id, parent_id) if parent_id else None
        base = sub.get("name") or sub["id"]
        category_labels[sub["id"]] = f"{base} ({parent_name})" if parent_name else base

    return ReferenceData(
        tag_ids=set(tag_labels),
        platform_ids=set(platform_labels),
        category_ids=set(category_labels),
        tag_labels=tag_labels,
        platform_labels=platform_labels,
        category_labels=category_labels,
    )
|
||||
|
||||
|
||||
def render_options(options: Dict[str, str], indent: str = " ") -> str:
    """Render option ids (with labels when they add information) as a bullet list."""
    return "\n".join(
        f"{indent}- {key}: {label}" if label and label != key else f"{indent}- {key}"
        for key, label in sorted(options.items())
    )
|
||||
|
||||
|
||||
def available_text(label: str, options: Dict[str, str]) -> str:
    """Describe the valid ids for *label*; empty string when there are none."""
    if not options:
        return ""
    rendered = render_options(options)
    return f"Available {label} ids:\n{rendered}"
|
||||
|
||||
|
||||
def load_applications_data(path: Path) -> Dict:
    """Parse and return the JSON document stored at *path*."""
    return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def persist_applications_data(path: Path, data: Dict) -> None:
    """Write *data* to *path* as 4-space-indented JSON with a trailing newline."""
    serialized = json.dumps(data, indent=4)
    path.write_text(serialized + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def append_application(entry: Dict, path: Path) -> None:
    """Append *entry* to the applications file at *path* and save it.

    Raises DuplicateRepositoryError when an application with the same
    repo_url (ignoring a trailing slash) is already present.
    """
    data = load_applications_data(path)
    applications = data.setdefault("applications", [])
    candidate_url = (entry.get("repo_url") or "").rstrip("/")
    for existing in applications:
        if (existing.get("repo_url") or "").rstrip("/") != candidate_url:
            continue
        # Show a repo-relative path when possible for a friendlier message.
        try:
            shown = path.relative_to(ROOT)
        except ValueError:
            shown = path
        raise DuplicateRepositoryError(
            f"Repository {entry['repo_url']} already exists in {shown}."
        )
    applications.append(entry)
    persist_applications_data(path, data)
|
||||
|
||||
|
||||
def parse_repo_identifier(value: str) -> Tuple[str, str]:
    """Extract (owner, repo) from a GitHub URL or an ``owner/repo`` slug.

    Raises ValueError when the input matches neither form.
    """
    candidate = value.strip()
    match = REPO_PATTERN.search(candidate)
    if match is not None:
        return match.group("owner"), match.group("repo")
    # Fallback: treat anything containing a slash as owner/rest.
    if "/" in candidate:
        owner, _, repo = candidate.partition("/")
        return owner, repo
    raise ValueError(f"Could not parse repository from '{candidate}'.")
|
||||
|
||||
|
||||
def github_request(path: str, token: Optional[str], params: Optional[Dict[str, str]] = None) -> requests.Response:
    """Perform a GET request against the GitHub REST API.

    *path* is relative to https://api.github.com/ (leading slashes are
    stripped). When *token* is given it is sent as a Bearer credential.
    Raises RuntimeError on a 401 or on a 403 whose body mentions the rate
    limit; any other response is returned as-is for the caller to inspect.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "User-Agent": USER_AGENT,
        "X-GitHub-Api-Version": GITHUB_API_VERSION,
    }
    if token:
        headers["Authorization"] = f"Bearer {token}"
    url = f"https://api.github.com/{path.lstrip('/')}"
    response = requests.get(url, headers=headers, params=params or {})
    if response.status_code == 401:
        raise RuntimeError("GitHub authentication failed. Set the GITHUB_TOKEN environment variable.")
    # 403 can mean several things; only treat it as fatal when it is rate limiting.
    if response.status_code == 403 and "rate limit" in response.text.lower():
        raise RuntimeError("GitHub rate limit exceeded. Provide a token to continue.")
    return response
|
||||
|
||||
|
||||
def fetch_repo(owner: str, repo: str, token: Optional[str]) -> Dict:
    """Fetch repository metadata from GET /repos/{owner}/{repo}.

    Raises RuntimeError on any non-200 response.
    """
    # NOTE(review): per_page has no obvious effect on this single-resource
    # endpoint — presumably copied from a list call; confirm before removing.
    response = github_request(f"repos/{owner}/{repo}", token, params={"per_page": 1})
    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch repo metadata ({response.status_code}): {response.text}")
    return response.json()
|
||||
|
||||
|
||||
def fetch_readme_excerpt(owner: str, repo: str, token: Optional[str]) -> Optional[str]:
    """Return the first non-empty line of the repository README, or None.

    Fetches GET /repos/{owner}/{repo}/readme, whose JSON payload carries the
    file content base64-encoded. Markdown heading markers and a trailing
    period are stripped from the chosen line. Returns None when the README
    is missing, empty, or cannot be decoded.
    """
    response = github_request(f"repos/{owner}/{repo}/readme", token)
    if response.status_code != 200:
        return None
    payload = response.json()
    content = payload.get("content")
    if not content:
        return None
    try:
        # Undecodable bytes are replaced rather than failing the whole excerpt.
        decoded = base64.b64decode(content).decode("utf-8", errors="replace")
    except (ValueError, UnicodeDecodeError):
        return None
    for line in decoded.splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        if stripped.startswith("#"):
            # Drop Markdown heading markers so "# Title" yields "Title".
            stripped = stripped.lstrip("#").strip()
        stripped = stripped.rstrip(".")
        # A heading line may be empty after stripping; keep scanning if so.
        if stripped:
            return stripped
    return None
|
||||
|
||||
|
||||
def normalize_project_name(repo_name: str) -> str:
    """Derive a display name from a repository name.

    Names that already contain an uppercase letter are kept verbatim;
    otherwise dash/underscore segments are capitalized and joined with
    spaces ("my-cool_app" -> "My Cool App").
    """
    if not repo_name:
        return ""
    has_upper = any(ch.isupper() for ch in repo_name if ch.isalpha())
    if has_upper:
        return repo_name
    segments = [part for part in re.split(r"[-_]", repo_name) if part]
    if not segments:
        return repo_name.capitalize()
    return " ".join(part.capitalize() for part in segments)
|
||||
|
||||
|
||||
def iso_to_mmddyyyy(value: Optional[str]) -> str:
    """Convert a GitHub ``YYYY-MM-DDTHH:MM:SSZ`` timestamp to ``MM/DD/YYYY``.

    Returns "" for None, "", or an unparseable value.
    """
    if not value:
        return ""
    try:
        parsed = datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        return ""
    return parsed.strftime("%m/%d/%Y")
|
||||
|
||||
|
||||
def keyword_hits(keywords: Dict[str, str], sources: Sequence[str]) -> Set[str]:
    """Return the mapped ids whose keyword appears (word-bounded) in any source."""
    if not sources:
        return set()
    # Search one lowercased haystack built from the non-empty sources.
    haystack = " ".join(src for src in sources if src).lower()
    return {
        mapped
        for needle, mapped in keywords.items()
        if re.search(rf"\b{re.escape(needle)}\b", haystack)
    }
|
||||
|
||||
|
||||
def infer_platforms(repo: Dict, ref: ReferenceData) -> Tuple[Set[str], List[str]]:
    """Platform inference is not implemented; return none plus a manual-review note."""
    note = (
        "Platforms were not inferred automatically.\n"
        f"{available_text('platform', ref.platform_labels)}"
    )
    return set(), [note]
|
||||
|
||||
|
||||
def infer_tags(repo: Dict, ref: ReferenceData) -> Tuple[Set[str], List[str]]:
    """Tag inference is not implemented; always return an empty set and no notes."""
    empty: Set[str] = set()
    return empty, []
|
||||
|
||||
|
||||
def infer_category(repo: Dict, ref: ReferenceData) -> Tuple[str, List[str]]:
    """Category inference is not implemented; return "" plus a manual-entry note."""
    note = (
        "Category must be specified manually.\n"
        f"{available_text('category', ref.category_labels)}"
    )
    return "", [note]
|
||||
|
||||
|
||||
def prompt_list(field_label: str, options: Dict[str, str], allow_empty: bool = False) -> List[str]:
    """Interactively collect one or more ids (comma separated) from stdin.

    When *allow_empty* is true an empty reply returns []; otherwise the user
    is re-prompted until at least one valid id is supplied.
    """
    accepted = set(options)
    print(f"\n{available_text(field_label, options)}")
    skip_hint = " (press Enter to skip)" if allow_empty else ""
    question = f"Enter {field_label} ids (comma separated){skip_hint}:\n> "
    while True:
        reply = input(question).strip()
        if allow_empty and not reply:
            return []
        chosen = [item.strip() for item in reply.split(",") if item.strip()]
        bad = [item for item in chosen if item not in accepted]
        if bad:
            print(f"Invalid values: {', '.join(bad)}. Please try again.")
            continue
        if not chosen:
            print("At least one value is required. Press Ctrl+C to abort.")
            continue
        return chosen
|
||||
|
||||
|
||||
def prompt_value(field_label: str, options: Dict[str, str]) -> str:
    """Interactively ask for a single id until the user enters a valid one."""
    accepted = set(options)
    print(f"\n{available_text(field_label, options)}")
    question = f"Enter {field_label} id:\n> "
    while True:
        answer = input(question).strip()
        if answer in accepted:
            return answer
        print(f"{answer} is not a valid value. Please try again.")
|
||||
|
||||
|
||||
def fill_missing_with_input(entry: Dict, ref: ReferenceData) -> Dict:
    """Return a copy of *entry* with blank fields filled in interactively.

    When stdin is not a TTY (e.g. running in CI or a pipe) the entry is
    returned untouched.
    """
    if not sys.stdin.isatty():
        return entry

    completed = entry.copy()
    if not completed.get("platforms"):
        completed["platforms"] = prompt_list("platform", ref.platform_labels)
    if not completed.get("tags"):
        completed["tags"] = prompt_list("tag", ref.tag_labels, allow_empty=True)
    if not completed.get("category"):
        completed["category"] = prompt_value("category", ref.category_labels)
    return completed
|
||||
|
||||
|
||||
def filter_resolved_notes(notes: List[str], entry: Dict) -> List[str]:
    """Drop notes about platforms/tags/category once the entry has that field set.

    A note is considered resolved when its (lowercased) text mentions a field
    name and the corresponding entry field is non-empty.
    """
    kept: List[str] = []
    for note in notes:
        text = note.lower()
        resolved = (
            ("platform" in text and entry.get("platforms"))
            or ("tag" in text and entry.get("tags"))
            or ("category" in text and entry.get("category"))
        )
        if not resolved:
            kept.append(note)
    return kept
|
||||
|
||||
|
||||
def build_entry(
    repo_url: str,
    repo_data: Dict,
    ref: ReferenceData,
    owner: str,
    repo: str,
    full_details: bool,
    token: Optional[str],
) -> Tuple[Dict, List[str]]:
    """Assemble an applications.json entry from GitHub repository metadata.

    Returns the entry dict plus a list of human-readable notes about fields
    that need manual attention. When *full_details* is true the optional
    fields (description, stars, last_commit, language, license,
    homepage_url) are populated from *repo_data*, with the README's first
    line as a description fallback. Raises ValueError when inferred ids are
    not present in the reference data.
    """
    notes: List[str] = []
    flags: Set[str] = set()

    repo_description = repo_data.get("description") or ""
    readme_description: Optional[str] = None
    # Only hit the README endpoint when a fallback description is needed.
    if full_details and not repo_description:
        readme_description = fetch_readme_excerpt(owner, repo, token)

    name = normalize_project_name(repo_data.get("name", ""))
    platforms, platform_notes = infer_platforms(repo_data, ref)
    notes.extend(platform_notes)
    tags, tag_notes = infer_tags(repo_data, ref)
    notes.extend(tag_notes)
    category, category_notes = infer_category(repo_data, ref)
    notes.extend(category_notes)

    # Skeleton entry; optional fields stay blank unless full_details is set.
    entry = {
        "name": name,
        "description": "",
        "repo_url": repo_url,
        "tags": sorted(tags),
        "platforms": sorted(platforms),
        "category": category,
        "stars": 0,
        "flags": sorted(flags),
        "last_commit": "",
        "language": "",
        "license": "",
        "homepage_url": "",
    }

    if full_details:
        description_value = repo_description or readme_description or ""
        entry["description"] = description_value
        if readme_description and not repo_description:
            # Description came from the README rather than repo metadata.
            flags.add("custom-description")
            notes.append("Description pulled from README (custom-description flag added).")
        elif not description_value:
            notes.append("Repository has no description; field left blank.")

        entry["stars"] = repo_data.get("stargazers_count", 0)
        entry["last_commit"] = iso_to_mmddyyyy(repo_data.get("pushed_at"))
        entry["language"] = repo_data.get("language") or ""
        license_data = repo_data.get("license") or {}
        entry["license"] = license_data.get("spdx_id") or license_data.get("name") or ""
        entry["homepage_url"] = repo_data.get("homepage") or ""

    # Re-sync flags: the full_details branch above may have added to the set.
    entry["flags"] = sorted(flags)

    # Validate every inferred id against the reference data before returning.
    invalid_tags = [tag for tag in entry["tags"] if tag not in ref.tag_ids]
    if invalid_tags:
        raise ValueError(f"Invalid tag ids supplied: {', '.join(invalid_tags)}")

    invalid_platforms = [platform for platform in entry["platforms"] if platform not in ref.platform_ids]
    if invalid_platforms:
        raise ValueError(f"Invalid platform ids supplied: {', '.join(invalid_platforms)}")

    if entry["category"] and entry["category"] not in ref.category_ids:
        raise ValueError(f"Invalid category id supplied: {entry['category']}")

    # Remind the user about required fields that are still blank.
    if not entry["platforms"]:
        notes.append(
            "No platforms detected; please review `platforms`.\n"
            f"{available_text('platform', ref.platform_labels)}"
        )
    if not entry["category"]:
        notes.append(
            "Category missing; update `category` manually.\n"
            f"{available_text('category', ref.category_labels)}"
        )

    return entry, notes
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: build an entry from a repo URL and append it to applications.json.

    Defect fixed: the --applications-file help text claimed the default was
    core/data/dynamic/applications.json, but APPLICATIONS_FILE actually
    points at source/data/dynamic/applications.json.
    """
    parser = argparse.ArgumentParser(
        description="Generate an applications.json entry from a GitHub repository URL."
    )
    parser.add_argument("repo", help="GitHub repository URL or owner/repo slug.")
    parser.add_argument(
        "--applications-file",
        default=str(APPLICATIONS_FILE),
        help="Path to applications.json (default: source/data/dynamic/applications.json).",
    )
    parser.add_argument(
        "--full-details",
        action="store_true",
        help="Populate optional fields (description, stats, license, homepage) using GitHub data.",
    )

    args = parser.parse_args()
    owner, repo_name = parse_repo_identifier(args.repo)
    # Canonical URL, regardless of how the repository was specified.
    repo_url = f"https://github.com/{owner}/{repo_name}"

    # Optional token: raises the API rate limit and enables authentication.
    token = os.getenv("GITHUB_TOKEN")

    repo_data = fetch_repo(owner, repo_name, token)
    ref = load_reference_data()
    entry, notes = build_entry(repo_url, repo_data, ref, owner, repo_name, args.full_details, token)
    # Let an interactive user fill in anything inference left blank,
    # then drop the notes those answers resolved.
    entry = fill_missing_with_input(entry, ref)
    notes = filter_resolved_notes(notes, entry)

    applications_path = Path(args.applications_file).resolve()

    print(json.dumps(entry, indent=4))
    if notes:
        print("\nNotes:")
        for note in notes:
            print(f"- {note}")

    try:
        append_application(entry, applications_path)
    except DuplicateRepositoryError as exc:
        print(f"\nEntry skipped: {exc}")
        print("Hint: If you meant to update that entry, edit applications.json directly.")
        return

    print(f"\nAdded entry to {applications_path}")
|
||||
62
scripts/utils/json_mod.py
Normal file
62
scripts/utils/json_mod.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import json

# One-off maintenance script: moves CLI-style pseudo-platforms out of each
# application's "platforms" list and into its "tags" list.
#
# Defect fixed: two earlier one-off migrations (lowercasing platforms and
# adding a blank homepage_url) were kept as module-level triple-quoted
# string literals — dead code evaluated as no-op expressions. They have
# been removed; only the live migration below remains.

APPLICATIONS_PATH = "core/data/dynamic/applications.json"

# Platform values that are really tags.
move_to_tags = {"cli", "cli-plus", "tui", "pip"}

# Load applications.json
with open(APPLICATIONS_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

for app in data.get("applications", []):
    platforms = app.get("platforms", [])
    tags = set(app.get("tags", []))

    # Partition: pseudo-platforms join the tag set, real platforms stay.
    new_platforms = []
    for p in platforms:
        if p in move_to_tags:
            tags.add(p)
        else:
            new_platforms.append(p)

    # Update the application in place.
    app["platforms"] = new_platforms
    app["tags"] = sorted(tags)

# Save the updated file
with open(APPLICATIONS_PATH, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4)
|
||||
48
scripts/utils/md_to_json.py
Normal file
48
scripts/utils/md_to_json.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
def parse_readme(readme_path):
    """Parse README application tables into a list of application dicts.

    ``###`` headings set the current category (lowercased, spaces become
    dashes). Table rows shaped like
    ``| [name](github url) `tag`? | description | platforms |`` become
    entries; rows seen before the first heading are ignored.
    """
    with open(readme_path, "r", encoding="utf-8") as handle:
        content = handle.readlines()

    heading_re = re.compile(r"^### (.+)")
    row_re = re.compile(
        r"\| \[(.+)\]\((https://github.com/[^)]+)\)(?: `([^`]+)`)? \| (.+?) \| (.+?) \|"
    )

    entries = []
    current_category = None

    for raw_line in content:
        text = raw_line.strip()

        heading = heading_re.match(text)
        if heading:
            current_category = heading.group(1).lower().replace(" ", "-")
            continue

        row = row_re.match(text)
        if row and current_category:
            name, link, tag, description, platforms = row.groups()
            entries.append({
                "name": name,
                "description": description,
                "repo_url": link,
                "tags": [tag] if tag else [],
                "platforms": platforms.split(),
                "category": current_category,
            })

    return entries
|
||||
|
||||
|
||||
def save_to_json(data, output_path):
    """Write *data* to *output_path* as 4-space-indented UTF-8 JSON (non-ASCII kept)."""
    serialized = json.dumps(data, indent=4, ensure_ascii=False)
    with open(output_path, "w", encoding="utf-8") as handle:
        handle.write(serialized)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # One-off conversion: turn the README's application tables into JSON.
    readme_path = "README.md"  # Update with actual path
    output_path = "applications.json"

    parsed_data = parse_readme(readme_path)
    save_to_json(parsed_data, output_path)
    print(f"Converted README to {output_path}")
|
||||
Reference in New Issue
Block a user