diff --git a/skills/public/github-deep-research/SKILL.md b/skills/public/github-deep-research/SKILL.md new file mode 100644 index 0000000..53215ef --- /dev/null +++ b/skills/public/github-deep-research/SKILL.md @@ -0,0 +1,153 @@ +--- +name: github-deep-research +description: Conduct multi-round deep research on any GitHub Repo. Use when users request comprehensive analysis, timeline reconstruction, competitive analysis, or in-depth investigation of GitHub. Produces structured markdown reports with executive summaries, chronological timelines, metrics analysis, and Mermaid diagrams. Triggers on Github repository URL or open source projects. +--- + +# GitHub Deep Research Skill + +Multi-round research combining GitHub API, web_search, web_fetch to produce comprehensive markdown reports. + +## Research Workflow + +``` +Round 1: GitHub API +├── Get repository basic information +└── Get repository README + +Round 2: Discovery +├── Identify key entities and terms +├── Web search for overview (3-5 queries) +└── Fetch official sources + +Round 3: Deep Dive +├── GitHub analysis +├── Targeted searches for specifics +└── Fetch detailed articles, docs, PRs + +Round 4: Synthesis +├── Construct timeline +├── Analyze metrics +└── Generate report +``` + +## Core Methodology + +### Query Strategy + +**Broad to Narrow**: Start with GitHub API, then general queries, refine based on findings. + +``` +Round 1: GitHub API +Round 2: "{topic} overview" +Round 3: "{topic} architecture", "{topic} vs alternatives" +Round 4: "{topic} issues", "{topic} roadmap", "site:github.com {topic}" +``` + +**Source Prioritization**: +1. Official docs/repos (highest weight) +2. Technical blogs (Medium, Dev.to) +3. News articles (verified outlets) +4. Community discussions (Reddit, HN) +5. 
Social media (lowest weight, for sentiment) + +### Research Rounds + +**Round 1 - GitHub API** +Directly execute `scripts/github_api.py` without `read_file()`: +```bash +cd path/to/skill && python scripts/github_api.py <owner> <repo> summary +``` + +**Round 2 - Discovery (3-5 web_search)** +- Get overview and identify key terms +- Find official website/repo +- Identify main players/competitors + +**Round 3 - Deep Investigation (5-10 web_search + web_fetch)** +- Technical architecture details +- Timeline of key events +- Community sentiment +- Use web_fetch on valuable URLs for full content + +**Round 4 - GitHub Deep Dive** +- Analyze commit history for timeline +- Review issues/PRs for feature evolution +- Check contributor activity + +## Report Structure + +Follow template in `assets/report_template.md`: + +1. **Metadata Block** - Date, confidence level, subject +2. **Executive Summary** - 2-3 sentence overview with key metrics +3. **Chronological Timeline** - Phased breakdown with dates +4. **Key Analysis Sections** - Topic-specific deep dives +5. **Metrics & Comparisons** - Tables, growth charts +6. **Strengths & Weaknesses** - Balanced assessment +7. **Sources** - Categorized references +8. **Confidence Assessment** - Claims by confidence level +9. 
**Methodology** - Research approach used + +### Mermaid Diagrams + +Include diagrams where helpful: + +**Timeline (Gantt)**: +```mermaid +gantt + title Project Timeline + dateFormat YYYY-MM-DD + section Phase 1 + Development :2025-01-01, 2025-03-01 + section Phase 2 + Launch :2025-03-01, 2025-04-01 +``` + +**Architecture (Flowchart)**: +```mermaid +flowchart TD + A[User] --> B[Coordinator] + B --> C[Planner] + C --> D[Research Team] + D --> E[Reporter] +``` + +**Comparison (Pie/Bar)**: +```mermaid +pie title Market Share + "Project A" : 45 + "Project B" : 30 + "Others" : 25 +``` + +## Confidence Scoring + +Assign confidence based on source quality: + +| Confidence | Criteria | +|------------|----------| +| High (90%+) | Official docs, GitHub data, multiple corroborating sources | +| Medium (70-89%) | Single reliable source, recent articles | +| Low (50-69%) | Social media, unverified claims, outdated info | + +## Output + +Save report as: `research_{topic}_{YYYYMMDD}.md` + +### Formatting Rules + +- Chinese content: Use full-width punctuation(,。:;!?) +- Technical terms: Provide Wiki/doc URL on first mention +- Tables: Use for metrics, comparisons +- Code blocks: For technical examples +- Mermaid: For architecture, timelines, flows + +## Best Practices + +1. **Start with official sources** - Repo, docs, company blog +2. **Verify dates from commits/PRs** - More reliable than articles +3. **Triangulate claims** - 2+ independent sources +4. **Note conflicting info** - Don't hide contradictions +5. **Distinguish fact vs opinion** - Label speculation clearly +6. **Cite inline** - Reference sources near claims +7. 
**Update as you go** - Don't wait until end to synthesize diff --git a/skills/public/github-deep-research/assets/report_template.md b/skills/public/github-deep-research/assets/report_template.md new file mode 100644 index 0000000..c90b693 --- /dev/null +++ b/skills/public/github-deep-research/assets/report_template.md @@ -0,0 +1,185 @@ +# {TITLE} + +**Research Date:** {DATE} +**Timestamp:** {TIMESTAMP} +**Confidence Level:** {CONFIDENCE_LEVEL} +**Subject:** {SUBJECT_DESCRIPTION} + +--- + +## Repository Information + +**Name:** {REPOSITORY_NAME} +**Description:** {REPOSITORY_DESCRIPTION} +**URL:** {REPOSITORY_URL} +**Stars:** {REPOSITORY_STARS} +**Forks:** {REPOSITORY_FORKS} +**Open Issues:** {REPOSITORY_OPEN_ISSUES} +**Language(s):** {REPOSITORY_LANGUAGES} +**License:** {REPOSITORY_LICENSE} +**Created At:** {REPOSITORY_CREATED_AT} +**Updated At:** {REPOSITORY_UPDATED_AT} +**Pushed At:** {REPOSITORY_PUSHED_AT} +**Topics:** {REPOSITORY_TOPICS} + +--- + +## Executive Summary + +{EXECUTIVE_SUMMARY} + +--- + +## Complete Chronological Timeline + +### PHASE 1: {PHASE_1_NAME} + +#### {PHASE_1_PERIOD} + +{PHASE_1_CONTENT} + +### PHASE 2: {PHASE_2_NAME} + +#### {PHASE_2_PERIOD} + +{PHASE_2_CONTENT} + +### PHASE 3: {PHASE_3_NAME} + +#### {PHASE_3_PERIOD} + +{PHASE_3_CONTENT} + +--- + +## Key Analysis + +### {ANALYSIS_SECTION_1_TITLE} + +{ANALYSIS_SECTION_1_CONTENT} + +### {ANALYSIS_SECTION_2_TITLE} + +{ANALYSIS_SECTION_2_CONTENT} + +--- + +## Architecture / System Overview + +```mermaid +flowchart TD + A[Component A] --> B[Component B] + B --> C[Component C] + C --> D[Component D] +``` + +{ARCHITECTURE_DESCRIPTION} + +--- + +## Metrics & Impact Analysis + +### Growth Trajectory + +``` +{METRICS_TIMELINE} +``` + +### Key Metrics + +| Metric | Value | Assessment | +|--------|-------|------------| +| {METRIC_1} | {VALUE_1} | {ASSESSMENT_1} | +| {METRIC_2} | {VALUE_2} | {ASSESSMENT_2} | +| {METRIC_3} | {VALUE_3} | {ASSESSMENT_3} | + +--- + +## Comparative Analysis + +### Feature 
Comparison + +| Feature | {SUBJECT} | {COMPETITOR_1} | {COMPETITOR_2} | +|---------|-----------|----------------|----------------| +| {FEATURE_1} | {SUBJ_F1} | {COMP1_F1} | {COMP2_F1} | +| {FEATURE_2} | {SUBJ_F2} | {COMP1_F2} | {COMP2_F2} | +| {FEATURE_3} | {SUBJ_F3} | {COMP1_F3} | {COMP2_F3} | + +### Market Positioning + +{MARKET_POSITIONING} + +--- + +## Strengths & Weaknesses + +### Strengths + +{STRENGTHS} + +### Areas for Improvement + +{WEAKNESSES} + +--- + +## Key Success Factors + +{SUCCESS_FACTORS} + +--- + +## Sources + +### Primary Sources + +{PRIMARY_SOURCES} + +### Media Coverage + +{MEDIA_SOURCES} + +### Academic / Technical Sources + +{ACADEMIC_SOURCES} + +### Community Sources + +{COMMUNITY_SOURCES} + +--- + +## Confidence Assessment + +**High Confidence (90%+) Claims:** +{HIGH_CONFIDENCE_CLAIMS} + +**Medium Confidence (70-89%) Claims:** +{MEDIUM_CONFIDENCE_CLAIMS} + +**Lower Confidence (50-69%) Claims:** +{LOW_CONFIDENCE_CLAIMS} + +--- + +## Research Methodology + +This report was compiled using: + +1. **Multi-source web search** - Broad discovery and targeted queries +2. **GitHub repository analysis** - Commits, issues, PRs, activity metrics +3. **Content extraction** - Official docs, technical articles, media coverage +4. **Cross-referencing** - Verification across independent sources +5. **Chronological reconstruction** - Timeline from timestamped data +6. 
**Confidence scoring** - Claims weighted by source reliability + +**Research Depth:** {RESEARCH_DEPTH} +**Time Scope:** {TIME_SCOPE} +**Geographic Scope:** {GEOGRAPHIC_SCOPE} + +--- + +**Report Prepared By:** Claude Deep Research +**Date:** {REPORT_DATE} +**Report Version:** 1.0 +**Status:** Complete diff --git a/skills/public/github-deep-research/scripts/github_api.py b/skills/public/github-deep-research/scripts/github_api.py new file mode 100644 index 0000000..41fc76e --- /dev/null +++ b/skills/public/github-deep-research/scripts/github_api.py @@ -0,0 +1,328 @@ +#!/usr/bin/env python3 +""" +GitHub API client for deep research. +Uses requests for HTTP operations. +""" + +import json +import sys +from typing import Any, Dict, List, Optional + +try: + import requests +except ImportError: + # Fallback to urllib if requests not available + import urllib.error + import urllib.request + + class RequestsFallback: + """Minimal requests-like interface using urllib.""" + + class Response: + def __init__(self, data: bytes, status: int): + self._data = data + self.status_code = status + self.text = data.decode("utf-8", errors="replace") + + def json(self): + return json.loads(self._data) + + def raise_for_status(self): + if self.status_code >= 400: + raise Exception(f"HTTP {self.status_code}") + + @staticmethod + def get(url: str, headers: dict = None, params: dict = None, timeout: int = 30): + if params: + query = "&".join(f"{k}={v}" for k, v in params.items()) + url = f"{url}?{query}" + + req = urllib.request.Request(url, headers=headers or {}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return RequestsFallback.Response(resp.read(), resp.status) + except urllib.error.HTTPError as e: + return RequestsFallback.Response(e.read(), e.code) + + requests = RequestsFallback() + + +class GitHubAPI: + """GitHub API client for repository analysis.""" + + BASE_URL = "https://api.github.com" + + def __init__(self, token: Optional[str] = None): + """ + 
Initialize GitHub API client. + + Args: + token: Optional GitHub personal access token for higher rate limits + """ + self.token = token + self.headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Deep-Research-Bot/1.0", + } + if token: + self.headers["Authorization"] = f"token {token}" + + def _get( + self, endpoint: str, params: Optional[Dict] = None, accept: Optional[str] = None + ) -> Any: + """Make GET request to GitHub API.""" + url = f"{self.BASE_URL}{endpoint}" + headers = self.headers.copy() + if accept: + headers["Accept"] = accept + + resp = requests.get(url, headers=headers, params=params, timeout=30) + resp.raise_for_status() + + if "application/vnd.github.raw" in (accept or ""): + return resp.text + return resp.json() + + def get_repo_info(self, owner: str, repo: str) -> Dict: + """Get basic repository information.""" + return self._get(f"/repos/{owner}/{repo}") + + def get_readme(self, owner: str, repo: str) -> str: + """Get repository README content as markdown.""" + try: + return self._get( + f"/repos/{owner}/{repo}/readme", accept="application/vnd.github.raw" + ) + except Exception as e: + return f"[README not found: {e}]" + + def get_tree( + self, owner: str, repo: str, branch: str = "main", recursive: bool = True + ) -> Dict: + """Get repository directory tree.""" + params = {"recursive": "1"} if recursive else {} + try: + return self._get(f"/repos/{owner}/{repo}/git/trees/{branch}", params) + except Exception: + # Try 'master' if 'main' fails + if branch == "main": + return self._get(f"/repos/{owner}/{repo}/git/trees/master", params) + raise + + def get_file_content(self, owner: str, repo: str, path: str) -> str: + """Get content of a specific file.""" + try: + return self._get( + f"/repos/{owner}/{repo}/contents/{path}", + accept="application/vnd.github.raw", + ) + except Exception as e: + return f"[File not found: {e}]" + + def get_languages(self, owner: str, repo: str) -> Dict[str, int]: + """Get repository languages 
and their bytes.""" + return self._get(f"/repos/{owner}/{repo}/languages") + + def get_contributors(self, owner: str, repo: str, limit: int = 30) -> List[Dict]: + """Get repository contributors.""" + return self._get( + f"/repos/{owner}/{repo}/contributors", params={"per_page": min(limit, 100)} + ) + + def get_recent_commits( + self, owner: str, repo: str, limit: int = 50, since: Optional[str] = None + ) -> List[Dict]: + """ + Get recent commits. + + Args: + owner: Repository owner + repo: Repository name + limit: Max commits to fetch + since: ISO date string to fetch commits since + """ + params = {"per_page": min(limit, 100)} + if since: + params["since"] = since + return self._get(f"/repos/{owner}/{repo}/commits", params) + + def get_issues( + self, + owner: str, + repo: str, + state: str = "all", + limit: int = 30, + labels: Optional[str] = None, + ) -> List[Dict]: + """ + Get repository issues. + + Args: + state: 'open', 'closed', or 'all' + labels: Comma-separated label names + """ + params = {"state": state, "per_page": min(limit, 100)} + if labels: + params["labels"] = labels + return self._get(f"/repos/{owner}/{repo}/issues", params) + + def get_pull_requests( + self, owner: str, repo: str, state: str = "all", limit: int = 30 + ) -> List[Dict]: + """Get repository pull requests.""" + return self._get( + f"/repos/{owner}/{repo}/pulls", + params={"state": state, "per_page": min(limit, 100)}, + ) + + def get_releases(self, owner: str, repo: str, limit: int = 10) -> List[Dict]: + """Get repository releases.""" + return self._get( + f"/repos/{owner}/{repo}/releases", params={"per_page": min(limit, 100)} + ) + + def get_tags(self, owner: str, repo: str, limit: int = 20) -> List[Dict]: + """Get repository tags.""" + return self._get( + f"/repos/{owner}/{repo}/tags", params={"per_page": min(limit, 100)} + ) + + def search_issues(self, owner: str, repo: str, query: str, limit: int = 30) -> Dict: + """Search issues and PRs in repository.""" + q = 
f"repo:{owner}/{repo} {query}" + return self._get("/search/issues", params={"q": q, "per_page": min(limit, 100)}) + + def get_commit_activity(self, owner: str, repo: str) -> List[Dict]: + """Get weekly commit activity for the last year.""" + return self._get(f"/repos/{owner}/{repo}/stats/commit_activity") + + def get_code_frequency(self, owner: str, repo: str) -> List[List[int]]: + """Get weekly additions/deletions.""" + return self._get(f"/repos/{owner}/{repo}/stats/code_frequency") + + def format_tree(self, tree_data: Dict, max_depth: int = 3) -> str: + """ + Format tree data as text directory structure. + + Args: + tree_data: Response from get_tree() + max_depth: Maximum depth to display + """ + if "tree" not in tree_data: + return "[Unable to parse tree]" + + lines = [] + for item in tree_data["tree"]: + path = item["path"] + depth = path.count("/") + if depth < max_depth: + indent = " " * depth + name = path.split("/")[-1] + if item["type"] == "tree": + lines.append(f"{indent}{name}/") + else: + lines.append(f"{indent}{name}") + + return "\n".join(lines[:100]) # Limit output + + def summarize_repo(self, owner: str, repo: str) -> Dict: + """ + Get comprehensive repository summary. 
+ + Returns dict with: info, languages, contributor_count, + recent_activity, top_issues, latest_release + """ + info = self.get_repo_info(owner, repo) + + summary = { + "name": info.get("full_name"), + "description": info.get("description"), + "url": info.get("html_url"), + "stars": info.get("stargazers_count"), + "forks": info.get("forks_count"), + "open_issues": info.get("open_issues_count"), + "language": info.get("language"), + "license": info.get("license", {}).get("spdx_id") + if info.get("license") + else None, + "created_at": info.get("created_at"), + "updated_at": info.get("updated_at"), + "pushed_at": info.get("pushed_at"), + "default_branch": info.get("default_branch"), + "topics": info.get("topics", []), + } + + # Add languages + try: + summary["languages"] = self.get_languages(owner, repo) + except Exception: + summary["languages"] = {} + + # Add contributor count + try: + contributors = self.get_contributors(owner, repo, limit=1) + # GitHub returns Link header with total, but we approximate + summary["contributor_count"] = len( + self.get_contributors(owner, repo, limit=100) + ) + except Exception: + summary["contributor_count"] = "N/A" + + # Latest release + try: + releases = self.get_releases(owner, repo, limit=1) + if releases: + summary["latest_release"] = { + "tag": releases[0].get("tag_name"), + "name": releases[0].get("name"), + "date": releases[0].get("published_at"), + } + except Exception: + summary["latest_release"] = None + + return summary + + +def main(): + """CLI interface for testing.""" + if len(sys.argv) < 3: + print("Usage: python github_api.py [command]") + print("Commands: info, readme, tree, languages, contributors,") + print(" commits, issues, prs, releases, summary") + sys.exit(1) + + owner, repo = sys.argv[1], sys.argv[2] + command = sys.argv[3] if len(sys.argv) > 3 else "summary" + + api = GitHubAPI() + + commands = { + "info": lambda: api.get_repo_info(owner, repo), + "readme": lambda: api.get_readme(owner, repo), + "tree": 
lambda: api.format_tree(api.get_tree(owner, repo)), + "languages": lambda: api.get_languages(owner, repo), + "contributors": lambda: api.get_contributors(owner, repo), + "commits": lambda: api.get_recent_commits(owner, repo), + "issues": lambda: api.get_issues(owner, repo), + "prs": lambda: api.get_pull_requests(owner, repo), + "releases": lambda: api.get_releases(owner, repo), + "summary": lambda: api.summarize_repo(owner, repo), + } + + if command not in commands: + print(f"Unknown command: {command}") + sys.exit(1) + + try: + result = commands[command]() + if isinstance(result, str): + print(result) + else: + print(json.dumps(result, indent=2, default=str)) + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main()