diff --git a/LICENSE_HEADER b/LICENSE_HEADER new file mode 100644 index 0000000..58bc29b --- /dev/null +++ b/LICENSE_HEADER @@ -0,0 +1,2 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# SPDX-License-Identifier: MIT diff --git a/LICENSE_HEADER_TS b/LICENSE_HEADER_TS new file mode 100644 index 0000000..f4a7ed9 --- /dev/null +++ b/LICENSE_HEADER_TS @@ -0,0 +1,2 @@ +// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +// SPDX-License-Identifier: MIT diff --git a/Makefile b/Makefile index b731c43..3780905 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help lint format install-dev serve test coverage langgraph-dev lint-frontend +.PHONY: help lint format install-dev serve test coverage langgraph-dev lint-frontend add-license add-license-ts add-license-all check-license check-license-ts check-license-all help: ## Show this help message @echo "Deer Flow - Available Make Targets:" @@ -34,3 +34,11 @@ langgraph-dev: ## Start langgraph development server coverage: ## Run tests with coverage report uv run pytest --cov=src tests/ --cov-report=term-missing --cov-report=xml + +add-license-all: ## Add license headers to all Python and TypeScript files + @echo "Adding license headers to all source files..." + @uv run python scripts/license_header.py src/ tests/ server.py main.py web/src/ web/tests/ --verbose + +check-license-all: ## Check if all Python and TypeScript files have license headers + @echo "Checking license headers in all source files..." + @uv run python scripts/license_header.py src/ tests/ server.py main.py web/src/ web/tests/ --check diff --git a/docs/LICENSE_HEADERS.md b/docs/LICENSE_HEADERS.md new file mode 100644 index 0000000..f104799 --- /dev/null +++ b/docs/LICENSE_HEADERS.md @@ -0,0 +1,224 @@ +# License Header Management + +This document explains how to manage license headers in the DeerFlow project. + +## License Header Format + +All source files in this project should include license headers. 
+ +### Python Files + +```python +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# SPDX-License-Identifier: MIT +``` + +For files with a shebang (`#!/usr/bin/env python3`), the header is placed after the shebang: + +```python +#!/usr/bin/env python3 +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# SPDX-License-Identifier: MIT + +import something +``` + +### TypeScript Files + +```typescript +// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +// SPDX-License-Identifier: MIT + +import { something } from "somewhere"; +``` + +## Makefile Targets + +### Check License Headers + +Check if all Python and TypeScript files have the required license header: + +```bash +# Check all files (Python and TypeScript) +make check-license-all + +# Check only Python files +make check-license + +# Check only TypeScript files +make check-license-ts +``` + +These commands: +- Scan all source files in `src/`, `tests/`, `web/src/`, `web/tests/`, and root-level files +- Report files missing the license header +- Return exit code 1 if any files are missing headers (useful for CI/CD) +- Return exit code 0 if all files have headers + +### Add License Headers + +Automatically add license headers to files that don't have them: + +```bash +# Add to all files (Python and TypeScript) +make add-license-all + +# Add only to Python files +make add-license + +# Add only to TypeScript files +make add-license-ts +``` + +These commands: +- Add the appropriate license header to files that don't have it +- Preserve shebangs at the top of Python files +- Add appropriate spacing after headers +- Show vTypeScript files +uv run python scripts/license_header.py web/src/components/ --check + +# Check a single file (works for both .py and .ts/.tsx) +uv run python scripts/license_header.py src/workflow.py --check +uv run python scripts/license_header.py web/src/core/api/chat.ts --check +``` + +### Script Options + +- `--check`: Check mode - verify headers without modifying files +- 
`--verbose` / `-v`: Show detailed output for each file processed +- `paths`: One or more paths (files or directories) to process + +### Supported File Types + +The script automatically detects and processes: +- Python files (`.py`) +- TypeScript files (`.ts`) +- TypeScript React files (`.tsx`) + +## Pre-commit Hook + +The license header check is integrated into the pre-commit hook. Before allowing a commit, it will: + +1. Run linting (`make lint`) +2. Run formatting (`make format`) +3. Check license headers (`make check-license-all`) + +If any files are missing license headers, the commit will be blocked. Run `make add-license-all` to fix. + +This ensures all merged code has proper license headers for both Python and TypeScript files. + +## CI/CD Integration + +For continuous integration, add the license check to your workflow: + +```bash +# In your CI script or GitHub Actions +make check-license-all +``` + +This ensures all merged code has proper license headers. + +## Files Excluded + +The license header tool automatically skips: +- `__pycache__` directories +- `.pytest_cache`, `.ruff_cache`, `.mypy_cache` +- `node_modules` +- `.next` (Next.js build directory) +- Virtual environment directories (`.venv`, `venv`, `.tox`) +- Build artifacts (`build`, `dist`) +- `.git` directory + +## Customization + +### Changing the License Header + +To change the license header, edit the `LICENSE_HEADERS` dictionary in `scripts/license_header.py`: + +```python +LICENSE_HEADERS: Dict[str, str] = { + "python": """# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# SPDX-License-Identifier: MIT +""", + "typescript": """// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +// SPDX-License-Identifier: MIT +""", +} +``` + +### Adding More Directories + +To cover additional directories, add them to the license targets in the `Makefile`, for example: + +```makefile +add-license-all: + @uv run python scripts/license_header.py src/ tests/ scripts/ web/src/ web/tests/ +``` + +### Adding More File Types + +To support additional file types: + +1. Add the extension to `FILE_TYPE_MAP` in `scripts/license_header.py` +2. Add the corresponding header format to `LICENSE_HEADERS` + +```python +FILE_TYPE_MAP = { + ".py": "python", + ".ts": "typescript", + ".tsx": "typescript", + ".js": "javascript", # Example: adding JavaScript support +} + +LICENSE_HEADERS = { + # ... 
existing headers ... + "javascript": """// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +// SPDX-License-Identifier: MIT +""", +}PDX-License-Identifier: MIT +""" +``` +-all +Checking license headers in all source files... +✅ All 289 source file(s) have license headers. +``` + +### Example 2: Check Only TypeScript Files +```bash +$ make check-license-ts +Checking license headers in TypeScript files... +❌ 3 file(s) missing license header: + - web/src/components/new-component.tsx + - web/src/core/api/new-api.ts + - web/tests/new-test.test.ts + +Run 'make add-license-ts' to add headers. +``` + +### Example 3: Add Headers to New Module +```bash +$ make add-license-all +Adding license headers to all source files... +✅ Added license header to 11 file(s). +``` + +### Example 4: Check Specific Directory +```bash +$ uv run python scripts/license_header.py web/src/components/ --check --verbose +Header already present: web/src/components/deer-flow/logo.tsx +Header already present: web/src/components/deer-flow/markdown.tsx +Header already present: web/src/components/editor/index.tsx +✅ All 24 sourceooks for exact matches (ignoring leading/trailing whitespace) + +### "Pre-commit hook blocks my commit" +- Run `make add-license` to add headers to all files +- Or disable the check temporarily by editing the `pre-commit` file + +## Examples + +### Example 1: Check All Files +```bash +$ make check-license +Checking license headers in Python files... +✅ All 156 Python file(s) have license headers. +``` + +### Example 2: Add Headers to New Module +```bash +$ make add-license +Adding license headers to Python files... +✅ Added license header to 11 file(s). +``` + +### Example 3: Check Specific Directory +```bash +$ uv run python scripts/license_header.py src/agents/ --check --verbose +Header already present: src/agents/base.py +Header already present: src/agents/coordinator.py +✅ All 8 Python file(s) have license headers. 
+```
diff --git a/pre-commit b/pre-commit
index b9a986a..5470705 100755
--- a/pre-commit
+++ b/pre-commit
@@ -20,6 +20,17 @@ if [ $FORMAT_RESULT -ne 0 ]; then
     exit 1
 fi
 
+# Check license headers
+echo "Checking license headers..."
+make check-license-all
+LICENSE_RESULT=$?
+
+if [ $LICENSE_RESULT -ne 0 ]; then
+    echo "❌ Some files are missing license headers."
+    echo "Run 'make add-license-all' to add them automatically."
+    exit 1
+fi
+
 # If any files were reformatted, add them back to staging
 git diff --name-only | xargs -I {} git add "{}"
 
diff --git a/scripts/license_header.py b/scripts/license_header.py
new file mode 100644
index 0000000..7d53bac
--- /dev/null
+++ b/scripts/license_header.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# SPDX-License-Identifier: MIT
+"""Script to add or check license headers in Python and TypeScript files."""
+
+import argparse
+import sys
+from pathlib import Path
+from typing import Dict, List
+
+# License headers for different file types
+LICENSE_HEADERS: Dict[str, str] = {
+    "python": """# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# SPDX-License-Identifier: MIT
+""",
+    "typescript": """// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+// SPDX-License-Identifier: MIT
+""",
+}
+
+# File extensions mapping
+FILE_TYPE_MAP = {
+    ".py": "python",
+    ".ts": "typescript",
+    ".tsx": "typescript",
+}
+
+# Names to skip; matched against whole path components (see should_skip)
+SKIP_PATTERNS = [
+    "__pycache__",
+    ".pytest_cache",
+    ".ruff_cache",
+    "node_modules",
+    ".next",
+    ".venv",
+    "venv",
+    ".tox",
+    "build",
+    "dist",
+    ".git",
+    ".mypy_cache",
+]
+
+
+def should_skip(path: Path) -> bool:
+    """Check if a path should be skipped.
+
+    Whole path components are compared against SKIP_PATTERNS rather than
+    substrings of the path, so files such as ``src/builder.py`` or
+    ``src/distance.py`` and directories such as ``.github`` are not
+    mistaken for ``build``, ``dist`` or ``.git``.
+    """
+    return any(part in SKIP_PATTERNS for part in path.parts)
+
+
+def get_file_type(file_path: Path) -> str | None:
+    """Get the file type based on extension (None if unsupported)."""
+    return FILE_TYPE_MAP.get(file_path.suffix)
+
+
+def has_license_header(content: str, file_type: str) -> bool:
+    """Check if content already has the license header.
+
+    The header must be the first non-blank text of the file, optionally
+    preceded by a shebang line. Lines are compared after stripping
+    surrounding whitespace, so CRLF line endings do not matter.
+    """
+    lines = content.split("\n")
+    header_lines = LICENSE_HEADERS[file_type].strip().split("\n")
+
+    # Skip shebang if present (Python files)
+    start_idx = 1 if lines[0].startswith("#!") else 0
+    # Skip blank lines before the header, with or without a shebang, so a
+    # header preceded by stray blank lines is not added a second time
+    while start_idx < len(lines) and not lines[start_idx].strip():
+        start_idx += 1
+
+    candidate = lines[start_idx : start_idx + len(header_lines)]
+    if len(candidate) < len(header_lines):
+        return False
+    return all(
+        found.strip() == expected.strip()
+        for found, expected in zip(candidate, header_lines)
+    )
+
+
+def _insert_header(content: str, header: str) -> str:
+    """Return content with header inserted at the top.
+
+    A shebang line, if present, stays first and the header follows it
+    directly. Blank lines at the insertion point are collapsed so exactly
+    one blank line separates the header from the rest of the file, and the
+    inserted lines reuse the file's existing line-ending style.
+    """
+    eol = "\r\n" if "\r\n" in content else "\n"
+    lines = content.splitlines(keepends=True)
+    new_lines = []
+
+    # Preserve shebang at the top if present (Python files)
+    start_idx = 0
+    if lines and lines[0].startswith("#!"):
+        shebang = lines[0]
+        # A file consisting of only a shebang may lack a final newline
+        new_lines.append(shebang if shebang.endswith(("\n", "\r")) else shebang + eol)
+        start_idx = 1
+    while start_idx < len(lines) and not lines[start_idx].strip():
+        start_idx += 1
+
+    new_lines.extend(line + eol for line in header.strip().split("\n"))
+    rest = lines[start_idx:]
+    if rest:
+        new_lines.append(eol)  # Empty line after header
+        new_lines.extend(rest)
+    return "".join(new_lines)
+
+
+def add_license_header(file_path: Path, dry_run: bool = False) -> bool:
+    """Add license header to a file if not present.
+
+    Args:
+        file_path: Path to the file
+        dry_run: If True, only check without modifying
+
+    Returns:
+        True if header was added (or would be added in dry-run), False if
+        the header is already present, the file type is unsupported, or the
+        file could not be read or written (the error is printed to stderr)
+    """
+    file_type = get_file_type(file_path)
+    if not file_type:
+        return False
+
+    try:
+        # newline="" disables newline translation so CRLF files stay CRLF
+        with file_path.open(encoding="utf-8", newline="") as handle:
+            content = handle.read()
+    except (OSError, UnicodeError) as e:
+        print(f"Error reading {file_path}: {e}", file=sys.stderr)
+        return False
+
+    if has_license_header(content, file_type):
+        return False
+
+    if dry_run:
+        return True
+
+    new_content = _insert_header(content, LICENSE_HEADERS[file_type])
+    try:
+        with file_path.open("w", encoding="utf-8", newline="") as handle:
+            handle.write(new_content)
+    except OSError as e:
+        print(f"Error writing {file_path}: {e}", file=sys.stderr)
+        return False
+    return True
+
+
+def find_source_files(root: Path) -> List[Path]:
+    """Find all Python and TypeScript files in the given directory tree."""
+    source_files = [
+        path
+        for path in root.rglob("*")
+        if path.suffix in FILE_TYPE_MAP and path.is_file() and not should_skip(path)
+    ]
+    return sorted(source_files)
+
+
+def main():
+    """Parse arguments, process the requested paths, and return an exit code."""
+    parser = argparse.ArgumentParser(
+        description="Add or check license headers in Python and TypeScript files"
+    )
+    parser.add_argument(
+        "paths",
+        nargs="*",
+        default=["."],
+        help="Paths to check (files or directories)",
+    )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Check if headers are present without modifying files",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Verbose output",
+    )
+
+    args = parser.parse_args()
+
+    # Collect all source files
+    all_files = []
+    for path_str in args.paths:
+        path = Path(path_str)
+        if not path.exists():
+            print(f"Error: Path does not exist: {path}", file=sys.stderr)
+            sys.exit(1)
+
+        if path.is_file():
+            if path.suffix in FILE_TYPE_MAP and not should_skip(path):
+                all_files.append(path)
+        else:
+            all_files.extend(find_source_files(path))
+
+    if not all_files:
+        print("No source files found.")
+        return 0
+
+    # Process files
+    missing_header = []
+    modified = []
+
+    for file_path in all_files:
+        if add_license_header(file_path, dry_run=args.check):
+            missing_header.append(file_path)
+            if not args.check:
+                modified.append(file_path)
+                if args.verbose:
+                    print(f"Added header to: {file_path}")
+        elif args.verbose:
+            print(f"Header already present: {file_path}")
+
+    # Report results
+    if args.check:
+        if missing_header:
+            print(f"\n❌ {len(missing_header)} file(s) missing license header:")
+            for path in missing_header:
+                print(f"  - {path}")
+            print("\nRun 'make add-license-all' to add headers.")
+            return 1
+        else:
+            print(f"✅ All {len(all_files)} source file(s) have license headers.")
+            return 0
+    else:
+        if modified:
+            print(f"✅ Added license header to {len(modified)} file(s).")
+        else:
+            print(f"✅ All {len(all_files)} source file(s) already have license headers.")
+        return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())