diff --git a/.github/actions/post-clang-tidy-results/action.yaml b/.github/actions/post-clang-tidy-results/action.yaml index 64ea1ef4f..f8a727e69 100644 --- a/.github/actions/post-clang-tidy-results/action.yaml +++ b/.github/actions/post-clang-tidy-results/action.yaml @@ -12,6 +12,11 @@ inputs: required: false default: "false" +outputs: + has_content: + description: "Whether any clang-tidy artifacts with reportable content were found" + value: ${{ steps.check.outputs.has_summary_content }} + runs: using: "composite" steps: diff --git a/.github/workflows/clang-tidy-check.yaml b/.github/workflows/clang-tidy-check.yaml index 6139a89af..3d4fd0163 100644 --- a/.github/workflows/clang-tidy-check.yaml +++ b/.github/workflows/clang-tidy-check.yaml @@ -217,8 +217,23 @@ jobs: issues: write steps: - name: Post Clang-Tidy results + id: post_results uses: Framework-R-D/phlex/.github/actions/post-clang-tidy-results@main with: build-path: ${{ needs.setup.outputs.build_path }} pr-number: ${{ needs.setup.outputs.pr_number }} post-summary: "true" + + - name: Post no-issues comment + if: >- + always() && github.event_name == 'issue_comment' && needs.clang-tidy-check.result == 'success' && + steps.post_results.outputs.has_content != 'true' && needs.setup.outputs.pr_number != '' + uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1 + with: + message: "No clang-tidy issues found." 
+ + - name: Update PR comment reactions + if: always() && github.event_name == 'issue_comment' + uses: Framework-R-D/phlex/.github/actions/complete-pr-comment@main + with: + status: ${{ needs.clang-tidy-check.result }} diff --git a/.github/workflows/cmake-build.yaml b/.github/workflows/cmake-build.yaml index cff0d44e3..e49310660 100644 --- a/.github/workflows/cmake-build.yaml +++ b/.github/workflows/cmake-build.yaml @@ -302,6 +302,7 @@ jobs: runs-on: ubuntu-latest permissions: pull-requests: write + issues: write steps: - name: Comment on build completion @@ -319,3 +320,9 @@ jobs: See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for detailed results. # yamllint enable + + - name: Update PR comment reactions + if: always() + uses: Framework-R-D/phlex/.github/actions/complete-pr-comment@main + with: + status: ${{ needs.build.result }} diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index 1db29ec5c..1888c65b1 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -83,6 +83,7 @@ jobs: has_coverage_xml: ${{ steps.coverage_outputs.outputs.has_coverage_xml }} has_coverage_html: ${{ steps.coverage_outputs.outputs.has_coverage_html }} has_coverage_llvm_info: ${{ steps.coverage_outputs.outputs.has_coverage_llvm_info }} + has_coverage_python_xml: ${{ steps.coverage_outputs.outputs.has_coverage_python_xml }} artifact_upload_available: ${{ steps.artifact_runtime.outputs.available }} steps: @@ -271,7 +272,6 @@ jobs: --cov=scripts \ --cov-report=xml:"$BUILD_DIR/coverage-scripts.xml" \ --cov-report=term-missing \ - --omit="scripts/test/*" \ -q; then echo "✅ Scripts coverage report generation succeeded." 
else @@ -454,3 +454,32 @@ jobs: path: coverage-artifacts/coverage-html/ if-no-files-found: warn retention-days: 30 + + coverage-complete: + needs: [setup, coverage, coverage-upload] + if: always() && github.event_name == 'issue_comment' && needs.setup.result == 'success' + runs-on: ubuntu-latest + permissions: + pull-requests: write + issues: write + + steps: + - name: Comment on coverage completion + uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1 + with: + message: | + Coverage workflow completed. + + **Result:** ${{ needs.coverage.result == 'success' && '✅ Coverage analysis passed.' + || needs.coverage.result == 'failure' && '❌ Coverage analysis failed.' + || needs.coverage.result == 'cancelled' && '⚠️ Coverage analysis was cancelled.' + || needs.coverage.result == 'skipped' && 'ℹ️ No relevant changes detected; coverage skipped.' + || format('ℹ️ Coverage completed with status: {0}.', needs.coverage.result) }} + + See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for detailed results. + + - name: Update PR comment reactions + if: always() + uses: Framework-R-D/phlex/.github/actions/complete-pr-comment@main + with: + status: ${{ needs.coverage.result }} diff --git a/.gitignore b/.gitignore index 485e805b9..f04cfb84b 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,4 @@ docs/build/ .act-artifacts/ .secrets actionlint +.coverage diff --git a/pyproject.toml b/pyproject.toml index c7eae4a1f..274287260 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,8 +30,33 @@ extend-ignore = [ "E203", # whitespace before ':' ] +[tool.ruff.lint.isort] +# Scripts are treated as first-party so that `import clang_tidy_check_summary` +# and similar local imports are grouped correctly relative to third-party ones +# (e.g. pytest), avoiding false I001 violations in scripts/test/. 
+known-first-party = [ + "check_codeql_alerts", + "clang_tidy_check_summary", + "clang_tidy_diff_issues", + "clang_tidy_fixes_to_problems", + "codeql_reset_dismissed_alerts", + "create_coverage_symlinks", + "export_llvm_lcov", + "fix_header_guards", + "normalize_coverage_lcov", + "normalize_coverage_xml", + "sarif_alerts", +] + [tool.ruff.lint.pydocstyle] convention = "google" [tool.ruff.format] quote-style = "double" + +[tool.coverage.run] +# Exclude the test helpers from coverage measurement. pytest-cov resolves +# this glob relative to the measurement source root (scripts/), so the pattern +# works whether pytest is invoked from the repo root or a build directory that +# sets --cov=scripts. +omit = ["scripts/test/*"] diff --git a/scripts/QUICK_REFERENCE.md b/scripts/QUICK_REFERENCE.md index 801395daa..7e7fa1638 100644 --- a/scripts/QUICK_REFERENCE.md +++ b/scripts/QUICK_REFERENCE.md @@ -150,6 +150,58 @@ Quick coverage workflow: See `scripts/README.md` for detailed coverage documentation. +## Developer Tools (Quick Reference) + +Three local-only utility scripts for post-processing clang-tidy output and +managing GitHub CodeQL alerts. None of these are invoked by CI. 
+ +### clang-tidy Checklist + +After running `run-clang-tidy -export-fixes build/fixes.yaml ...`, generate a +markdown task list of checks with occurrence counts: + +```bash +# Plain list to stdout +python3 scripts/clang_tidy_check_summary.py build/fixes.yaml + +# With documentation hyperlinks, written to a file +python3 scripts/clang_tidy_check_summary.py build/fixes.yaml --links \ + -o summary.md +``` + +### clang-tidy → VS Code Problem Links + +Convert the same YAML to gcc-style `file:line:col: severity: message [check]` +lines that VS Code's `$gcc` problem matcher turns into clickable source links: + +```bash +# Diagnostics to stdout (clickable in VS Code terminal) +python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml + +# Translate CI runner paths to local paths +python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml \ + --path-map /__w/phlex/phlex/phlex-src=/your/local/checkout \ + -o build/problems.txt +``` + +### Reset Dismissed CodeQL Alerts + +Reopen all dismissed CodeQL alerts so the next scan re-evaluates them: + +```bash +# Preview (no changes) +GITHUB_TOKEN=$(gh auth token) \ +python3 scripts/codeql_reset_dismissed_alerts.py \ + --owner Framework-R-D --repo phlex --dry-run + +# Live run +GITHUB_TOKEN=$(gh auth token) \ +python3 scripts/codeql_reset_dismissed_alerts.py \ + --owner Framework-R-D --repo phlex +``` + +See `scripts/README.md` for full documentation on all three tools. + ## Getting Help - Script documentation: `scripts/README.md` diff --git a/scripts/README.md b/scripts/README.md index 5d6cfdc18..b6b54973c 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -316,6 +316,171 @@ cd build && ctest -R test_name ./scripts/coverage.sh clean test summary ``` +## Developer Tools + +These scripts are **not** invoked by CI — they are local developer utilities +for post-processing clang-tidy output and managing GitHub code-scanning alerts. 
+ +### `clang_tidy_check_summary.py` + +Reads a `clang-tidy-fixes.yaml` file (produced by `run-clang-tidy +-export-fixes`) and writes a compact markdown task list — one line per check +name with its unique occurrence count — suitable for pasting into a GitHub +issue or PR description to track remediation progress. + +#### Usage (`clang_tidy_check_summary.py`) + +```bash +# Print checklist to stdout +python3 scripts/clang_tidy_check_summary.py build/clang-tidy-fixes.yaml + +# Write to a file +python3 scripts/clang_tidy_check_summary.py build/clang-tidy-fixes.yaml \ + -o summary.md + +# Add documentation hyperlinks for each check name +python3 scripts/clang_tidy_check_summary.py build/clang-tidy-fixes.yaml \ + --links -o summary.md +``` + +#### Output format (`clang_tidy_check_summary.py`) + +```markdown +- [ ] cert-err34-c (3) +- [ ] modernize-use-nullptr (12) +- [ ] readability-identifier-naming (47) +``` + +With `--links` each check name becomes a hyperlink to its clang-tidy +documentation page. + +#### Typical workflow + +```bash +# 1. Run clang-tidy and collect fixes (from repo root) +run-clang-tidy -p build -export-fixes build/fixes.yaml phlex/ form/ plugins/ + +# 2. Generate a summary for a GitHub issue +python3 scripts/clang_tidy_check_summary.py build/fixes.yaml --links \ + -o summary.md + +# 3. Paste summary.md into the issue body +``` + +#### Uniqueness counting + +Two entries are collapsed into a single occurrence when they share the same +`(DiagnosticName, FilePath, FileOffset)` triplet — this avoids inflating +counts when the same location is reported by multiple translation units. + +--- + +### `clang_tidy_fixes_to_problems.py` + +Converts a `clang-tidy-fixes.yaml` file into gcc/clang compiler-style +diagnostic lines: + +```text +/abs/path/file.cpp:line:col: warning: message [check-name] +``` + +VS Code's built-in `$gcc` problem matcher recognises this format and turns +each line into a clickable source link in the terminal panel. 
The script also +handles CI-to-local path translation via `--path-map`. + +#### Usage (`clang_tidy_fixes_to_problems.py`) + +```bash +# Print diagnostics to stdout (opens as clickable links in VS Code terminal) +python3 scripts/clang_tidy_fixes_to_problems.py build/clang-tidy-fixes.yaml + +# Write to a file +python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml \ + -o build/problems.txt + +# Translate CI runner paths to local paths +python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml \ + --path-map /__w/phlex/phlex/phlex-src=/home/user/phlex \ + --workspace-root /home/user/phlex \ + -o build/problems.txt +``` + +#### Path mapping + +clang-tidy embeds absolute paths as seen on the machine that ran the build. +When you download a `clang-tidy-fixes.yaml` artifact from CI you need to +translate the runner paths to your local checkout. + +Use `--path-map OLD=NEW` (repeatable). Two default mappings for the standard +Phlex CI layout are applied automatically after any explicit `--path-map` +entries: + +| CI path prefix | Local path | +| :--- | :--- | +| `/__w/phlex/phlex/phlex-src` | `` | +| `/__w/phlex/phlex/phlex-build` | `/build` | + +#### External-header handling + +When a diagnostic's primary location is inside a system or third-party header +that is absent locally, the script searches the attached clang-tidy notes for +a trace back to workspace source code and redirects the reported location +there. The original external location is preserved in the message text. + +--- + +### `codeql_reset_dismissed_alerts.py` + +Reopens every dismissed CodeQL code-scanning alert for a repository via the +GitHub REST API. Useful before an audit or after a large refactor to ensure +the alert list reflects the current code rather than past dismissals. 
+ +#### Authentication + +Set a GitHub personal access token (PAT) with the `security_events` scope +before running: + +```bash +export GITHUB_TOKEN=ghp_your_token_here + +# Or reuse the token from the GitHub CLI +export GH_TOKEN=$(gh auth token) +``` + +#### Usage (`codeql_reset_dismissed_alerts.py`) + +```bash +# Preview what would be reopened (no changes made) +GITHUB_TOKEN=ghp_... python3 scripts/codeql_reset_dismissed_alerts.py \ + --owner Framework-R-D --repo phlex --dry-run + +# Reopen all dismissed alerts +GITHUB_TOKEN=ghp_... python3 scripts/codeql_reset_dismissed_alerts.py \ + --owner Framework-R-D --repo phlex +``` + +#### What it does + +1. Pages through all alerts with `state=dismissed` in batches of 100. +2. Prints the alert number, rule ID, and dismissal reason for each one. +3. Issues a `PATCH` request per alert to set `state=open`. + +After running, the next scheduled or triggered CodeQL analysis will +re-evaluate every reopened alert and either confirm it as still present or +close it as fixed. + +#### Rate limits + +Each alert requires one PATCH request. 
The GitHub API allows 5 000 +authenticated requests per hour; for repositories with large numbers of +dismissed alerts, check your remaining quota first: + +```bash +gh api /rate_limit +``` + +--- + ## Additional Notes - All scripts should be run from the repository root or scripts directory diff --git a/scripts/check_codeql_alerts.py b/scripts/check_codeql_alerts.py index 285fadc78..a1b32c308 100644 --- a/scripts/check_codeql_alerts.py +++ b/scripts/check_codeql_alerts.py @@ -242,9 +242,10 @@ def _paginate_alerts_api( owner: str, repo: str, *, state: str = "open", ref: str | None = None ) -> collections.abc.Iterator[dict]: """Paginate Code Scanning alerts from the GitHub API for given repo and ref.""" + per_page = 100 page = 1 while True: - params = {"state": state, "per_page": 100, "page": page} + params = {"state": state, "per_page": per_page, "page": page} if ref: params["ref"] = ref result = _api_request("GET", f"/repos/{owner}/{repo}/code-scanning/alerts", params=params) @@ -254,6 +255,9 @@ def _paginate_alerts_api( return for alert in result: yield alert + # GitHub returns fewer than per_page items on the last page + if len(result) < per_page: + return page += 1 @@ -350,7 +354,7 @@ def _to_alert_api(raw: dict) -> Alert: html_url=raw.get("html_url"), rule_id=rule_id, level=(raw.get("severity") or "warning"), - message=str(raw.get("message", {}).get("text") or rule_name or "(no message)"), + message=str((instance.get("message") or {}).get("text") or rule_name or "(no message)"), location=location, rule_name=rule_name, help_uri=help_uri, @@ -790,7 +794,7 @@ def write_summary( if new_alerts: handle.write( f"❌ {len(new_alerts)} new alert{'s' if len(new_alerts) != 1 else ''} " - "(level ≥ {threshold}).\n" + f"(level ≥ {threshold}).\n" ) for line in _format_section(new_alerts, max_results=max_results, bullet_prefix=":x:"): handle.write(f"{line}\n") @@ -873,7 +877,9 @@ def set_outputs( handle.write("log_path=\n") -def _compare_alerts_via_api(owner: str, repo: str, 
ref: str) -> APIAlertComparison: +def _compare_alerts_via_api( + owner: str, repo: str, ref: str, *, min_level: str = "warning" +) -> APIAlertComparison: """Compare alerts between a ref and the main branch using the GitHub API.""" # Fetch alerts for the PR merge ref (fixed) and for the repo default state (open) pr_alerts_raw = list(_paginate_alerts_api(owner, repo, state="open", ref=ref)) @@ -885,6 +891,7 @@ def _compare_alerts_via_api(owner: str, repo: str, ref: str) -> APIAlertComparis base_ref: str | None = None prev_commit_ref: str | None = None base_sha: str | None = None + base_alerts_raw: list[dict] = [] if ref.startswith("refs/pull/"): try: pr_num = int(ref.split("/")[2]) @@ -901,7 +908,6 @@ def _compare_alerts_via_api(owner: str, repo: str, ref: str) -> APIAlertComparis base_ref = None prev_commit_ref = None - base_alerts_raw: list[dict] = [] if base_ref or base_sha: # prefer base SHA if available base_target = base_sha or base_ref @@ -986,13 +992,20 @@ def alert_key(a: Alert) -> tuple[str, str]: new_vs_base = [pr_alerts[rid] for rid in sorted(new_vs_base_ids)] fixed_vs_base = [base_alerts[rid] for rid in sorted(fixed_vs_base_ids)] + def _meets_threshold(a: Alert) -> bool: + return severity_reaches_threshold(a.level, min_level) + + # Resolved alerts (fixed_*) are intentionally NOT filtered by min_level: + # surfacing that any alert was resolved — even a below-threshold one — is + # always useful information for reviewers. Only *new* alerts are gated by + # the threshold because those are the ones that can block a PR. 
return APIAlertComparison( - new_alerts=[pr_alerts[rid] for rid in sorted(new_ids)], + new_alerts=[pr_alerts[rid] for rid in sorted(new_ids) if _meets_threshold(pr_alerts[rid])], fixed_alerts=[main_alerts[rid] for rid in sorted(fixed_ids)], matched_alerts=[pr_alerts[rid] for rid in sorted(pr_keys & main_keys)], - new_vs_prev=new_vs_prev, + new_vs_prev=[a for a in new_vs_prev if _meets_threshold(a)], fixed_vs_prev=fixed_vs_prev, - new_vs_base=new_vs_base, + new_vs_base=[a for a in new_vs_base if _meets_threshold(a)], fixed_vs_base=fixed_vs_base, base_sha=base_sha, prev_commit_ref=prev_commit_ref, @@ -1066,6 +1079,38 @@ def _build_multi_section_comment( ) lines.append("") + # When no finer-grained comparisons are available, fall back to the main + # new/fixed-vs-main lists so the comment always shows useful alert details. + has_detail = ( + api_comp.new_vs_prev + or api_comp.fixed_vs_prev + or api_comp.new_vs_base + or api_comp.fixed_vs_base + ) + if not has_detail: + if api_comp.new_alerts: + lines.append( + f"## ❌ {len(api_comp.new_alerts)} new CodeQL alert" + f"{'s' if len(api_comp.new_alerts) != 1 else ''} compared to main" + ) + lines.extend( + _format_section(api_comp.new_alerts, max_results=max_results, bullet_prefix=":x:") + ) + lines.append("") + if api_comp.fixed_alerts: + lines.append( + f"## ✅ {len(api_comp.fixed_alerts)} CodeQL alert" + f"{'s' if len(api_comp.fixed_alerts) != 1 else ''} resolved compared to main" + ) + lines.extend( + _format_section( + api_comp.fixed_alerts, + max_results=max_results, + bullet_prefix=":white_check_mark:", + ) + ) + lines.append("") + repo_str = os.environ.get("GITHUB_REPOSITORY") if repo_str: code_scanning_url = f"https://github.com/{repo_str}/security/code-scanning" @@ -1073,7 +1118,7 @@ def _build_multi_section_comment( else: lines.append("Review the CodeQL report in the Security tab for full details.") - return "\n".join(line for line in lines if line).strip() + "\n" + return "\n".join(lines).strip() + "\n" def 
main(argv: collections.abc.Sequence[str] | None = None) -> int: @@ -1119,7 +1164,7 @@ def main(argv: collections.abc.Sequence[str] | None = None) -> int: return 2 owner, repo = repo_full.split("/", 1) try: - api_comp = _compare_alerts_via_api(owner, repo, args.ref) + api_comp = _compare_alerts_via_api(owner, repo, args.ref, min_level=min_level) new_alerts = api_comp.new_alerts fixed_alerts = api_comp.fixed_alerts except GitHubAPIError as exc: diff --git a/scripts/clang_tidy_check_summary.py b/scripts/clang_tidy_check_summary.py index dfe09e310..add6c106d 100755 --- a/scripts/clang_tidy_check_summary.py +++ b/scripts/clang_tidy_check_summary.py @@ -1,13 +1,52 @@ #!/usr/bin/env python3 """Summarize clang-tidy diagnostics as a markdown checklist. -Reads a clang-tidy export-fixes YAML file and writes an alphabetically ordered -markdown checklist of diagnostic check names with unique occurrence counts: - - - [ ] check-name (n) - -A unique occurrence is determined by the (DiagnosticName, FilePath, FileOffset) -triplet; identical triplets are counted only once. +PURPOSE +------- +After running clang-tidy with ``-export-fixes`` (or ``run-clang-tidy``), you +have a YAML file listing every diagnostic the tool found. This script reads +that file and produces a compact markdown task list — one line per *check +name* with its unique occurrence count — that you can paste directly into a +GitHub issue or PR comment to track remediation progress. 
+ +OUTPUT FORMAT +------------- +Each line is an unchecked GitHub-Flavored Markdown task item:: + + - [ ] cert-err34-c (3) + - [ ] modernize-use-nullptr (12) + - [ ] readability-identifier-naming (47) + +With ``--links`` each check name becomes a hyperlink to its clang-tidy +documentation page:: + + - [ ] [cert-err34-c](https://clang.llvm.org/extra/clang-tidy/checks/cert/err34-c.html) (3) + +UNIQUENESS +---------- +Two diagnostic entries are considered the same occurrence if they share the +same (DiagnosticName, FilePath, FileOffset) triplet. Duplicate triplets are +collapsed so that the reported count reflects distinct locations, not the +number of times the check fired per translation unit. + +USAGE +----- + # Read from a file, write to stdout + python3 scripts/clang_tidy_check_summary.py build/clang-tidy-fixes.yaml + + # Read from stdin (e.g. piped from run-clang-tidy) + run-clang-tidy ... | python3 scripts/clang_tidy_check_summary.py + + # Write to a file instead of stdout + python3 scripts/clang_tidy_check_summary.py fixes.yaml -o summary.md + + # Add documentation hyperlinks for each check name + python3 scripts/clang_tidy_check_summary.py fixes.yaml --links + + # Typical post-clang-tidy workflow + run-clang-tidy -p build -export-fixes build/fixes.yaml phlex/ form/ + python3 scripts/clang_tidy_check_summary.py build/fixes.yaml --links -o summary.md + cat summary.md # paste into GitHub issue """ from __future__ import annotations @@ -20,7 +59,14 @@ def _load_diagnostics(path: Path | None) -> list[dict]: - """Load the Diagnostics list from a clang-tidy YAML file or stdin.""" + """Load the Diagnostics list from a clang-tidy YAML file or stdin. + + Args: + path: Path to the clang-tidy YAML file, or ``None`` to read stdin. + + Returns: + The list of diagnostic entry dicts, or an empty list on any error. 
+ """ text = path.read_text(encoding="utf-8") if path is not None else sys.stdin.read() try: data = yaml.safe_load(text) @@ -37,13 +83,16 @@ def count_unique_diagnostics(diagnostics: list[dict]) -> dict[str, int]: """Count unique occurrences of each diagnostic check name. Uniqueness is determined by the (DiagnosticName, FilePath, FileOffset) - triplet. Identical triplets are counted only once. + triplet. Identical triplets are counted only once, so the count reflects + the number of distinct source locations flagged by each check rather than + the number of translation units that reported it. Args: - diagnostics: List of diagnostic entries from the clang-tidy YAML. + diagnostics: List of diagnostic entry dicts from the clang-tidy YAML. Returns: - Mapping from check name to count of unique occurrences. + Mapping from check name to count of unique occurrences, suitable for + passing directly to :func:`format_checklist`. """ seen: set[tuple[str, str, int | None]] = set() counts: dict[str, int] = {} @@ -70,7 +119,15 @@ def count_unique_diagnostics(diagnostics: list[dict]) -> dict[str, int]: def _check_url(name: str) -> str: - """Derive the clang-tidy documentation URL for a check name.""" + """Derive the clang-tidy documentation URL for a check name. + + Args: + name: A clang-tidy check name such as ``readability-identifier-naming`` + or ``clang-analyzer-core.NullDereference``. + + Returns: + The canonical LLVM documentation URL for the check. + """ if name.startswith("clang-analyzer-"): rest = name[len("clang-analyzer-") :] return f"https://clang.llvm.org/extra/clang-tidy/checks/clang-analyzer/{rest}.html" @@ -81,14 +138,24 @@ def _check_url(name: str) -> str: def format_checklist(counts: dict[str, int], *, links: bool = False) -> str: - """Format diagnostic counts as a markdown checklist. + """Format diagnostic counts as a markdown task list. + + Entries are sorted alphabetically by check name. 
Each entry is an + unchecked GFM task-list item:: + + - [ ] check-name (n) + + With ``links=True`` the check name becomes a Markdown hyperlink to the + clang-tidy documentation page for that check. Args: - counts: Mapping from check name to unique occurrence count. - links: If True, hyperlink each check name to its documentation page. + counts: Mapping from check name to unique occurrence count, as + returned by :func:`count_unique_diagnostics`. + links: When ``True``, hyperlink each check name to its documentation. Returns: - Markdown-formatted checklist string with entries alphabetically ordered. + A newline-joined string of task-list items (no trailing newline). + Returns an empty string when *counts* is empty. """ lines = [] for name, count in sorted(counts.items()): @@ -98,36 +165,54 @@ def format_checklist(counts: dict[str, int], *, links: bool = False) -> str: def build_arg_parser() -> argparse.ArgumentParser: - """Build and return the argument parser.""" + """Build and return the argument parser for this script.""" parser = argparse.ArgumentParser( + prog="clang_tidy_check_summary.py", description=( - "Summarize clang-tidy diagnostics as an alphabetically ordered" - " markdown checklist with unique occurrence counts." - ) + "Summarize clang-tidy diagnostics as an alphabetically ordered " + "markdown task list with unique occurrence counts." + ), + epilog=( + "examples:\n" + " %(prog)s build/clang-tidy-fixes.yaml\n" + " %(prog)s build/fixes.yaml --links -o summary.md\n" + " run-clang-tidy -export-fixes fixes.yaml phlex/ && %(prog)s fixes.yaml" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "input", type=Path, nargs="?", - help="Path to the clang-tidy YAML file (default: stdin)", + metavar="FIXES_YAML", + help=( + "Path to the clang-tidy export-fixes YAML file produced by " + "run-clang-tidy or clang-tidy --export-fixes. " + "Reads from stdin when omitted." 
+ ), ) parser.add_argument( "-o", "--output", type=Path, - help="Output file for the markdown checklist (default: stdout)", + metavar="FILE", + help="Write the markdown checklist to FILE instead of stdout.", ) parser.add_argument( "--links", action="store_true", default=False, - help="Hyperlink each check name to its clang-tidy documentation page.", + help=("Hyperlink each check name to its clang-tidy documentation page on clang.llvm.org."), ) return parser def main() -> int: - """Parse arguments and write the diagnostic checklist.""" + """Parse arguments and write the diagnostic checklist. + + Returns: + 0 on success (including when no diagnostics are found). + """ args = build_arg_parser().parse_args() diagnostics = _load_diagnostics(args.input) diff --git a/scripts/clang_tidy_fixes_to_problems.py b/scripts/clang_tidy_fixes_to_problems.py index 51d9f10b6..af7ab1ad4 100644 --- a/scripts/clang_tidy_fixes_to_problems.py +++ b/scripts/clang_tidy_fixes_to_problems.py @@ -1,11 +1,84 @@ #!/usr/bin/env python3 - -"""Convert clang-tidy export-fixes YAML into compiler-style diagnostics. - -The output format is compatible with VS Code problem matchers such as "$gcc": - - /abs/path/file.cpp:line:column: warning: message [check-name] - +r"""Convert a clang-tidy export-fixes YAML into compiler-style diagnostics. + +PURPOSE +------- +``run-clang-tidy`` (and ``clang-tidy --export-fixes``) produces a YAML file +that describes every diagnostic it found together with suggested fixes. The +file is machine-readable but not useful for quickly locating issues in an +editor or CI log. + +This script translates that YAML into the compact gcc/clang compiler output +format that editors, terminals, and problem-matcher tools understand:: + + /abs/path/to/file.cpp:line:col: warning: message [check-name] + +VS Code's built-in ``$gcc`` problem matcher picks up this format from the +terminal panel, turning every line into a clickable link that jumps straight +to the flagged source location. 
+ +TYPICAL WORKFLOWS +----------------- +**Local development — navigate to each issue in VS Code:** + + # 1. Generate the fixes file (source root as working directory so + # .clang-tidy is found) + cd /path/to/phlex + run-clang-tidy -p build -export-fixes build/fixes.yaml phlex/ form/ + + # 2. Convert to compiler-style diagnostics + python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml + + # VS Code's terminal panel now shows clickable source links. + +**CI — translate paths from the GitHub Actions runner to the local workspace:** + + python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml \\ + --workspace-root /local/checkout \\ + --path-map /__w/phlex/phlex/phlex-src=/local/checkout \\ + -o problems.txt + +PATH MAPPING +------------ +clang-tidy records absolute paths as it sees them during the build. When +fixes are generated in CI and then inspected locally, the paths embedded in +the YAML refer to the CI runner's filesystem (e.g. ``/__w/phlex/...``) and +will not resolve on your machine. + +Use ``--path-map OLD=NEW`` (repeatable) to rewrite path prefixes before +emitting diagnostic lines. The first matching mapping wins. Two default +mappings for the standard Phlex CI layout are applied after any explicit +``--path-map`` entries:: + + /__w/phlex/phlex/phlex-src → + /__w/phlex/phlex/phlex-build → /build + +EXTERNAL HEADERS +---------------- +clang-tidy sometimes reports a diagnostic whose primary location is inside a +system or third-party header that does not exist on the local machine. When +this happens the script looks at the ``Notes`` attached to the diagnostic for +an entry that traces back to source code inside the workspace — preferring +notes whose message begins with ``Calling '`` (the typical trace for +analyzer-style checks) — and redirects the reported location to that note. +The original external-header location is included in the message for context. 
+ +USAGE +----- + # Read fixes.yaml, write gcc-style lines to stdout + python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml + + # Read from stdin + python3 scripts/clang_tidy_fixes_to_problems.py < build/fixes.yaml + + # Write to a file (parent directories are created automatically) + python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml \\ + -o build/problems.txt + + # Translate CI paths to local paths + python3 scripts/clang_tidy_fixes_to_problems.py build/fixes.yaml \\ + --path-map /__w/phlex/phlex/phlex-src=/home/user/phlex \\ + --workspace-root /home/user/phlex """ from __future__ import annotations @@ -20,7 +93,7 @@ @dataclass class Diagnostic: - """Represents a single clang-tidy diagnostic.""" + """A single clang-tidy diagnostic with its primary source location.""" check: str = "clang-tidy" message: str = "" @@ -32,7 +105,7 @@ class Diagnostic: @dataclass class DiagnosticNote: - """Represents a single clang-tidy note attached to a diagnostic.""" + """A note attached to a :class:`Diagnostic`, providing extra context.""" file_path: str | None = None file_offset: int | None = None @@ -40,7 +113,16 @@ class DiagnosticNote: def parse_clang_tidy_fixes(text: str) -> tuple[str | None, list[Diagnostic]]: - """Parse a clang-tidy export-fixes YAML string into a list of diagnostics.""" + """Parse a clang-tidy export-fixes YAML string into structured diagnostics. + + Args: + text: Raw YAML text from a ``clang-tidy-fixes.yaml`` file. + + Returns: + A ``(main_source_file, diagnostics)`` tuple. *main_source_file* is + the ``MainSourceFile`` field from the YAML (``None`` when absent or + empty). *diagnostics* is an empty list when the input is malformed. 
+ """ try: data = yaml.safe_load(text) except yaml.YAMLError as exc: @@ -109,17 +191,38 @@ def parse_clang_tidy_fixes(text: str) -> tuple[str | None, list[Diagnostic]]: def apply_path_map(path: str, mappings: list[tuple[str, str]]) -> str: - """Apply the first matching prefix mapping to translate a path.""" - normalized = path + """Rewrite *path* by replacing the first matching prefix. + + Mappings are tried in order; the first whose *old* prefix matches the + start of *path* is applied and no further mappings are checked. + + Args: + path: The original path string (typically from the YAML). + mappings: Ordered list of ``(old_prefix, new_prefix)`` pairs. + + Returns: + The rewritten path, or the original *path* if no mapping matches. + """ for old, new in mappings: - if normalized.startswith(old): - normalized = new + normalized[len(old) :] - break - return normalized + if path.startswith(old): + return new + path[len(old) :] + return path def offset_to_line_col(path: Path, offset: int) -> tuple[int, int]: - """Convert a byte offset in a file to a (line, column) pair.""" + """Convert a byte offset within a file to a 1-based (line, column) pair. + + The offset is clamped to the file length so out-of-range values never + raise. Returns ``(1, 1)`` when the file cannot be read (e.g. it does not + exist on the local machine). + + Args: + path: Path to the source file to read. + offset: Byte offset from the start of the file (0-based). + + Returns: + ``(line, column)`` both 1-based. 
+ """ try: data = path.read_bytes() except OSError: @@ -131,15 +234,24 @@ def offset_to_line_col(path: Path, offset: int) -> tuple[int, int]: bounded = max(0, min(offset, len(data))) line = data.count(b"\n", 0, bounded) + 1 last_newline = data.rfind(b"\n", 0, bounded) - if last_newline < 0: - col = bounded + 1 - else: - col = bounded - last_newline + col = bounded + 1 if last_newline < 0 else bounded - last_newline return line, max(col, 1) def parse_path_map(items: list[str]) -> list[tuple[str, str]]: - """Parse a list of OLD=NEW path mapping strings into (old, new) tuples.""" + """Parse ``--path-map`` argument values into ``(old, new)`` tuples. + + Args: + items: List of strings in ``OLD=NEW`` form, as supplied on the + command line via ``--path-map``. + + Returns: + List of ``(old_prefix, new_prefix)`` tuples in the same order as + *items*. + + Raises: + ValueError: When any item does not contain ``=``. + """ mappings: list[tuple[str, str]] = [] for item in items: if "=" not in item: @@ -150,7 +262,19 @@ def parse_path_map(items: list[str]) -> list[tuple[str, str]]: def is_within_workspace(path: str, workspace_root: Path) -> bool: - """Return True when path resolves under workspace_root.""" + """Return ``True`` when *path* resolves to a location under *workspace_root*. + + Both paths are resolved (symlinks followed) before comparison so that + symlinked coverage trees are handled correctly. Returns ``False`` on any + :class:`OSError` (e.g. a path that contains null bytes on some platforms). + + Args: + path: An absolute path string to test. + workspace_root: The root directory to test containment against. + + Returns: + ``True`` if *path* is inside *workspace_root*, ``False`` otherwise. 
+ """ try: Path(path).resolve().relative_to(workspace_root.resolve()) except ValueError: @@ -165,7 +289,28 @@ def choose_workspace_note( workspace_root: Path, mappings: list[tuple[str, str]] | None = None, ) -> DiagnosticNote | None: - """Choose the most helpful in-workspace note for external diagnostics.""" + """Pick the most useful in-workspace note for an out-of-workspace diagnostic. + + When a diagnostic's primary location is in an external header, its + ``Notes`` may contain a trace back to workspace source code. This + function selects the best such note for use as a proxy location. + + Selection priority: + + 1. The first note whose message starts with ``"Calling '"`` (the typical + call-trace note emitted by clang-analyzer checks). + 2. The first workspace note, regardless of message content. + + Args: + notes: The ``Notes`` list from a :class:`Diagnostic`. + workspace_root: Root of the local workspace for containment testing. + mappings: Optional path mappings applied to each note's file path + before the containment test. + + Returns: + The chosen :class:`DiagnosticNote`, or ``None`` when no workspace + note exists. + """ effective_mappings = mappings or [] workspace_notes = [ note @@ -186,37 +331,80 @@ def choose_workspace_note( def build_arg_parser() -> argparse.ArgumentParser: """Build and return the argument parser for this script.""" parser = argparse.ArgumentParser( - description="Convert clang-tidy export-fixes YAML into compiler-style diagnostics." + prog="clang_tidy_fixes_to_problems.py", + description=( + "Convert a clang-tidy export-fixes YAML file into compiler-style " + "diagnostics (gcc/clang format) for use in VS Code, terminals, " + "and CI problem matchers." 
+ ), + epilog=( + "output format:\n" + " /path/to/file.cpp:line:col: warning: message [check-name]\n\n" + "examples:\n" + " # Basic: read fixes, print to stdout\n" + " %(prog)s build/clang-tidy-fixes.yaml\n\n" + " # Translate CI paths to local paths\n" + " %(prog)s build/fixes.yaml \\\n" + " --path-map /__w/phlex/phlex/phlex-src=/home/user/phlex \\\n" + " --workspace-root /home/user/phlex \\\n" + " -o build/problems.txt" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "input", type=Path, nargs="?", - help="Path to clang-tidy-fixes.yaml (default: stdin)", + metavar="FIXES_YAML", + help=( + "Path to the clang-tidy export-fixes YAML (e.g. " + "build/clang-tidy-fixes.yaml). Reads from stdin when omitted." + ), ) parser.add_argument( "-o", "--output", type=Path, - help="Output text file with compiler-style diagnostics (default: stdout)", + metavar="FILE", + help=( + "Write compiler-style diagnostic lines to FILE instead of stdout. " + "Parent directories are created automatically." + ), ) parser.add_argument( "--workspace-root", type=Path, default=Path.cwd(), - help="Workspace root used by default path mapping", + metavar="DIR", + help=( + "Root of the local workspace, used to determine whether a " + "diagnostic location is inside the project (default: current " + "working directory). Also used as the target for the built-in " + "CI path mapping." + ), ) parser.add_argument( "--path-map", action="append", default=[], - help="Path mapping in OLD=NEW form. Can be specified multiple times.", + metavar="OLD=NEW", + help=( + "Rewrite path prefix OLD to NEW before emitting diagnostic lines. " + "May be specified multiple times; the first matching mapping wins. " + "Applied before the built-in CI mappings " + "(/__w/phlex/phlex/phlex-src → , etc.)." 
+ ), ) return parser def main() -> int: - """Parse arguments, process the fixes YAML, and write compiler-style diagnostics.""" + """Parse arguments, convert the fixes YAML, and write gcc-style diagnostics. + + Returns: + 0 on success. Non-zero return codes are not currently used; errors + are printed to stderr and the affected diagnostic is skipped. + """ args = build_arg_parser().parse_args() if args.input is not None: @@ -237,12 +425,14 @@ def main() -> int: for diag in diagnostics: file_path = diag.file_path or main_source if not file_path: - # Skip diagnostics with no usable location. + # No usable location — skip rather than emit a meaningless line. continue mapped = apply_path_map(file_path, mappings) offset = diag.file_offset if diag.file_offset is not None else 0 + # When the primary location is outside the workspace, try to redirect + # to a workspace-local note so the link is navigable. chosen_note = None original_location: tuple[str, int, int] | None = None if not is_within_workspace(mapped, workspace_root): @@ -282,7 +472,7 @@ def main() -> int: if args.output is not None: args.output.parent.mkdir(parents=True, exist_ok=True) args.output.write_text(output_text, encoding="utf-8") - print(f"Wrote {len(lines)} diagnostics to {args.output}") + print(f"Wrote {len(lines)} diagnostic(s) to {args.output}") else: sys.stdout.write(output_text) return 0 diff --git a/scripts/codeql_reset_dismissed_alerts.py b/scripts/codeql_reset_dismissed_alerts.py index 9a37f8012..4a67a41ea 100644 --- a/scripts/codeql_reset_dismissed_alerts.py +++ b/scripts/codeql_reset_dismissed_alerts.py @@ -1,15 +1,63 @@ #!/usr/bin/env python3 -"""Reopen all dismissed CodeQL alerts for a repository. +r"""Reopen all dismissed CodeQL code-scanning alerts for a repository. -This script uses the GitHub REST API to locate every CodeQL alert that is -currently in the "dismissed" state and reopens it. 
Reopening an alert removes -its dismissal so the next CodeQL analysis can triage it as if it were new. +PURPOSE +------- +When CodeQL alerts are dismissed in the GitHub Security tab (e.g. marked +"Won't fix" or "Used in tests"), they no longer appear in pull-request checks +or block merges. Dismissed alerts also accumulate silently — a finding that +was incorrectly dismissed stays hidden from future analysis. -Example: - GITHUB_TOKEN=ghp_... python3 scripts/codeql_reset_dismissed_alerts.py \ +This script reopens every dismissed alert via the GitHub REST API, resetting +them to the "open" state. After reopening: + +* The next CodeQL scan will re-evaluate each alert and either confirm it as + still present, or mark it as fixed if the underlying code changed. +* Re-opened alerts that are genuinely intentional can be re-dismissed through + the normal GitHub UI review process. + +This is useful before a scheduled audit, after a significant refactor, or +when you want to ensure the alert list reflects only deliberate dismissals. + +AUTHENTICATION +-------------- +A GitHub personal access token (PAT) or a GitHub App installation token with +the ``security_events`` scope (``security_events:write``) is required. + +Set the token in the environment before running:: + + export GITHUB_TOKEN=ghp_your_token_here + +The script also accepts ``GH_TOKEN`` as a fallback (the name used by the +GitHub CLI, ``gh``). + +USAGE +----- + # Reopen all dismissed alerts (live run) + GITHUB_TOKEN=ghp_... python3 scripts/codeql_reset_dismissed_alerts.py \\ --owner Framework-R-D --repo phlex -The token must have the ``security_events`` scope (``security_events:write``). + # Preview what would be changed without modifying anything + GITHUB_TOKEN=ghp_... 
python3 scripts/codeql_reset_dismissed_alerts.py \\ + --owner Framework-R-D --repo phlex --dry-run + + # Using the gh CLI token directly + GH_TOKEN=$(gh auth token) python3 scripts/codeql_reset_dismissed_alerts.py \\ + --owner Framework-R-D --repo phlex --dry-run + +RATE LIMITS +----------- +The script pages through all dismissed alerts in batches of 100, then issues +one PATCH request per alert. For a repository with hundreds of dismissed +alerts this may approach GitHub's API rate limit (5 000 requests/hour for +authenticated users). Check your remaining quota with:: + + gh api /rate_limit + +EXIT CODES +---------- +* ``0`` — success (including when no dismissed alerts were found) +* ``1`` — an API error occurred or the token is missing/insufficient """ from __future__ import annotations @@ -29,13 +77,23 @@ class GitHubAPIError(RuntimeError): - """Raised when the GitHub API returns an unexpected response.""" + """Raised when the GitHub API returns an unexpected or error response.""" def _token() -> str: + """Return the GitHub API token from the environment. + + Checks ``GITHUB_TOKEN`` first, then ``GH_TOKEN`` (the name used by the + GitHub CLI). + + Raises: + GitHubAPIError: When neither variable is set. + """ token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") if not token: - raise GitHubAPIError("Set GITHUB_TOKEN (or GH_TOKEN) with security_events:write scope.") + raise GitHubAPIError( + "Set GITHUB_TOKEN (or GH_TOKEN) with the security_events:write scope." + ) return token @@ -46,10 +104,25 @@ def _request( params: Optional[dict] = None, payload: Optional[dict] = None, ) -> Any: + """Make an authenticated request to the GitHub REST API. + + Args: + method: HTTP method (``"GET"``, ``"PATCH"``, etc.). + path: API path relative to ``https://api.github.com``, e.g. + ``"/repos/owner/repo/code-scanning/alerts"``. + params: Optional query-string parameters. + payload: Optional JSON request body (for PATCH/POST). 
+ + Returns: + Decoded JSON response body. Returns ``{}`` for empty responses + (e.g. HTTP 204). + + Raises: + GitHubAPIError: On any HTTP error response from the API. + """ url = urllib.parse.urljoin(API_ROOT, path) if params: - query = urllib.parse.urlencode(params) - url = f"{url}?{query}" + url = f"{url}?{urllib.parse.urlencode(params)}" data: Optional[bytes] = None headers = { @@ -66,9 +139,7 @@ def _request( try: with urllib.request.urlopen(request) as response: content = response.read().decode("utf-8") - if not content: - return {} - return json.loads(content) + return json.loads(content) if content else {} except urllib.error.HTTPError as exc: message = exc.read().decode("utf-8", errors="replace") raise GitHubAPIError( @@ -77,19 +148,30 @@ def _request( def _paginate_alerts(owner: str, repo: str) -> Iterator[dict]: + """Yield every dismissed code-scanning alert for *owner*/*repo*. + + Pages through the API in batches of 100 until the API returns an empty + page. + + Args: + owner: GitHub organization or user name. + repo: Repository name. + + Yields: + Raw alert dicts as returned by the GitHub API. + + Raises: + GitHubAPIError: On an unexpected API response or HTTP error. 
+ """ page = 1 while True: result = _request( "GET", f"/repos/{owner}/{repo}/code-scanning/alerts", - params={ - "state": "dismissed", - "per_page": 100, - "page": page, - }, + params={"state": "dismissed", "per_page": 100, "page": page}, ) if not isinstance(result, list): - raise GitHubAPIError("Unexpected response when listing alerts (expected list).") + raise GitHubAPIError("Unexpected response when listing alerts (expected a JSON list).") if not result: return for alert in result: @@ -99,21 +181,39 @@ def _paginate_alerts(owner: str, repo: str) -> Iterator[dict]: @dataclass class Alert: - """Represents a dismissed CodeQL alert.""" + """A dismissed CodeQL code-scanning alert.""" number: int + """Alert number (unique within the repository).""" + html_url: str + """URL to the alert in the GitHub Security tab.""" + rule_id: str + """The CodeQL rule/query that generated this alert (e.g. ``py/sql-injection``).""" + dismissed_reason: Optional[str] + """The reason it was dismissed, or ``None`` if not recorded.""" def _to_alert(raw: dict) -> Alert: + """Convert a raw API alert dict to an :class:`Alert` dataclass. + + Args: + raw: A single alert object from the GitHub API response. + + Returns: + A populated :class:`Alert` instance. + + Raises: + GitHubAPIError: When the ``number`` field is missing or non-integer. + """ try: number = int(raw["number"]) except (KeyError, ValueError) as exc: # pragma: no cover - defensive raise GitHubAPIError(f"Alert object missing 'number': {raw}") from exc html_url = str(raw.get("html_url", "")) - rule = raw.get("rule", {}) or {} + rule = raw.get("rule") or {} rule_id = str(rule.get("id", "")) dismissed_reason = raw.get("dismissed_reason") return Alert( @@ -125,13 +225,18 @@ def _to_alert(raw: dict) -> Alert: def reopen_alert(owner: str, repo: str, alert: Alert, *, dry_run: bool) -> None: - """Reopens a dismissed CodeQL alert. + """Reopen a single dismissed CodeQL alert. + + In dry-run mode the action is logged but no API call is made. 
Args: - owner: The GitHub organization or user. - repo: The repository name. + owner: GitHub organization or user name. + repo: Repository name. alert: The alert to reopen. - dry_run: If True, print the action without executing it. + dry_run: When ``True``, print what would happen without doing it. + + Raises: + GitHubAPIError: On an API error in live mode. """ if dry_run: print(f"DRY RUN: would reopen alert #{alert.number} ({alert.rule_id})") @@ -145,33 +250,65 @@ def reopen_alert(owner: str, repo: str, alert: Alert, *, dry_run: bool) -> None: def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace: - """Parses command-line arguments. + """Parse command-line arguments. Args: - argv: The command-line arguments to parse. + argv: Argument list; defaults to ``sys.argv[1:]`` when ``None``. Returns: - The parsed arguments. + Parsed :class:`argparse.Namespace`. """ - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--owner", required=True, help="GitHub organization or user") - parser.add_argument("--repo", required=True, help="Repository name") + parser = argparse.ArgumentParser( + prog="codeql_reset_dismissed_alerts.py", + description=( + "Reopen all dismissed CodeQL code-scanning alerts for a GitHub " + "repository so that the next analysis can re-evaluate them." + ), + epilog=( + "authentication:\n" + " Set GITHUB_TOKEN (or GH_TOKEN) to a PAT with the\n" + " security_events:write scope before running.\n\n" + " GH_TOKEN=$(gh auth token) # reuse the gh CLI token\n\n" + "examples:\n" + " # Live run — reopen everything\n" + " GITHUB_TOKEN=ghp_... %(prog)s --owner Framework-R-D --repo phlex\n\n" + " # Preview without changing anything\n" + " GITHUB_TOKEN=ghp_... 
%(prog)s --owner Framework-R-D --repo phlex" + " --dry-run" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--owner", + required=True, + metavar="ORG", + help="GitHub organization or user that owns the repository.", + ) + parser.add_argument( + "--repo", + required=True, + metavar="REPO", + help="Repository name (without the owner prefix).", + ) parser.add_argument( "--dry-run", action="store_true", - help="List alerts without modifying them", + help=( + "List dismissed alerts and show what would be reopened, but make " + "no changes to the repository." + ), ) return parser.parse_args(argv) def main(argv: Optional[List[str]] = None) -> int: - """The main entry point of the script. + """Entry point: list dismissed alerts and reopen them. Args: - argv: The command-line arguments. + argv: Argument list; defaults to ``sys.argv[1:]`` when ``None``. Returns: - The exit code. + ``0`` on success, ``1`` on error. """ args = parse_args(argv) try: @@ -187,8 +324,10 @@ def main(argv: Optional[List[str]] = None) -> int: print(f"Found {len(alerts)} dismissed alert(s).") for alert in alerts: reason = f" (reason: {alert.dismissed_reason})" if alert.dismissed_reason else "" - print(f"- #{alert.number} {alert.rule_id}{reason}") + print(f" #{alert.number} {alert.rule_id}{reason}") + print(f" {alert.html_url}") + print() for alert in alerts: try: reopen_alert(args.owner, args.repo, alert, dry_run=args.dry_run) @@ -197,9 +336,9 @@ def main(argv: Optional[List[str]] = None) -> int: return 1 if args.dry_run: - print("Dry run complete; no changes made.") + print("\nDry run complete; no changes made.") else: - print("All dismissed alerts reopened.") + print("\nAll dismissed alerts reopened.") return 0 diff --git a/scripts/create_coverage_symlinks.py b/scripts/create_coverage_symlinks.py index 67b38e228..20ce8e786 100644 --- a/scripts/create_coverage_symlinks.py +++ b/scripts/create_coverage_symlinks.py @@ -44,16 +44,7 @@ def should_link(path: 
pathlib.Path) -> bool: """ if not path.is_file(): return False - suffix = path.suffix - if suffix in SUPPORTED_SUFFIXES: - return True - if path.name.endswith(".c++") or path.name.endswith(".h++"): - return True - if path.name.endswith(".icc") or path.name.endswith(".tcc"): - return True - if path.suffix in {".i", ".ii"}: - return True - return False + return path.suffix in SUPPORTED_SUFFIXES def iter_source_files(build_root: pathlib.Path) -> Iterable[pathlib.Path]: diff --git a/scripts/fix_header_guards.py b/scripts/fix_header_guards.py index 91aadef02..fa6b324e9 100755 --- a/scripts/fix_header_guards.py +++ b/scripts/fix_header_guards.py @@ -22,7 +22,7 @@ def compute_expected_guard(file_path: Path, root: Path) -> str: def check_header_guard(file_path: Path, root: Path) -> tuple[bool, str | None]: """Check if header guard is correct. Returns (is_valid, expected_guard).""" - content = file_path.read_text() + content = file_path.read_text(encoding="utf-8") lines = content.splitlines(keepends=True) if len(lines) < 3: @@ -58,7 +58,7 @@ def check_header_guard(file_path: Path, root: Path) -> tuple[bool, str | None]: def fix_header_guard(file_path: Path, root: Path) -> bool: """Fix header guard. 
Returns True if modified.""" - content = file_path.read_text() + content = file_path.read_text(encoding="utf-8") lines = content.splitlines(keepends=True) if len(lines) < 3: @@ -96,7 +96,7 @@ def fix_header_guard(file_path: Path, root: Path) -> bool: modified = True if modified: - file_path.write_text("".join(lines)) + file_path.write_text("".join(lines), encoding="utf-8") return modified diff --git a/scripts/normalize_coverage_lcov.py b/scripts/normalize_coverage_lcov.py index e4a459dd1..a2a0a87b3 100644 --- a/scripts/normalize_coverage_lcov.py +++ b/scripts/normalize_coverage_lcov.py @@ -122,7 +122,7 @@ def normalize( missing: list[str] = [] external: list[str] = [] - lines = report_path.read_text().splitlines() + lines = report_path.read_text(encoding="utf-8").splitlines() rewritten: list[str] = [] record: list[str] = [] @@ -228,7 +228,7 @@ def _add_candidate(path: Path) -> None: # Handle any trailing lines without an end_of_record (unlikely but safe). flush_record(record) - report_path.write_text("\n".join(rewritten) + "\n") + report_path.write_text("\n".join(rewritten) + "\n", encoding="utf-8") return missing, external diff --git a/scripts/sarif-alerts.py b/scripts/sarif-alerts.py index 37173c1e8..f19b48794 100644 --- a/scripts/sarif-alerts.py +++ b/scripts/sarif-alerts.py @@ -1,62 +1,188 @@ -"""A simple tool to print SARIF results from one or more SARIF files.""" +"""A simple tool to print SARIF results from one or more SARIF files or directories.""" import argparse import json +import sys +from collections.abc import Iterator from pathlib import Path -from typing import Iterable - - -def _process_sarif(path: Path) -> Iterable[str]: - text = path.read_text(encoding="utf-8") - sarif = json.loads(text) - for run in sarif.get("runs", []): - for result in run.get("results", []): - rule = result.get("ruleId", "") - level = result.get("level", "info") - baseline = result.get("baselineState", "unchanged") - message = (result.get("message") or {}).get("text") or "" + 
+# Levels defined by the SARIF 2.1.0 spec (§3.27.10), ordered by severity. +_LEVEL_ORDER = {"none": 0, "note": 1, "warning": 2, "error": 3} + + +def _collect_sarif_paths(inputs: list[Path]) -> list[Path]: + """Expand any directory entries to the .sarif files they contain. + + Files are returned in a stable, sorted order so output is reproducible. + """ + paths: list[Path] = [] + for p in inputs: + if p.is_dir(): + found = sorted(p.rglob("*.sarif"), key=str) + if not found: + print(f"Warning: no .sarif files found under {p}", file=sys.stderr) + paths.extend(found) + else: + paths.append(p) + return paths + + +def _process_sarif( + path: Path, + *, + min_level: str = "none", + baseline_filter: set[str] | None = None, + max_message: int = 200, +) -> Iterator[str]: + """Yield one formatted line per result in the SARIF file at *path*. + + Args: + path: Path to a single .sarif file. + min_level: Skip results whose level is below this threshold. + baseline_filter: If given, only yield results whose baselineState is in + this set (e.g. ``{"new"}``). ``None`` means no filter. + max_message: Truncate the message text to this many characters. + + Yields: + One human-readable string per matching result. + + Raises: + OSError: If the file cannot be read. + ValueError: If the file is not valid JSON or not a SARIF document. 
+ """ + try: + text = path.read_text(encoding="utf-8") + except OSError as exc: + raise OSError(f"Cannot read {path}: {exc}") from exc + + try: + sarif = json.loads(text) + except json.JSONDecodeError as exc: + raise ValueError(f"Invalid JSON in {path}: {exc}") from exc + + if not isinstance(sarif, dict): + raise ValueError(f"Not a SARIF document (expected JSON object): {path}") + + min_order = _LEVEL_ORDER.get(min_level, 0) + + for run in sarif.get("runs") or []: + for result in run.get("results") or []: + level = (result.get("level") or "none").lower() + if _LEVEL_ORDER.get(level, 0) < min_order: + continue + + baseline = (result.get("baselineState") or "unchanged").lower() + if baseline_filter is not None and baseline not in baseline_filter: + continue + + rule = result.get("ruleId") or "" + message = ((result.get("message") or {}).get("text") or "").strip() + # Collapse whitespace and truncate for terminal readability + message = " ".join(message.split()) + if len(message) > max_message: + message = message[: max_message - 1] + "…" + loc = "(unknown location)" for location in result.get("locations") or []: phys = location.get("physicalLocation") or {} - uri = phys.get("artifactLocation", {}).get("uri") + uri = (phys.get("artifactLocation") or {}).get("uri") region = phys.get("region") or {} line = region.get("startLine") if uri: loc = f"{uri}:{line}" if line else uri break + yield f"{rule} [{level}/{baseline}] {loc} — {message}" -def main(argv=None) -> int: - """The main entry point of the script. +def _positive_int(value: str) -> int: + """Argparse type for --max-message: reject values less than 1.""" + n = int(value) + if n < 1: + raise argparse.ArgumentTypeError(f"N must be at least 1, got {n}") + return n + + +def main(argv: list[str] | None = None) -> int: + """Entry point for the sarif-alerts tool. Args: - argv: The command-line arguments. + argv: Command-line arguments (defaults to sys.argv[1:]). Returns: - The exit code. 
+ 0 on success, 1 if any input file could not be processed. """ parser = argparse.ArgumentParser( - description="Print SARIF results from one or more SARIF files" + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "files", + nargs="+", + metavar="FILE_OR_DIR", + help="Path(s) to SARIF file(s) or directories containing .sarif files", + ) + parser.add_argument( + "--level", + default="none", + choices=list(_LEVEL_ORDER), + metavar="LEVEL", + help=( + "Minimum severity level to display " + f"({', '.join(sorted(_LEVEL_ORDER, key=lambda k: _LEVEL_ORDER[k]))}). " + "Default: none (show all)." + ), + ) + parser.add_argument( + "--baseline", + action="append", + dest="baselines", + metavar="STATE", + help=( + "Only show results with this baselineState " + "(new, absent, unchanged, updated). " + "May be repeated. Default: show all states." + ), + ) + parser.add_argument( + "--max-message", + type=_positive_int, + default=200, + metavar="N", + help="Truncate result messages to N characters (default: 200, minimum: 1).", ) - parser.add_argument("files", nargs="+", help="Path(s) to SARIF file(s) to inspect") args = parser.parse_args(argv) + baseline_filter: set[str] | None = ( + {b.lower() for b in args.baselines} if args.baselines else None + ) + + paths = _collect_sarif_paths([Path(f) for f in args.files]) + total = 0 - for file in args.files: - path = Path(file) + errors = 0 + for path in paths: if not path.exists(): - print(f"Skipping missing file: {path}", file=sys.stderr) + print(f"Error: file not found: {path}", file=sys.stderr) + errors += 1 continue print(f"== {path} ==") - for line in _process_sarif(path): - print(line) - total += 1 - print(f"Total alerts across files: {total}") - return 0 + try: + for line in _process_sarif( + path, + min_level=args.level, + baseline_filter=baseline_filter, + max_message=args.max_message, + ): + print(line) + total += 1 + except (OSError, ValueError) as exc: + print(f"Error 
processing {path}: {exc}", file=sys.stderr) + errors += 1 + print(f"Total alerts: {total}") + return 1 if errors else 0 -if __name__ == "__main__": - import sys +if __name__ == "__main__": sys.exit(main()) diff --git a/scripts/test/conftest.py b/scripts/test/conftest.py new file mode 100644 index 000000000..b810cd7f3 --- /dev/null +++ b/scripts/test/conftest.py @@ -0,0 +1,14 @@ +"""pytest configuration for scripts/test/. + +Adds the scripts/ directory to sys.path so every test module can import +scripts directly without each file duplicating the path mutation. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +_scripts_dir = str(Path(__file__).parent.parent) +if _scripts_dir not in sys.path: + sys.path.insert(0, _scripts_dir) diff --git a/scripts/test/test_check_codeql_alerts.py b/scripts/test/test_check_codeql_alerts.py new file mode 100644 index 000000000..f23086d36 --- /dev/null +++ b/scripts/test/test_check_codeql_alerts.py @@ -0,0 +1,1463 @@ +"""Tests for check_codeql_alerts.py. + +Coverage strategy +----------------- +Unit tests target individual functions in isolation. API-touching functions +are tested via unittest.mock so no real network calls are made. + +Integration tests exercise main() end-to-end using temporary SARIF files and +captured environment / file I/O, matching exactly how the script is invoked +from .github/workflows/codeql-analysis.yaml: + + python3 scripts/check_codeql_alerts.py \ + --sarif "$GITHUB_WORKSPACE/sarif" \ + --min-level warning \ + --log-path "$LOG_PATH" \ + [--ref "refs/pull//merge"] +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +# sys.path is set up by scripts/test/conftest.py. 
+import check_codeql_alerts as M # noqa: E402 + +# --------------------------------------------------------------------------- +# Helpers / fixtures +# --------------------------------------------------------------------------- + +_SARIF_TEMPLATE: dict[str, Any] = { + "version": "2.1.0", + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "runs": [], +} + + +def _make_run( + results: list[dict[str, Any]], + rules: list[dict[str, Any]] | None = None, +) -> dict[str, Any]: + """Minimal SARIF run object.""" + return { + "tool": { + "driver": { + "name": "CodeQL", + "rules": rules or [], + } + }, + "results": results, + } + + +def _make_result( + rule_id: str = "py/sql-injection", + level: str = "error", + baseline_state: str = "new", + uri: str = "src/app.py", + start_line: int = 10, + message: str = "Possible SQL injection.", +) -> dict[str, Any]: + return { + "ruleId": rule_id, + "level": level, + "baselineState": baseline_state, + "message": {"text": message}, + "locations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": uri}, + "region": {"startLine": start_line}, + } + } + ], + } + + +def _make_sarif(results: list[dict[str, Any]]) -> dict[str, Any]: + sarif = dict(_SARIF_TEMPLATE) + sarif["runs"] = [_make_run(results)] + return sarif + + +def _write_sarif_to(path: Path, results: list[dict[str, Any]]) -> Path: + """Write a minimal SARIF file into *path* (created if absent) and return *path*.""" + path.mkdir(parents=True, exist_ok=True) + (path / "results.sarif").write_text(json.dumps(_make_sarif(results)), encoding="utf-8") + return path + + +def _make_api_alert( + number: int = 1, + rule_id: str = "py/sql-injection", + severity: str = "error", + path: str = "src/app.py", + start_line: int = 10, + message_text: str = "SQL injection risk.", + analysis_key: str | None = None, +) -> dict[str, Any]: + """Minimal GitHub Code Scanning API alert object.""" + return { + "number": number, + 
"html_url": f"https://github.com/owner/repo/security/code-scanning/{number}", + "rule": { + "id": rule_id, + "name": rule_id.replace("/", "-"), + "helpUri": f"https://codeql.github.com/codeql-query-help/{rule_id}", + }, + "severity": severity, + "most_recent_instance": { + "analysis_key": analysis_key or f"ak:{number}", + "message": {"text": message_text}, + "location": { + "path": path, + "start_line": start_line, + }, + }, + } + + +# --------------------------------------------------------------------------- +# severity / severity_reaches_threshold +# --------------------------------------------------------------------------- + + +class TestSeverity: + """Normalisation of SARIF level strings.""" + + def test_known_levels_pass_through(self) -> None: + """Known levels pass through.""" + for level in ("none", "note", "warning", "error"): + assert M.severity(level) == level + + def test_unknown_level_becomes_warning(self) -> None: + """Unknown level becomes warning.""" + assert M.severity("critical") == "warning" + + def test_none_input_becomes_warning(self) -> None: + """None input becomes warning.""" + assert M.severity(None) == "warning" + + def test_case_insensitive(self) -> None: + """Case insensitive.""" + assert M.severity("ERROR") == "error" + assert M.severity("Warning") == "warning" + + def test_threshold_order(self) -> None: + """Threshold order.""" + assert M.severity_reaches_threshold("error", "warning") + assert M.severity_reaches_threshold("warning", "warning") + assert not M.severity_reaches_threshold("note", "warning") + assert not M.severity_reaches_threshold("none", "warning") + + def test_threshold_none_allows_everything(self) -> None: + """Threshold none allows everything.""" + assert M.severity_reaches_threshold("none", "none") + + def test_threshold_error_excludes_warning(self) -> None: + """Threshold error excludes warning.""" + assert not M.severity_reaches_threshold("warning", "error") + + +# 
---------------------------------------------------------------------------
+# sanitize_message / extract_message
+# ---------------------------------------------------------------------------
+
+
+class TestSanitizeMessage:
+    """Tests for sanitize_message(): truncation, whitespace collapsing, placeholders."""
+
+    def test_truncates_long_messages(self) -> None:
+        """Truncates long messages."""
+        long = "x" * 300
+        result = M.sanitize_message(long)
+        assert result.endswith("...")
+        assert len(result) <= 220
+
+    def test_collapses_whitespace(self) -> None:
+        """Collapses whitespace."""
+        assert M.sanitize_message("a b\n c") == "a b c"
+
+    def test_none_returns_placeholder(self) -> None:
+        """None returns placeholder."""
+        assert M.sanitize_message(None) == "(no message provided)"
+
+    def test_empty_returns_placeholder(self) -> None:
+        """Empty returns placeholder."""
+        assert M.sanitize_message("") == "(no message provided)"
+
+
+class TestExtractMessage:
+    """Tests for extract_message(): markdown > text > arguments precedence."""
+
+    def test_prefers_markdown_over_text(self) -> None:
+        """Prefers markdown over text."""
+        result = {"message": {"markdown": "**bold**", "text": "plain"}}
+        assert M.extract_message(result) == "**bold**"
+
+    def test_falls_back_to_text(self) -> None:
+        """Falls back to text."""
+        result = {"message": {"text": "plain text"}}
+        assert M.extract_message(result) == "plain text"
+
+    def test_uses_arguments_when_no_text(self) -> None:
+        """Uses arguments when no text."""
+        result = {"message": {"arguments": ["arg1", "arg2"]}}
+        assert M.extract_message(result) == "arg1 arg2"
+
+    def test_missing_message_returns_placeholder(self) -> None:
+        """Missing message returns placeholder."""
+        assert M.extract_message({}) == "(no message provided)"
+
+
+# ---------------------------------------------------------------------------
+# extract_location
+# ---------------------------------------------------------------------------
+
+
+class TestExtractLocation:
+    """Tests for extract_location(): URI/line/column rendering and fallbacks."""
+
+    def _result(self, uri: str, line: int | 
None = None, col: int | None = None) -> dict: + region: dict[str, Any] = {} + if line is not None: + region["startLine"] = line + if col is not None: + region["startColumn"] = col + return { + "locations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": uri}, + "region": region, + } + } + ] + } + + def test_uri_only(self) -> None: + """Uri only.""" + assert M.extract_location(self._result("src/app.py")) == "src/app.py" + + def test_uri_and_line(self) -> None: + """Uri and line.""" + assert M.extract_location(self._result("src/app.py", line=42)) == "src/app.py:42" + + def test_uri_line_and_column(self) -> None: + """Uri line and column.""" + assert M.extract_location(self._result("src/app.py", 42, 7)) == "src/app.py:42:7" + + def test_no_locations_returns_unavailable(self) -> None: + """No locations returns unavailable.""" + assert M.extract_location({}) == "(location unavailable)" + + def test_related_location_fallback(self) -> None: + """Related location fallback.""" + result = { + "locations": [], + "relatedLocations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": "src/util.py"}, + "region": {"startLine": 5}, + } + } + ], + } + assert M.extract_location(result) == "src/util.py:5" + + def test_logical_location_fallback(self) -> None: + """Logical location fallback.""" + result = { + "locations": [], + "logicalLocations": [{"fullyQualifiedName": "MyModule::MyClass"}], + } + assert M.extract_location(result) == "MyModule::MyClass" + + +# --------------------------------------------------------------------------- +# rule_lookup_map +# --------------------------------------------------------------------------- + + +class TestRuleLookupMap: + """Tests for TestRuleLookupMap.""" + + def test_returns_rules_keyed_by_id(self) -> None: + """Returns rules keyed by id.""" + run = _make_run([], rules=[{"id": "py/sql-injection", "name": "SQL injection"}]) + rules = M.rule_lookup_map(run) + assert "py/sql-injection" in rules + assert 
rules["py/sql-injection"]["name"] == "SQL injection" + + def test_empty_driver_returns_empty_map(self) -> None: + """Empty driver returns empty map.""" + assert M.rule_lookup_map({"tool": {"driver": {}}}) == {} + + def test_rule_without_id_is_skipped(self) -> None: + """Rule without id is skipped.""" + run = _make_run([], rules=[{"name": "No ID rule"}]) + assert M.rule_lookup_map(run) == {} + + +# --------------------------------------------------------------------------- +# collect_alerts (SARIF baselineState) +# --------------------------------------------------------------------------- + + +class TestCollectAlerts: + """Tests for TestCollectAlerts.""" + + def test_new_alert_collected(self) -> None: + """New alert collected.""" + sarif = _make_sarif([_make_result(baseline_state="new", level="error")]) + buckets = M.collect_alerts(sarif, min_level="warning") + assert len(buckets["new"]) == 1 + assert buckets["new"][0].rule_id == "py/sql-injection" + + def test_absent_alert_collected(self) -> None: + """Absent alert collected.""" + sarif = _make_sarif([_make_result(baseline_state="absent", level="note")]) + buckets = M.collect_alerts(sarif, min_level="warning") + # absent alerts are always collected regardless of level + assert len(buckets["absent"]) == 1 + + def test_unchanged_alert_ignored(self) -> None: + """Unchanged alert ignored.""" + sarif = _make_sarif([_make_result(baseline_state="unchanged", level="error")]) + buckets = M.collect_alerts(sarif, min_level="warning") + assert len(buckets["new"]) == 0 + + def test_new_alert_below_threshold_excluded(self) -> None: + """New alert below threshold excluded.""" + sarif = _make_sarif([_make_result(baseline_state="new", level="note")]) + buckets = M.collect_alerts(sarif, min_level="warning") + assert len(buckets["new"]) == 0 + + def test_new_alert_below_threshold_included_when_threshold_none(self) -> None: + """New alert below threshold included when threshold none.""" + sarif = 
_make_sarif([_make_result(baseline_state="new", level="note")]) + buckets = M.collect_alerts(sarif, min_level="none") + assert len(buckets["new"]) == 1 + + def test_multiple_runs_merged(self) -> None: + """Multiple runs merged.""" + sarif = dict(_SARIF_TEMPLATE) + sarif["runs"] = [ + _make_run([_make_result(rule_id="py/r1", baseline_state="new")]), + _make_run([_make_result(rule_id="cpp/r2", baseline_state="new")]), + ] + buckets = M.collect_alerts(sarif, min_level="warning") + rule_ids = {a.rule_id for a in buckets["new"]} + assert rule_ids == {"py/r1", "cpp/r2"} + + def test_rule_metadata_attached(self) -> None: + """Rule metadata attached.""" + rule = { + "id": "py/sql-injection", + "name": "SQL Injection", + "helpUri": "https://example.com", + } + sarif = dict(_SARIF_TEMPLATE) + sarif["runs"] = [_make_run([_make_result(baseline_state="new")], rules=[rule])] + buckets = M.collect_alerts(sarif, min_level="warning") + alert = buckets["new"][0] + assert alert.rule_name == "SQL Injection" + assert alert.help_uri == "https://example.com" + + def test_empty_runs_returns_empty_buckets(self) -> None: + """Empty runs returns empty buckets.""" + sarif = dict(_SARIF_TEMPLATE) + sarif["runs"] = [] + buckets = M.collect_alerts(sarif, min_level="warning") + assert buckets["new"] == [] + assert buckets["absent"] == [] + + +# --------------------------------------------------------------------------- +# load_sarif +# --------------------------------------------------------------------------- + + +class TestLoadSarif: + """Tests for TestLoadSarif.""" + + def test_loads_single_file(self, tmp_path: Path) -> None: + """Loads single file.""" + sarif = _make_sarif([_make_result()]) + f = tmp_path / "results.sarif" + f.write_text(json.dumps(sarif), encoding="utf-8") + loaded = M.load_sarif(f) + assert len(loaded["runs"]) == 1 + + def test_loads_directory_merges_runs(self, tmp_path: Path) -> None: + """Loads directory merges runs.""" + for i, lang in enumerate(("cpp", "python")): + 
sarif = _make_sarif([_make_result(rule_id=f"{lang}/r{i}")]) + (tmp_path / f"{lang}.sarif").write_text(json.dumps(sarif), encoding="utf-8") + loaded = M.load_sarif(tmp_path) + assert len(loaded["runs"]) == 2 + + def test_missing_file_raises(self, tmp_path: Path) -> None: + """Missing file raises.""" + with pytest.raises(FileNotFoundError): + M.load_sarif(tmp_path / "nonexistent.sarif") + + def test_empty_directory_raises(self, tmp_path: Path) -> None: + """Empty directory raises.""" + with pytest.raises(FileNotFoundError): + M.load_sarif(tmp_path) + + def test_invalid_json_raises(self, tmp_path: Path) -> None: + """Invalid json raises.""" + f = tmp_path / "bad.sarif" + f.write_text("{not valid json}", encoding="utf-8") + with pytest.raises(ValueError, match="Failed to parse SARIF JSON"): + M.load_sarif(f) + + +# --------------------------------------------------------------------------- +# Alert helpers +# --------------------------------------------------------------------------- + + +class TestAlertHelpers: + """Tests for TestAlertHelpers.""" + + def _alert(self, **kwargs: Any) -> M.Alert: + defaults: dict[str, Any] = { + "number": 42, + "html_url": "https://github.com/owner/repo/security/code-scanning/42", + "rule_id": "py/sql-injection", + "level": "error", + "message": "SQL injection risk.", + "location": "src/app.py:10", + "rule_name": "SQL Injection", + "help_uri": "https://example.com", + "security_severity": "9.8", + } + defaults.update(kwargs) + return M.Alert(**defaults) + + def test_icon_error(self) -> None: + """Icon error.""" + assert self._alert(level="error").icon() == ":x:" + + def test_icon_warning(self) -> None: + """Icon warning.""" + assert self._alert(level="warning").icon() == ":warning:" + + def test_icon_unknown(self) -> None: + """Icon unknown.""" + assert self._alert(level="banana").icon() == ":grey_question:" + + def test_level_title(self) -> None: + """Level title.""" + assert self._alert(level="error").level_title() == "Error" + + def 
test_rule_display_with_uri(self) -> None: + """Rule display with uri.""" + display = self._alert().rule_display() + assert display == "[py/sql-injection](https://example.com)" + + def test_rule_display_without_uri(self) -> None: + """Rule display without uri.""" + display = self._alert(help_uri=None).rule_display() + assert display == "`py/sql-injection`" + + def test_severity_suffix_present(self) -> None: + """Severity suffix present.""" + assert self._alert(security_severity="9.8").severity_suffix() == " (9.8)" + + def test_severity_suffix_absent(self) -> None: + """Severity suffix absent.""" + assert self._alert(security_severity=None).severity_suffix() == "" + + +# --------------------------------------------------------------------------- +# _format_section +# --------------------------------------------------------------------------- + + +class TestFormatSection: + """Tests for TestFormatSection.""" + + def _alert(self, n: int, level: str = "error") -> M.Alert: + return M.Alert( + number=n, + html_url=f"https://example.com/{n}", + rule_id="py/r", + level=level, + message="msg", + location=f"src/app.py:{n}", + ) + + def test_all_alerts_shown_when_under_limit(self) -> None: + """All alerts shown when under limit.""" + alerts = [self._alert(i) for i in range(3)] + lines = M._format_section(alerts, max_results=10, bullet_prefix=":x:") + assert len(lines) == 3 + + def test_overflow_produces_trailing_line(self) -> None: + """Overflow produces trailing line.""" + alerts = [self._alert(i) for i in range(5)] + lines = M._format_section(alerts, max_results=3, bullet_prefix=":x:") + assert len(lines) == 4 + assert "2 more" in lines[-1] + + def test_alert_with_number_link(self) -> None: + """Alert with number link.""" + lines = M._format_section([self._alert(7)], max_results=10, bullet_prefix=":x:") + assert "[# 7]" in lines[0] + assert "https://example.com/7" in lines[0] + + def test_dismissed_note_included(self) -> None: + """Dismissed note included.""" + alert = 
self._alert(1) + alert.dismissed_reason = "false positive" + lines = M._format_section([alert], max_results=10, bullet_prefix=":x:") + assert "dismissed" in lines[0] + assert "false positive" in lines[0] + + def test_empty_list_returns_empty(self) -> None: + """Empty list returns empty.""" + assert M._format_section([], max_results=10, bullet_prefix=":x:") == [] + + +# --------------------------------------------------------------------------- +# build_comment +# --------------------------------------------------------------------------- + + +class TestBuildComment: + """Tests for TestBuildComment.""" + + def _alert(self, level: str = "error") -> M.Alert: + return M.Alert( + number=None, + html_url=None, + rule_id="py/sql-injection", + level=level, + message="SQL injection.", + location="src/app.py:10", + ) + + def test_new_alerts_heading(self) -> None: + """New alerts heading.""" + body = M.build_comment( + new_alerts=[self._alert()], + fixed_alerts=[], + repo="owner/repo", + max_results=20, + threshold="warning", + ) + assert "new CodeQL alert" in body + assert "❌" in body + + def test_fixed_alerts_heading(self) -> None: + """Fixed alerts heading.""" + body = M.build_comment( + new_alerts=[], + fixed_alerts=[self._alert()], + repo="owner/repo", + max_results=20, + threshold="warning", + ) + assert "resolved" in body + assert "✅" in body + + def test_code_scanning_link_included_when_repo_known(self) -> None: + """Code scanning link included when repo known.""" + body = M.build_comment( + new_alerts=[self._alert()], + fixed_alerts=[], + repo="owner/repo", + max_results=20, + threshold="warning", + ) + assert "https://github.com/owner/repo/security/code-scanning" in body + + def test_generic_link_when_repo_unknown(self) -> None: + """Generic link when repo unknown.""" + body = M.build_comment( + new_alerts=[self._alert()], + fixed_alerts=[], + repo=None, + max_results=20, + threshold="warning", + ) + assert "Security tab" in body + + def 
test_plural_singular_new(self) -> None: + """Plural singular new.""" + one = M.build_comment( + new_alerts=[self._alert()], + fixed_alerts=[], + repo=None, + max_results=20, + threshold="warning", + ) + two = M.build_comment( + new_alerts=[self._alert(), self._alert()], + fixed_alerts=[], + repo=None, + max_results=20, + threshold="warning", + ) + assert "1 new CodeQL alert " in one # singular (no trailing 's') + assert "2 new CodeQL alerts" in two + + def test_threshold_shown_in_heading(self) -> None: + """Threshold shown in heading.""" + body = M.build_comment( + new_alerts=[self._alert()], + fixed_alerts=[], + repo=None, + max_results=20, + threshold="warning", + ) + assert "≥ warning" in body + + def test_body_ends_with_single_newline(self) -> None: + """Body ends with single newline.""" + body = M.build_comment( + new_alerts=[self._alert()], + fixed_alerts=[], + repo=None, + max_results=20, + threshold="warning", + ) + assert body.endswith("\n") + assert not body.endswith("\n\n") + + def test_highest_severity_shown(self) -> None: + """Highest severity shown.""" + alerts = [ + self._alert("warning"), + self._alert("error"), + ] + body = M.build_comment( + new_alerts=alerts, + fixed_alerts=[], + repo=None, + max_results=20, + threshold="warning", + ) + assert "Error" in body # highest severity title + + +# --------------------------------------------------------------------------- +# _to_alert_api +# --------------------------------------------------------------------------- + + +class TestToAlertApi: + """Tests for TestToAlertApi.""" + + def test_basic_fields_extracted(self) -> None: + """Basic fields extracted.""" + raw = _make_api_alert(number=5, rule_id="py/path-traversal", severity="warning") + alert = M._to_alert_api(raw) + assert alert.number == 5 + assert alert.rule_id == "py/path-traversal" + assert alert.level == "warning" + assert alert.html_url == "https://github.com/owner/repo/security/code-scanning/5" + + def 
test_message_from_most_recent_instance(self) -> None:
+        """Message from most recent instance."""
+        raw = _make_api_alert(message_text="Traversal risk.")
+        alert = M._to_alert_api(raw)
+        assert alert.message == "Traversal risk."
+
+    def test_message_falls_back_to_rule_name(self) -> None:
+        """Message falls back to rule name."""
+        raw = _make_api_alert()
+        raw["most_recent_instance"]["message"] = {}
+        alert = M._to_alert_api(raw)
+        # rule_name is "py-sql-injection" (from _make_api_alert); assert it directly —
+        # the previous `or alert.message != "(no message)"` disjunct was vacuously true.
+        assert "py-sql-injection" in alert.message
+
+    def test_location_flat_api_format(self) -> None:
+        """Location flat api format."""
+        raw = _make_api_alert(path="src/app.py", start_line=42)
+        alert = M._to_alert_api(raw)
+        assert "src/app.py" in alert.location
+        assert "42" in alert.location
+
+    def test_location_nested_sarif_format(self) -> None:
+        """Location nested sarif format."""
+        raw = _make_api_alert()
+        raw["most_recent_instance"]["location"] = {
+            "physicalLocation": {
+                "artifactLocation": {"uri": "lib/util.py"},
+                "region": {"startLine": 7},
+            }
+        }
+        alert = M._to_alert_api(raw)
+        assert "lib/util.py" in alert.location
+        assert "7" in alert.location
+
+    def test_analysis_key_from_instance(self) -> None:
+        """Analysis key from instance."""
+        raw = _make_api_alert(analysis_key="cpp/queries/CodeQL.cpp")
+        alert = M._to_alert_api(raw)
+        assert alert.analysis_key == "cpp/queries/CodeQL.cpp"
+
+    def test_dismissed_reason_extracted(self) -> None:
+        """Dismissed reason extracted."""
+        raw = _make_api_alert()
+        raw["dismissed_reason"] = "false positive"
+        alert = M._to_alert_api(raw)
+        assert alert.dismissed_reason == "false positive"
+
+    def test_security_severity_from_rule_properties(self) -> None:
+        """Security severity from rule properties."""
+        raw = _make_api_alert()
+        raw["rule"]["properties"] = {"security-severity": "8.5"}
+        alert = M._to_alert_api(raw)
+        assert alert.security_severity == "8.5"
+
+    def 
test_missing_number_becomes_none(self) -> None: + """Missing number becomes none.""" + raw = _make_api_alert() + del raw["number"] + alert = M._to_alert_api(raw) + assert alert.number is None + + +# --------------------------------------------------------------------------- +# _paginate_alerts_api +# --------------------------------------------------------------------------- + + +class TestPaginateAlertsApi: + """Tests for TestPaginateAlertsApi.""" + + def _page(self, n: int, count: int) -> list[dict]: + return [_make_api_alert(number=i + (n - 1) * count) for i in range(count)] + + @patch("check_codeql_alerts._api_request") + def test_single_page(self, mock_req: MagicMock) -> None: + """Single page.""" + mock_req.return_value = [_make_api_alert(1), _make_api_alert(2)] + alerts = list(M._paginate_alerts_api("owner", "repo")) + assert len(alerts) == 2 + assert mock_req.call_count == 1 + + @patch("check_codeql_alerts._api_request") + def test_multiple_pages(self, mock_req: MagicMock) -> None: + """Multiple pages.""" + page1 = self._page(1, 100) + page2 = self._page(2, 50) + mock_req.side_effect = [page1, page2] + alerts = list(M._paginate_alerts_api("owner", "repo")) + assert len(alerts) == 150 + assert mock_req.call_count == 2 + + @patch("check_codeql_alerts._api_request") + def test_early_exit_on_partial_page(self, mock_req: MagicMock) -> None: + """A page with fewer than 100 items must stop pagination without an extra call.""" + mock_req.return_value = self._page(1, 50) + list(M._paginate_alerts_api("owner", "repo")) + assert mock_req.call_count == 1 + + @patch("check_codeql_alerts._api_request") + def test_empty_first_page_returns_nothing(self, mock_req: MagicMock) -> None: + """Empty first page returns nothing.""" + mock_req.return_value = [] + alerts = list(M._paginate_alerts_api("owner", "repo")) + assert alerts == [] + + @patch("check_codeql_alerts._api_request") + def test_non_list_response_raises(self, mock_req: MagicMock) -> None: + """Non list response 
raises.""" + mock_req.return_value = {"message": "not a list"} + with pytest.raises(M.GitHubAPIError): + list(M._paginate_alerts_api("owner", "repo")) + + @patch("check_codeql_alerts._api_request") + def test_ref_forwarded_to_api(self, mock_req: MagicMock) -> None: + """Ref forwarded to api.""" + mock_req.return_value = [] + list(M._paginate_alerts_api("owner", "repo", ref="refs/pull/42/merge")) + _, kwargs = mock_req.call_args + assert kwargs["params"]["ref"] == "refs/pull/42/merge" + + +# --------------------------------------------------------------------------- +# _compare_alerts_via_api +# --------------------------------------------------------------------------- + + +class TestCompareAlertsViaApi: + """Tests for the core API comparison logic.""" + + def _mock_paginate(self, pages: dict[str | None, list[dict]]) -> MagicMock: + """Return a side_effect callable that dispatches on the `ref` kwarg.""" + + def _paginate(owner: str, repo: str, *, state: str = "open", ref: str | None = None): # noqa: ANN202 + yield from pages.get(ref, []) + + return MagicMock(side_effect=_paginate) + + def _pr_info(self, base_ref: str = "main", base_sha: str = "abc1234") -> dict: + return {"base": {"ref": base_ref, "sha": base_sha}} + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_new_alert_detected(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """New alert detected.""" + pr_alert = _make_api_alert(number=1, analysis_key="ak:1") + mock_pag.side_effect = self._mock_paginate({"refs/pull/7/merge": [pr_alert], None: []}) + mock_req.side_effect = [ + self._pr_info(), # pulls/{n} + [{"sha": "prev000"}], # pulls/{n}/commits (only 1 commit → no prev) + ] + result = M._compare_alerts_via_api("owner", "repo", "refs/pull/7/merge") + assert len(result.new_alerts) == 1 + assert result.new_alerts[0].number == 1 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def 
test_fixed_alert_detected(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """Fixed alert detected.""" + main_alert = _make_api_alert(number=2, analysis_key="ak:2") + mock_pag.side_effect = self._mock_paginate({"refs/pull/7/merge": [], None: [main_alert]}) + mock_req.side_effect = [self._pr_info(), [{"sha": "prev000"}]] + result = M._compare_alerts_via_api("owner", "repo", "refs/pull/7/merge") + assert len(result.fixed_alerts) == 1 + assert result.fixed_alerts[0].number == 2 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_matched_alert(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """Matched alert.""" + alert = _make_api_alert(number=3, analysis_key="ak:3") + mock_pag.side_effect = self._mock_paginate({"refs/pull/7/merge": [alert], None: [alert]}) + mock_req.side_effect = [self._pr_info(), [{"sha": "prev000"}]] + result = M._compare_alerts_via_api("owner", "repo", "refs/pull/7/merge") + assert len(result.new_alerts) == 0 + assert len(result.fixed_alerts) == 0 + assert len(result.matched_alerts) == 1 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_min_level_filters_new_alerts(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """Min level filters new alerts.""" + note_alert = _make_api_alert(number=5, severity="note", analysis_key="ak:5") + mock_pag.side_effect = self._mock_paginate({"refs/pull/7/merge": [note_alert], None: []}) + mock_req.side_effect = [self._pr_info(), [{"sha": "prev000"}]] + result = M._compare_alerts_via_api( + "owner", "repo", "refs/pull/7/merge", min_level="warning" + ) + assert len(result.new_alerts) == 0 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_min_level_none_includes_notes(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """Min level none includes notes.""" + note_alert = _make_api_alert(number=5, 
severity="note", analysis_key="ak:5") + mock_pag.side_effect = self._mock_paginate({"refs/pull/7/merge": [note_alert], None: []}) + mock_req.side_effect = [self._pr_info(), [{"sha": "prev000"}]] + result = M._compare_alerts_via_api("owner", "repo", "refs/pull/7/merge", min_level="none") + assert len(result.new_alerts) == 1 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_prev_commit_comparison(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """Prev commit comparison.""" + prev_only = _make_api_alert(number=10, analysis_key="ak:10") + pr_only = _make_api_alert(number=11, analysis_key="ak:11") + # The script uses commits[-2]["sha"], so with [older, prev] that is "older_sha". + mock_pag.side_effect = self._mock_paginate( + { + "refs/pull/7/merge": [pr_only], + None: [], + "older_sha": [prev_only], # key must match commits[-2]["sha"] + } + ) + mock_req.side_effect = [ + self._pr_info(), + [{"sha": "older_sha"}, {"sha": "head_sha"}], # 2 commits; [-2] = "older_sha" + ] + result = M._compare_alerts_via_api("owner", "repo", "refs/pull/7/merge") + assert len(result.new_vs_prev) == 1 + assert result.new_vs_prev[0].number == 11 + assert len(result.fixed_vs_prev) == 1 + assert result.fixed_vs_prev[0].number == 10 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_base_comparison(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """Base comparison.""" + base_only = _make_api_alert(number=20, analysis_key="ak:20") + pr_only = _make_api_alert(number=21, analysis_key="ak:21") + mock_pag.side_effect = self._mock_paginate( + { + "refs/pull/7/merge": [pr_only], + None: [], + "abc1234": [base_only], # base_sha from _pr_info + } + ) + mock_req.side_effect = [ + self._pr_info(base_sha="abc1234"), + [{"sha": "prev000"}], + ] + result = M._compare_alerts_via_api("owner", "repo", "refs/pull/7/merge") + assert len(result.new_vs_base) == 1 + assert 
result.new_vs_base[0].number == 21 + assert len(result.fixed_vs_base) == 1 + assert result.fixed_vs_base[0].number == 20 + assert result.base_sha == "abc1234" + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_api_error_on_pr_info_is_handled( + self, mock_pag: MagicMock, mock_req: MagicMock + ) -> None: + """Api error on pr info is handled.""" + mock_pag.side_effect = self._mock_paginate({"refs/pull/7/merge": [], None: []}) + mock_req.side_effect = M.GitHubAPIError("404 not found") + result = M._compare_alerts_via_api("owner", "repo", "refs/pull/7/merge") + # base/prev should be empty; comparison completes without raising + assert result.base_sha is None + assert result.prev_commit_ref is None + assert result.new_vs_base == [] + assert result.new_vs_prev == [] + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_non_pr_ref_skips_base_fetch(self, mock_pag: MagicMock, mock_req: MagicMock) -> None: + """Non pr ref skips base fetch.""" + mock_pag.side_effect = self._mock_paginate({"some/ref": [], None: []}) + result = M._compare_alerts_via_api("owner", "repo", "some/ref") + # No API calls should have been made for PR info + mock_req.assert_not_called() + assert result.base_sha is None + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_fixed_alerts_not_filtered_by_min_level( + self, mock_pag: MagicMock, mock_req: MagicMock + ) -> None: + """Resolved alerts must appear in fixed_alerts regardless of severity level. + + Only *new* alerts are gated by min_level; surfacing that even a low-severity + alert was fixed is always useful information for reviewers. 
+ """ + note_alert = _make_api_alert(number=6, severity="note", analysis_key="ak:6") + # note_alert exists on main but not on the PR → it is "fixed" + mock_pag.side_effect = self._mock_paginate({"refs/pull/7/merge": [], None: [note_alert]}) + mock_req.side_effect = [self._pr_info(), [{"sha": "prev000"}]] + result = M._compare_alerts_via_api( + "owner", "repo", "refs/pull/7/merge", min_level="warning" + ) + assert len(result.fixed_alerts) == 1 + assert result.fixed_alerts[0].number == 6 + + +# --------------------------------------------------------------------------- +# _build_multi_section_comment +# --------------------------------------------------------------------------- + + +class TestBuildMultiSectionComment: + """Tests for TestBuildMultiSectionComment.""" + + def _alert(self, level: str = "error", number: int = 1) -> M.Alert: + return M.Alert( + number=number, + html_url=f"https://example.com/{number}", + rule_id="py/r", + level=level, + message="msg", + location="src/app.py:1", + ) + + def _comp(self, **kwargs: Any) -> M.APIAlertComparison: + defaults: dict[str, Any] = { + "new_alerts": [], + "fixed_alerts": [], + "matched_alerts": [], + "new_vs_prev": [], + "fixed_vs_prev": [], + "new_vs_base": [], + "fixed_vs_base": [], + "base_sha": None, + "prev_commit_ref": None, + } + defaults.update(kwargs) + return M.APIAlertComparison(**defaults) + + def test_new_vs_base_rendered(self) -> None: + """New vs base rendered.""" + comp = self._comp( + new_vs_base=[self._alert()], + base_sha="abc1234", + ) + body = M._build_multi_section_comment(comp, max_results=10) + assert "since the branch point" in body + assert "abc1234" in body + + def test_fixed_vs_prev_rendered(self) -> None: + """Fixed vs prev rendered.""" + comp = self._comp( + fixed_vs_prev=[self._alert()], + prev_commit_ref="def5678", + ) + body = M._build_multi_section_comment(comp, max_results=10) + assert "since the previous PR commit" in body + + def test_fallback_to_new_vs_main_when_no_context(self) -> 
None: + """When no vs_prev/vs_base data exists, new_alerts must appear in the comment.""" + comp = self._comp(new_alerts=[self._alert()]) + body = M._build_multi_section_comment(comp, max_results=10) + assert "compared to main" in body + + def test_fallback_to_fixed_vs_main_when_no_context(self) -> None: + """Fallback to fixed vs main when no context.""" + comp = self._comp(fixed_alerts=[self._alert()]) + body = M._build_multi_section_comment(comp, max_results=10) + assert "resolved compared to main" in body + + def test_no_fallback_when_detail_present(self) -> None: + """new_alerts should not appear a second time when vs_base is rendered.""" + comp = self._comp( + new_alerts=[self._alert(number=1)], + new_vs_base=[self._alert(number=2)], + base_sha="abc1234", + ) + body = M._build_multi_section_comment(comp, max_results=10) + assert "compared to main" not in body + + def test_body_ends_with_single_newline(self) -> None: + """Body ends with single newline.""" + comp = self._comp(new_alerts=[self._alert()]) + body = M._build_multi_section_comment(comp, max_results=10) + assert body.endswith("\n") + assert not body.endswith("\n\n") + + def test_blank_lines_preserved_between_sections(self) -> None: + """Adjacent headings must be separated by a blank line for valid markdown.""" + comp = self._comp( + new_vs_base=[self._alert()], + fixed_vs_base=[self._alert(number=2)], + base_sha="abc", + ) + body = M._build_multi_section_comment(comp, max_results=10) + # Two H2 headings should not be directly adjacent + assert "##" in body + lines = body.splitlines() + h2_indices = [i for i, ln in enumerate(lines) if ln.startswith("##")] + assert len(h2_indices) >= 2 + for a, b in zip(h2_indices, h2_indices[1:], strict=False): + assert b - a > 1, "H2 headings are directly adjacent (no blank line)" + + def test_code_scanning_link_present(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Code scanning link present.""" + monkeypatch.setenv("GITHUB_REPOSITORY", "owner/repo") + comp = 
self._comp(new_alerts=[self._alert()]) + body = M._build_multi_section_comment(comp, max_results=10) + assert "https://github.com/owner/repo/security/code-scanning" in body + + +# --------------------------------------------------------------------------- +# set_outputs +# --------------------------------------------------------------------------- + + +class TestSetOutputs: + """Tests for TestSetOutputs.""" + + def _alert(self) -> M.Alert: + return M.Alert( + number=1, + html_url=None, + rule_id="py/r", + level="error", + message="m", + location="src/app.py:1", + ) + + def test_outputs_written(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Outputs written.""" + out_file = tmp_path / "output.txt" + monkeypatch.setenv("GITHUB_OUTPUT", str(out_file)) + comment = tmp_path / "comment.md" + M.set_outputs( + new_alerts=[self._alert()], + fixed_alerts=[], + comment_path=comment, + ) + content = out_file.read_text() + assert "new_alerts=true" in content + assert "alert_count=1" in content + assert "fixed_alerts=false" in content + assert "fixed_count=0" in content + assert f"comment_path={comment}" in content + + def test_empty_comment_path(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Empty comment path.""" + out_file = tmp_path / "output.txt" + monkeypatch.setenv("GITHUB_OUTPUT", str(out_file)) + M.set_outputs(new_alerts=[], fixed_alerts=[], comment_path=None) + content = out_file.read_text() + assert "comment_path=\n" in content + + def test_no_github_output_env_is_noop(self, monkeypatch: pytest.MonkeyPatch) -> None: + """No github output env is noop.""" + monkeypatch.delenv("GITHUB_OUTPUT", raising=False) + # Must not raise even without GITHUB_OUTPUT set + M.set_outputs(new_alerts=[], fixed_alerts=[], comment_path=None) + + +# --------------------------------------------------------------------------- +# write_summary +# --------------------------------------------------------------------------- + + +class TestWriteSummary: + 
"""Tests for TestWriteSummary.""" + + def _alert(self, level: str = "warning") -> M.Alert: + return M.Alert( + number=None, + html_url=None, + rule_id="py/r", + level=level, + message="m", + location="src/app.py:1", + ) + + def test_summary_written(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Summary written.""" + summary_file = tmp_path / "summary.md" + monkeypatch.setenv("GITHUB_STEP_SUMMARY", str(summary_file)) + M.write_summary( + new_alerts=[self._alert("error")], + fixed_alerts=[self._alert("warning")], + max_results=10, + threshold="warning", + ) + content = summary_file.read_text() + assert "CodeQL Alerts" in content + assert "new alert" in content + assert "resolved" in content + # threshold must be interpolated correctly (not the literal '{threshold}') + assert "{threshold}" not in content + assert "warning" in content + + def test_no_summary_env_is_noop(self, monkeypatch: pytest.MonkeyPatch) -> None: + """No summary env is noop.""" + monkeypatch.delenv("GITHUB_STEP_SUMMARY", raising=False) + M.write_summary( + new_alerts=[self._alert()], + fixed_alerts=[], + max_results=10, + threshold="warning", + ) + + def test_nothing_written_when_no_alerts( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Nothing written when no alerts.""" + summary_file = tmp_path / "summary.md" + monkeypatch.setenv("GITHUB_STEP_SUMMARY", str(summary_file)) + M.write_summary(new_alerts=[], fixed_alerts=[], max_results=10, threshold="warning") + assert not summary_file.exists() + + +# --------------------------------------------------------------------------- +# main() — end-to-end integration +# --------------------------------------------------------------------------- + + +class TestMainSarifMode: + """End-to-end via SARIF baselineState (no API calls).""" + + def _write_sarif(self, path: Path, results: list[dict]) -> Path: + return _write_sarif_to(path, results) + + def test_new_alert_returns_zero_and_writes_comment( + self, 
tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """New alert returns zero and writes comment.""" + sarif_dir = self._write_sarif(tmp_path / "sarif", [_make_result(baseline_state="new")]) + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + rc = M.main(["--sarif", str(sarif_dir), "--min-level", "warning", "--log-path", str(log)]) + assert rc == 0 + comment = tmp_path / "runner" / "codeql-alerts.md" + assert comment.exists() + assert "new CodeQL alert" in comment.read_text() + + def test_absent_alert_returns_zero_and_writes_comment( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Absent alert returns zero and writes comment.""" + sarif_dir = self._write_sarif(tmp_path / "sarif", [_make_result(baseline_state="absent")]) + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + rc = M.main(["--sarif", str(sarif_dir), "--min-level", "warning", "--log-path", str(log)]) + assert rc == 0 + comment = tmp_path / "runner" / "codeql-alerts.md" + assert comment.exists() + assert "resolved" in comment.read_text() + + def test_no_alerts_returns_zero_no_comment( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """No alerts returns zero no comment.""" + sarif_dir = self._write_sarif(tmp_path / "sarif", []) + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + rc = M.main(["--sarif", str(sarif_dir), "--min-level", "warning", "--log-path", str(log)]) + assert rc == 0 + comment = tmp_path / "runner" / "codeql-alerts.md" + assert not comment.exists() + + def test_below_threshold_alert_not_reported( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Below threshold alert not reported.""" + sarif_dir = self._write_sarif( + tmp_path / "sarif", + [_make_result(baseline_state="new", level="note")], + ) + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / 
"codeql.log" + rc = M.main(["--sarif", str(sarif_dir), "--min-level", "warning", "--log-path", str(log)]) + assert rc == 0 + assert not (tmp_path / "runner" / "codeql-alerts.md").exists() + + def test_missing_sarif_exits_nonzero(self, tmp_path: Path) -> None: + """Missing sarif exits nonzero.""" + log = tmp_path / "codeql.log" + with pytest.raises(FileNotFoundError): + M.main(["--sarif", str(tmp_path / "no_such.sarif"), "--log-path", str(log)]) + + def test_github_output_written(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Github output written.""" + sarif_dir = self._write_sarif(tmp_path / "sarif", [_make_result(baseline_state="new")]) + out_file = tmp_path / "gh_output.txt" + monkeypatch.setenv("GITHUB_OUTPUT", str(out_file)) + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + M.main(["--sarif", str(sarif_dir), "--min-level", "warning", "--log-path", str(log)]) + content = out_file.read_text() + assert "new_alerts=true" in content + assert "comment_path=" in content + + def test_github_output_false_when_no_alerts( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Github output false when no alerts.""" + sarif_dir = self._write_sarif(tmp_path / "sarif", []) + out_file = tmp_path / "gh_output.txt" + monkeypatch.setenv("GITHUB_OUTPUT", str(out_file)) + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + M.main(["--sarif", str(sarif_dir), "--min-level", "warning", "--log-path", str(log)]) + content = out_file.read_text() + assert "new_alerts=false" in content + + def test_directory_of_sarif_files( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Directory of sarif files.""" + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + for lang in ("cpp", "python"): + sarif = _make_sarif([_make_result(rule_id=f"{lang}/r1", baseline_state="new")]) + (sarif_dir / f"{lang}.sarif").write_text(json.dumps(sarif), 
encoding="utf-8") + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + rc = M.main(["--sarif", str(sarif_dir), "--min-level", "warning", "--log-path", str(log)]) + assert rc == 0 + comment = (tmp_path / "runner" / "codeql-alerts.md").read_text() + assert "2 new CodeQL alerts" in comment + + +class TestMainApiMode: + """End-to-end tests for API comparison mode (no SARIF baselineState).""" + + def _empty_sarif_dir(self, base: Path) -> Path: + return _write_sarif_to(base / "sarif", []) # no baselineState results + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_api_mode_new_alert( + self, + mock_pag: MagicMock, + mock_req: MagicMock, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Api mode new alert.""" + sarif_dir = self._empty_sarif_dir(tmp_path) + pr_alert = _make_api_alert(number=1, severity="error", analysis_key="ak:1") + + def _paginate(owner, repo, *, state="open", ref=None): + if ref == "refs/pull/7/merge": + yield pr_alert + # main and base return nothing + + mock_pag.side_effect = _paginate + mock_req.side_effect = [ + {"base": {"ref": "main", "sha": "base_sha"}}, # PR info + [{"sha": "prev000"}], # commits (1 commit, no prev) + ] + monkeypatch.setenv("GITHUB_REPOSITORY", "owner/repo") + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + rc = M.main( + [ + "--sarif", + str(sarif_dir), + "--ref", + "refs/pull/7/merge", + "--min-level", + "warning", + "--log-path", + str(log), + ] + ) + assert rc == 0 + comment = (tmp_path / "runner" / "codeql-alerts.md").read_text() + assert "new CodeQL alert" in comment + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_api_mode_min_level_filtering( + self, + mock_pag: MagicMock, + mock_req: MagicMock, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Api mode min level 
filtering.""" + sarif_dir = self._empty_sarif_dir(tmp_path) + note_alert = _make_api_alert(number=2, severity="note", analysis_key="ak:2") + + def _paginate(owner, repo, *, state="open", ref=None): + if ref == "refs/pull/7/merge": + yield note_alert + + mock_pag.side_effect = _paginate + mock_req.side_effect = [ + {"base": {"ref": "main", "sha": "base_sha"}}, + [{"sha": "prev000"}], + ] + monkeypatch.setenv("GITHUB_REPOSITORY", "owner/repo") + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + out_file = tmp_path / "gh_output.txt" + monkeypatch.setenv("GITHUB_OUTPUT", str(out_file)) + log = tmp_path / "codeql.log" + M.main( + [ + "--sarif", + str(sarif_dir), + "--ref", + "refs/pull/7/merge", + "--min-level", + "warning", + "--log-path", + str(log), + ] + ) + content = out_file.read_text() + # Note-level alert must not be reported when threshold is warning + assert "new_alerts=false" in content + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_api_mode_github_api_error_exits_2( + self, + mock_pag: MagicMock, + mock_req: MagicMock, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Api mode github api error exits 2.""" + sarif_dir = self._empty_sarif_dir(tmp_path) + mock_pag.side_effect = M.GitHubAPIError("403 Forbidden") + monkeypatch.setenv("GITHUB_REPOSITORY", "owner/repo") + log = tmp_path / "codeql.log" + rc = M.main( + [ + "--sarif", + str(sarif_dir), + "--ref", + "refs/pull/7/merge", + "--min-level", + "warning", + "--log-path", + str(log), + ] + ) + assert rc == 2 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_api_mode_missing_github_repository_exits_2( + self, + mock_pag: MagicMock, + mock_req: MagicMock, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Api mode missing github repository exits 2.""" + sarif_dir = self._empty_sarif_dir(tmp_path) + 
monkeypatch.delenv("GITHUB_REPOSITORY", raising=False) + log = tmp_path / "codeql.log" + rc = M.main( + [ + "--sarif", + str(sarif_dir), + "--ref", + "refs/pull/7/merge", + "--min-level", + "warning", + "--log-path", + str(log), + ] + ) + assert rc == 2 + + @patch("check_codeql_alerts._api_request") + @patch("check_codeql_alerts._paginate_alerts_api") + def test_api_mode_skipped_when_sarif_has_baseline( + self, + mock_pag: MagicMock, + mock_req: MagicMock, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """SARIF with baselineState results must suppress API mode entirely.""" + sarif_dir = tmp_path / "sarif" + sarif_dir.mkdir() + sarif = _make_sarif([_make_result(baseline_state="new", level="error")]) + (sarif_dir / "results.sarif").write_text(json.dumps(sarif), encoding="utf-8") + + monkeypatch.setenv("GITHUB_REPOSITORY", "owner/repo") + monkeypatch.setenv("RUNNER_TEMP", str(tmp_path / "runner")) + log = tmp_path / "codeql.log" + M.main( + [ + "--sarif", + str(sarif_dir), + "--ref", + "refs/pull/7/merge", + "--min-level", + "warning", + "--log-path", + str(log), + ] + ) + mock_pag.assert_not_called() + mock_req.assert_not_called() diff --git a/scripts/test/test_clang_tidy_check_summary.py b/scripts/test/test_clang_tidy_check_summary.py new file mode 100644 index 000000000..7f5ed4b47 --- /dev/null +++ b/scripts/test/test_clang_tidy_check_summary.py @@ -0,0 +1,410 @@ +"""Tests for clang_tidy_check_summary.py. + +Coverage strategy +----------------- +Unit tests cover each public and module-private function in isolation. 
+ +Integration tests exercise main() end-to-end matching how the script is used +from the clang-tidy check/fix workflows: + + python3 scripts/clang_tidy_check_summary.py [input.yaml] [-o output.md] [--links] +""" + +from __future__ import annotations + +import sys +from io import StringIO +from pathlib import Path +from unittest.mock import patch +from urllib.parse import urlparse + +import pytest + +# sys.path is set up by scripts/test/conftest.py. +import clang_tidy_check_summary as M # noqa: E402 + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _yaml(diagnostics: list[dict]) -> str: + """Minimal clang-tidy YAML with a Diagnostics list.""" + import yaml + + return yaml.dump({"Diagnostics": diagnostics}) + + +def _diag( + name: str = "readability-identifier-naming", + file: str = "/src/foo.cpp", + offset: int = 42, +) -> dict: + """Return a minimal diagnostic entry dict.""" + return { + "DiagnosticName": name, + "DiagnosticMessage": {"FilePath": file, "FileOffset": offset}, + } + + +# --------------------------------------------------------------------------- +# _load_diagnostics +# --------------------------------------------------------------------------- + + +class TestLoadDiagnostics: + """Tests for M._load_diagnostics.""" + + def test_reads_from_file(self, tmp_path: Path) -> None: + """Valid YAML file returns the Diagnostics list.""" + p = tmp_path / "fixes.yaml" + p.write_text(_yaml([_diag()]), encoding="utf-8") + result = M._load_diagnostics(p) + assert len(result) == 1 + assert result[0]["DiagnosticName"] == "readability-identifier-naming" + + def test_reads_from_stdin(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Passing None reads from stdin.""" + monkeypatch.setattr(sys, "stdin", StringIO(_yaml([_diag("modernize-use-override")]))) + result = M._load_diagnostics(None) + assert result[0]["DiagnosticName"] == 
"modernize-use-override" + + def test_invalid_yaml_returns_empty( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Malformed YAML prints an error to stderr and returns [].""" + p = tmp_path / "bad.yaml" + p.write_text("{not: valid: yaml:", encoding="utf-8") + result = M._load_diagnostics(p) + assert result == [] + assert "Failed to parse" in capsys.readouterr().err + + def test_non_dict_yaml_returns_empty(self, tmp_path: Path) -> None: + """Top-level YAML that is not a dict returns [].""" + import yaml + + p = tmp_path / "list.yaml" + p.write_text(yaml.dump([1, 2, 3]), encoding="utf-8") + result = M._load_diagnostics(p) + assert result == [] + + def test_missing_diagnostics_key_returns_empty(self, tmp_path: Path) -> None: + """Dict without a Diagnostics key returns [].""" + import yaml + + p = tmp_path / "no_diag.yaml" + p.write_text(yaml.dump({"Other": "value"}), encoding="utf-8") + result = M._load_diagnostics(p) + assert result == [] + + def test_non_list_diagnostics_value_returns_empty(self, tmp_path: Path) -> None: + """Diagnostics key mapped to a non-list returns [].""" + import yaml + + p = tmp_path / "bad_diag.yaml" + p.write_text(yaml.dump({"Diagnostics": "not-a-list"}), encoding="utf-8") + result = M._load_diagnostics(p) + assert result == [] + + def test_empty_diagnostics_returns_empty_list(self, tmp_path: Path) -> None: + """Diagnostics: [] returns an empty list.""" + p = tmp_path / "empty.yaml" + p.write_text(_yaml([]), encoding="utf-8") + result = M._load_diagnostics(p) + assert result == [] + + +# --------------------------------------------------------------------------- +# count_unique_diagnostics +# --------------------------------------------------------------------------- + + +class TestCountUniqueDiagnostics: + """Tests for M.count_unique_diagnostics.""" + + def test_single_entry_counted(self) -> None: + """One diagnostic yields count of 1.""" + diags = [_diag("readability-braces-around-statements")] + result = 
M.count_unique_diagnostics(diags) + assert result == {"readability-braces-around-statements": 1} + + def test_identical_triplet_counted_once(self) -> None: + """Duplicate (name, file, offset) triplets collapse to count of 1.""" + diags = [_diag("cert-err34-c", "/a.cpp", 10), _diag("cert-err34-c", "/a.cpp", 10)] + result = M.count_unique_diagnostics(diags) + assert result == {"cert-err34-c": 1} + + def test_different_offsets_counted_separately(self) -> None: + """Same name and file, different offsets → count of 2.""" + diags = [_diag("cert-err34-c", "/a.cpp", 10), _diag("cert-err34-c", "/a.cpp", 20)] + result = M.count_unique_diagnostics(diags) + assert result == {"cert-err34-c": 2} + + def test_multiple_checks_counted_independently(self) -> None: + """Two different check names produce two independent entries.""" + diags = [_diag("check-a", "/a.cpp", 1), _diag("check-b", "/b.cpp", 2)] + result = M.count_unique_diagnostics(diags) + assert result == {"check-a": 1, "check-b": 1} + + def test_empty_list_returns_empty_dict(self) -> None: + """No diagnostics yields an empty dict.""" + assert M.count_unique_diagnostics([]) == {} + + def test_missing_diagnostic_name_defaults_to_clang_tidy(self) -> None: + """An entry without DiagnosticName defaults to 'clang-tidy'.""" + diags = [{"DiagnosticMessage": {"FilePath": "/x.cpp", "FileOffset": 0}}] + result = M.count_unique_diagnostics(diags) + assert "clang-tidy" in result + + def test_invalid_offset_treated_as_none(self) -> None: + """Non-numeric FileOffset is stored as None (not an error).""" + diag_bad = { + "DiagnosticName": "some-check", + "DiagnosticMessage": {"FilePath": "/f.cpp", "FileOffset": "not-an-int"}, + } + result = M.count_unique_diagnostics([diag_bad]) + assert result == {"some-check": 1} + + def test_none_offset_counted_distinctly(self) -> None: + """Two entries with the same name+file but offset=None are one unique occurrence.""" + msg = {"FilePath": "/f.cpp", "FileOffset": None} + diag_a = {"DiagnosticName": 
"x", "DiagnosticMessage": msg} + diag_b = {"DiagnosticName": "x", "DiagnosticMessage": msg} + result = M.count_unique_diagnostics([diag_a, diag_b]) + assert result == {"x": 1} + + def test_missing_diagnostic_message_does_not_raise(self) -> None: + """Entry with no DiagnosticMessage key is handled gracefully.""" + diags = [{"DiagnosticName": "my-check"}] + result = M.count_unique_diagnostics(diags) + assert result == {"my-check": 1} + + +# --------------------------------------------------------------------------- +# _check_url +# --------------------------------------------------------------------------- + + +class TestCheckUrl: + """Tests for M._check_url.""" + + def test_clang_analyzer_prefix(self) -> None: + """clang-analyzer-* checks map to the clang-analyzer category.""" + url = M._check_url("clang-analyzer-core.NullDereference") + assert "clang-analyzer/core.NullDereference" in url + + def test_category_check_split_on_first_dash(self) -> None: + """readability-identifier-naming maps to readability/identifier-naming.""" + url = M._check_url("readability-identifier-naming") + assert "readability/identifier-naming" in url + + def test_no_dash_falls_back_to_root(self) -> None: + """A check with no dash still produces a valid URL.""" + url = M._check_url("clang-tidy") + parsed = urlparse(url) + assert parsed.hostname == "clang.llvm.org" + + def test_url_starts_with_https(self) -> None: + """All generated URLs start with https://.""" + assert M._check_url("modernize-use-override").startswith("https://") + + def test_url_ends_with_html(self) -> None: + """Generated URLs end with .html (links to the check documentation page).""" + assert M._check_url("cert-err34-c").endswith(".html") + + def test_check_url_in_format_checklist_link(self) -> None: + """The URL embedded in --links output resolves correctly for a known check.""" + result = M.format_checklist({"modernize-use-nullptr": 1}, links=True) + assert "modernize/use-nullptr.html" in result + + +# 
--------------------------------------------------------------------------- +# format_checklist +# --------------------------------------------------------------------------- + + +class TestFormatChecklist: + """Tests for M.format_checklist.""" + + def test_basic_entry(self) -> None: + """Single count entry produces the expected markdown line.""" + result = M.format_checklist({"check-a": 3}) + assert result == "- [ ] check-a (3)" + + def test_entries_sorted_alphabetically(self) -> None: + """Multiple entries appear in alphabetical order.""" + counts = {"z-check": 1, "a-check": 2} + lines = M.format_checklist(counts).splitlines() + assert lines[0].startswith("- [ ] a-check") + assert lines[1].startswith("- [ ] z-check") + + def test_links_mode_wraps_name_in_markdown_link(self) -> None: + """--links mode produces a Markdown hyperlink for the check name.""" + result = M.format_checklist({"readability-magic-numbers": 1}, links=True) + assert "[readability-magic-numbers]" in result + assert "http" in result + + def test_no_links_mode_plain_name(self) -> None: + """Without links, the check name appears without markup.""" + result = M.format_checklist({"cert-err34-c": 2}, links=False) + assert "[cert-err34-c](" not in result + assert "cert-err34-c (2)" in result + + def test_empty_dict_returns_empty_string(self) -> None: + """Empty counts dict produces an empty string (no newlines).""" + assert M.format_checklist({}) == "" + + +# --------------------------------------------------------------------------- +# build_arg_parser +# --------------------------------------------------------------------------- + + +class TestBuildArgParser: + """Tests for M.build_arg_parser.""" + + def test_returns_parser(self) -> None: + """build_arg_parser() returns a working ArgumentParser.""" + import argparse + + parser = M.build_arg_parser() + assert isinstance(parser, argparse.ArgumentParser) + + def test_prog_name(self) -> None: + """Parser prog name identifies the script by filename.""" + 
parser = M.build_arg_parser() + assert "clang_tidy_check_summary" in parser.prog + + def test_defaults_without_args(self) -> None: + """Parsing [] gives None for input, None for output, and False for links.""" + args = M.build_arg_parser().parse_args([]) + assert args.input is None + assert args.output is None + assert args.links is False + + def test_input_path_parsed(self, tmp_path: Path) -> None: + """Positional input argument is parsed as a Path.""" + p = tmp_path / "f.yaml" + args = M.build_arg_parser().parse_args([str(p)]) + assert args.input == p + + def test_output_flag_parsed(self, tmp_path: Path) -> None: + """-o / --output flag is parsed as a Path.""" + p = tmp_path / "out.md" + args = M.build_arg_parser().parse_args(["-o", str(p)]) + assert args.output == p + + def test_links_flag_sets_true(self) -> None: + """--links sets links=True.""" + args = M.build_arg_parser().parse_args(["--links"]) + assert args.links is True + + def test_help_mentions_fixes_yaml(self, capsys: pytest.CaptureFixture[str]) -> None: + """--help output mentions the YAML file to help users find the right file.""" + with pytest.raises(SystemExit): + M.build_arg_parser().parse_args(["--help"]) + out = capsys.readouterr().out + assert "yaml" in out.lower() or "YAML" in out + + +# --------------------------------------------------------------------------- +# main() +# --------------------------------------------------------------------------- + + +class TestMain: + """Integration tests for M.main().""" + + def _write_yaml(self, path: Path, diagnostics: list[dict]) -> Path: + """Write a minimal clang-tidy YAML to path and return it.""" + path.write_text(_yaml(diagnostics), encoding="utf-8") + return path + + def test_no_diagnostics_prints_message_and_returns_zero( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """No diagnostics → informational message and exit code 0.""" + p = self._write_yaml(tmp_path / "empty.yaml", []) + with patch("sys.argv", ["prog", 
str(p)]): + rc = M.main() + assert rc == 0 + assert "No diagnostics" in capsys.readouterr().err + + def test_single_diagnostic_written_to_stdout( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """One diagnostic prints the checklist to stdout.""" + p = self._write_yaml(tmp_path / "diag.yaml", [_diag("modernize-use-nullptr")]) + with patch("sys.argv", ["prog", str(p)]): + rc = M.main() + assert rc == 0 + out = capsys.readouterr().out + assert "modernize-use-nullptr" in out + assert "- [ ]" in out + + def test_output_written_to_file(self, tmp_path: Path) -> None: + """With -o, the checklist is written to a file, not stdout.""" + p = self._write_yaml(tmp_path / "diag.yaml", [_diag("cert-err34-c")]) + out_file = tmp_path / "summary.md" + with patch("sys.argv", ["prog", str(p), "-o", str(out_file)]): + rc = M.main() + assert rc == 0 + content = out_file.read_text(encoding="utf-8") + assert "cert-err34-c" in content + assert content.endswith("\n") + + def test_links_mode_produces_hyperlinks( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """--links flag produces Markdown hyperlinks in output.""" + p = self._write_yaml(tmp_path / "diag.yaml", [_diag("readability-magic-numbers")]) + with patch("sys.argv", ["prog", str(p), "--links"]): + M.main() + out = capsys.readouterr().out + assert "http" in out + + def test_duplicate_diagnostics_collapsed( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Identical (name, file, offset) triplets appear as count=1.""" + diag = _diag("check-x", "/f.cpp", 10) + p = self._write_yaml(tmp_path / "dup.yaml", [diag, diag]) + with patch("sys.argv", ["prog", str(p)]): + M.main() + out = capsys.readouterr().out + assert "check-x (1)" in out + + def test_checklist_alphabetically_ordered( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Checklist entries appear in alphabetical order.""" + diags = [_diag("z-check", "/a.cpp", 1), 
_diag("a-check", "/b.cpp", 2)] + p = self._write_yaml(tmp_path / "multi.yaml", diags) + with patch("sys.argv", ["prog", str(p)]): + M.main() + out = capsys.readouterr().out + lines = [ln for ln in out.splitlines() if ln.startswith("- [ ]")] + assert lines[0].startswith("- [ ] a-check") + assert lines[1].startswith("- [ ] z-check") + + def test_stdin_input( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """With no positional argument, input is read from stdin.""" + yaml_text = _yaml([_diag("cppcoreguidelines-avoid-goto")]) + monkeypatch.setattr(sys, "stdin", StringIO(yaml_text)) + with patch("sys.argv", ["prog"]): + rc = M.main() + assert rc == 0 + out = capsys.readouterr().out + assert "cppcoreguidelines-avoid-goto" in out + + def test_output_file_has_trailing_newline(self, tmp_path: Path) -> None: + """Output file ends with exactly one newline.""" + p = self._write_yaml(tmp_path / "d.yaml", [_diag("cert-err34-c")]) + out_file = tmp_path / "out.md" + with patch("sys.argv", ["prog", str(p), "-o", str(out_file)]): + M.main() + raw = out_file.read_bytes() + assert raw.endswith(b"\n") + assert not raw.endswith(b"\n\n") diff --git a/scripts/test/test_clang_tidy_diff_issues.py b/scripts/test/test_clang_tidy_diff_issues.py index 6047a40f3..4061770ac 100644 --- a/scripts/test/test_clang_tidy_diff_issues.py +++ b/scripts/test/test_clang_tidy_diff_issues.py @@ -2,12 +2,7 @@ from __future__ import annotations -import sys -from pathlib import Path - -# Make the scripts directory importable. -sys.path.insert(0, str(Path(__file__).parent.parent)) - +# sys.path is set up by scripts/test/conftest.py. 
from clang_tidy_diff_issues import ( # noqa: E402 filter_new_issues, parse_diff, diff --git a/scripts/test/test_clang_tidy_fixes_to_problems.py b/scripts/test/test_clang_tidy_fixes_to_problems.py new file mode 100644 index 000000000..7c60eba60 --- /dev/null +++ b/scripts/test/test_clang_tidy_fixes_to_problems.py @@ -0,0 +1,502 @@ +"""Tests for clang_tidy_fixes_to_problems.py. + +Coverage strategy +----------------- +Unit tests cover every public and module-private function. + +Integration tests exercise main() end-to-end using temporary YAML files and +real (temporary) source files so that offset_to_line_col can read them. The +output format must match the gcc-compatible problem matcher used by VS Code: + + /abs/path/file.cpp:line:col: severity: message [check-name] +""" + +from __future__ import annotations + +import sys +from io import StringIO +from pathlib import Path +from unittest.mock import patch + +import pytest +import yaml + +# sys.path is set up by scripts/test/conftest.py. +import clang_tidy_fixes_to_problems as M # noqa: E402 + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _fixes_yaml( + diagnostics: list[dict], + main_source: str = "/src/main.cpp", +) -> str: + """Minimal clang-tidy-fixes YAML string.""" + return yaml.dump( + { + "MainSourceFile": main_source, + "Diagnostics": diagnostics, + } + ) + + +def _diag( + name: str = "readability-identifier-naming", + message: str = "variable is not in camelCase", + level: str = "warning", + file_path: str = "/src/main.cpp", + file_offset: int = 42, + notes: list[dict] | None = None, +) -> dict: + """Return a minimal diagnostic entry dict.""" + entry: dict = { + "DiagnosticName": name, + "Level": level, + "DiagnosticMessage": { + "FilePath": file_path, + "FileOffset": file_offset, + "Message": message, + }, + } + if notes is not None: + entry["Notes"] = notes + return entry + + 
+# --------------------------------------------------------------------------- +# parse_clang_tidy_fixes +# --------------------------------------------------------------------------- + + +class TestParseClangTidyFixes: + """Tests for M.parse_clang_tidy_fixes.""" + + def test_basic_parse(self) -> None: + """A single diagnostic is parsed into one Diagnostic dataclass.""" + text = _fixes_yaml([_diag()]) + main_src, diags = M.parse_clang_tidy_fixes(text) + assert main_src == "/src/main.cpp" + assert len(diags) == 1 + assert diags[0].check == "readability-identifier-naming" + assert diags[0].message == "variable is not in camelCase" + assert diags[0].level == "warning" + assert diags[0].file_path == "/src/main.cpp" + assert diags[0].file_offset == 42 + + def test_empty_diagnostics(self) -> None: + """Empty Diagnostics list yields empty result.""" + text = _fixes_yaml([]) + main_src, diags = M.parse_clang_tidy_fixes(text) + assert diags == [] + + def test_invalid_yaml_returns_empty(self, capsys: pytest.CaptureFixture[str]) -> None: + """Malformed YAML prints an error and returns (None, []).""" + main_src, diags = M.parse_clang_tidy_fixes("{not: valid: yaml:") + assert main_src is None + assert diags == [] + assert "Failed to parse" in capsys.readouterr().err + + def test_non_dict_yaml_returns_empty(self) -> None: + """Non-dict top-level YAML returns (None, []).""" + main_src, diags = M.parse_clang_tidy_fixes(yaml.dump([1, 2, 3])) + assert main_src is None + assert diags == [] + + def test_notes_parsed(self) -> None: + """Notes list is parsed into DiagnosticNote objects.""" + note = {"FilePath": "/inc/foo.h", "FileOffset": 10, "Message": "Calling 'bar'"} + text = _fixes_yaml([_diag(notes=[note])]) + _, diags = M.parse_clang_tidy_fixes(text) + assert diags[0].notes is not None + assert len(diags[0].notes) == 1 + assert diags[0].notes[0].message == "Calling 'bar'" + + def test_invalid_offset_treated_as_none(self) -> None: + """Non-numeric FileOffset in a diagnostic is 
stored as None.""" + entry = { + "DiagnosticName": "my-check", + "Level": "warning", + "DiagnosticMessage": {"FilePath": "/f.cpp", "FileOffset": "bad", "Message": "oops"}, + } + raw = yaml.dump({"MainSourceFile": "/f.cpp", "Diagnostics": [entry]}) + _, diags = M.parse_clang_tidy_fixes(raw) + assert diags[0].file_offset is None + + def test_level_lowercased(self) -> None: + """Level field is stored in lowercase.""" + text = _fixes_yaml([_diag(level="Warning")]) + _, diags = M.parse_clang_tidy_fixes(text) + assert diags[0].level == "warning" + + def test_missing_level_defaults_to_warning(self) -> None: + """A diagnostic without Level defaults to 'warning'.""" + entry = { + "DiagnosticName": "my-check", + "DiagnosticMessage": {"FilePath": "/f.cpp", "FileOffset": 0, "Message": "msg"}, + } + raw = yaml.dump({"MainSourceFile": "/f.cpp", "Diagnostics": [entry]}) + _, diags = M.parse_clang_tidy_fixes(raw) + assert diags[0].level == "warning" + + def test_non_dict_diagnostic_entry_skipped(self) -> None: + """A non-dict entry in Diagnostics is skipped gracefully.""" + raw = yaml.dump({"MainSourceFile": "/f.cpp", "Diagnostics": ["not-a-dict"]}) + _, diags = M.parse_clang_tidy_fixes(raw) + assert diags == [] + + +# --------------------------------------------------------------------------- +# apply_path_map +# --------------------------------------------------------------------------- + + +class TestApplyPathMap: + """Tests for M.apply_path_map.""" + + def test_matching_prefix_replaced(self) -> None: + """A matching OLD prefix is replaced by NEW.""" + result = M.apply_path_map("/old/path/file.cpp", [("/old/path", "/new/path")]) + assert result == "/new/path/file.cpp" + + def test_first_matching_prefix_wins(self) -> None: + """Only the first matching mapping is applied.""" + result = M.apply_path_map("/a/b.cpp", [("/a", "/x"), ("/a", "/y")]) + assert result == "/x/b.cpp" + + def test_no_match_returns_original(self) -> None: + """A path that matches no mapping is returned 
unchanged.""" + result = M.apply_path_map("/unchanged/file.cpp", [("/other", "/z")]) + assert result == "/unchanged/file.cpp" + + def test_empty_mappings_returns_original(self) -> None: + """Empty mappings list returns the original path.""" + assert M.apply_path_map("/foo/bar.cpp", []) == "/foo/bar.cpp" + + +# --------------------------------------------------------------------------- +# offset_to_line_col +# --------------------------------------------------------------------------- + + +class TestOffsetToLineCol: + """Tests for M.offset_to_line_col.""" + + def test_start_of_file(self, tmp_path: Path) -> None: + """Offset 0 → line 1, col 1.""" + f = tmp_path / "f.cpp" + f.write_bytes(b"int main() {}\n") + assert M.offset_to_line_col(f, 0) == (1, 1) + + def test_middle_of_first_line(self, tmp_path: Path) -> None: + """Offset 4 (after 'int ') → line 1, col 5.""" + f = tmp_path / "f.cpp" + f.write_bytes(b"int main() {}\n") + assert M.offset_to_line_col(f, 4) == (1, 5) + + def test_second_line(self, tmp_path: Path) -> None: + """Offset immediately after the first newline → line 2, col 1.""" + f = tmp_path / "f.cpp" + f.write_bytes(b"line1\nline2\n") + assert M.offset_to_line_col(f, 6) == (2, 1) + + def test_beyond_eof_clamped(self, tmp_path: Path) -> None: + """Offset beyond EOF is clamped and does not raise.""" + f = tmp_path / "f.cpp" + f.write_bytes(b"ab\n") + line, col = M.offset_to_line_col(f, 9999) + assert line >= 1 + assert col >= 1 + + def test_nonexistent_file_returns_1_1(self, tmp_path: Path) -> None: + """A missing file returns (1, 1) without raising.""" + assert M.offset_to_line_col(tmp_path / "ghost.cpp", 10) == (1, 1) + + def test_empty_file_returns_1_1(self, tmp_path: Path) -> None: + """An empty file always returns (1, 1).""" + f = tmp_path / "empty.cpp" + f.write_bytes(b"") + assert M.offset_to_line_col(f, 0) == (1, 1) + + +# --------------------------------------------------------------------------- +# parse_path_map +# 
--------------------------------------------------------------------------- + + +class TestParsePathMap: + """Tests for M.parse_path_map.""" + + def test_single_mapping(self) -> None: + """A single OLD=NEW string is parsed into one tuple.""" + result = M.parse_path_map(["/__w/phlex=/workspace"]) + assert result == [("/__w/phlex", "/workspace")] + + def test_multiple_mappings(self) -> None: + """Multiple items produce multiple tuples, in order.""" + result = M.parse_path_map(["/a=/x", "/b=/y"]) + assert result == [("/a", "/x"), ("/b", "/y")] + + def test_empty_list_returns_empty(self) -> None: + """An empty list returns an empty list.""" + assert M.parse_path_map([]) == [] + + def test_missing_equals_raises_value_error(self) -> None: + """A mapping without '=' raises ValueError.""" + with pytest.raises(ValueError, match="Invalid"): + M.parse_path_map(["noequalssign"]) + + def test_value_may_contain_equals(self) -> None: + """Only the first '=' is used as the split point.""" + result = M.parse_path_map(["/old=/new=extra"]) + assert result == [("/old", "/new=extra")] + + +# --------------------------------------------------------------------------- +# is_within_workspace +# --------------------------------------------------------------------------- + + +class TestIsWithinWorkspace: + """Tests for M.is_within_workspace.""" + + def test_file_inside_workspace(self, tmp_path: Path) -> None: + """A path inside workspace_root returns True.""" + child = tmp_path / "src" / "file.cpp" + assert M.is_within_workspace(str(child), tmp_path) + + def test_file_outside_workspace(self, tmp_path: Path) -> None: + """A path outside workspace_root returns False.""" + other = tmp_path.parent / "other" / "file.cpp" + assert not M.is_within_workspace(str(other), tmp_path) + + def test_os_error_returns_false(self, tmp_path: Path) -> None: + """An OSError (e.g. 
bad path chars) returns False without raising.""" + assert not M.is_within_workspace("\x00bad", tmp_path) + + +# --------------------------------------------------------------------------- +# choose_workspace_note +# --------------------------------------------------------------------------- + + +class TestChooseWorkspaceNote: + """Tests for M.choose_workspace_note.""" + + def test_returns_none_when_no_notes(self, tmp_path: Path) -> None: + """Empty notes list returns None.""" + assert M.choose_workspace_note([], tmp_path) is None + + def test_returns_none_when_no_workspace_notes(self, tmp_path: Path) -> None: + """No notes inside the workspace returns None.""" + note = M.DiagnosticNote(file_path="/external/foo.h", message="msg") + assert M.choose_workspace_note([note], tmp_path) is None + + def test_prefers_calling_note(self, tmp_path: Path) -> None: + """A note starting with 'Calling '' is preferred over other workspace notes.""" + calling = M.DiagnosticNote(file_path=str(tmp_path / "a.cpp"), message="Calling 'foo'") + other = M.DiagnosticNote(file_path=str(tmp_path / "b.cpp"), message="Another note") + result = M.choose_workspace_note([other, calling], tmp_path) + assert result is calling + + def test_falls_back_to_first_workspace_note(self, tmp_path: Path) -> None: + """Without a 'Calling' note, the first workspace note is returned.""" + n1 = M.DiagnosticNote(file_path=str(tmp_path / "a.cpp"), message="Note A") + n2 = M.DiagnosticNote(file_path=str(tmp_path / "b.cpp"), message="Note B") + result = M.choose_workspace_note([n1, n2], tmp_path) + assert result is n1 + + def test_note_with_none_file_path_excluded(self, tmp_path: Path) -> None: + """A note with no file_path is excluded from workspace notes.""" + note = M.DiagnosticNote(file_path=None, message="orphan note") + assert M.choose_workspace_note([note], tmp_path) is None + + +# --------------------------------------------------------------------------- +# build_arg_parser +# 
--------------------------------------------------------------------------- + + +class TestBuildArgParser: + """Tests for M.build_arg_parser.""" + + def test_defaults(self) -> None: + """Parsing [] produces sensible defaults.""" + parser = M.build_arg_parser() + args = parser.parse_args([]) + assert args.input is None + assert args.output is None + assert args.path_map == [] + assert isinstance(args.workspace_root, Path) + + def test_prog_name(self) -> None: + """Parser prog name identifies the script by filename.""" + parser = M.build_arg_parser() + assert "clang_tidy_fixes_to_problems" in parser.prog + + def test_input_parsed(self, tmp_path: Path) -> None: + """Positional input is parsed as a Path.""" + p = tmp_path / "f.yaml" + args = M.build_arg_parser().parse_args([str(p)]) + assert args.input == p + + def test_path_map_flag(self) -> None: + """--path-map can be specified multiple times.""" + args = M.build_arg_parser().parse_args(["--path-map", "/a=/b", "--path-map", "/c=/d"]) + assert args.path_map == ["/a=/b", "/c=/d"] + + def test_help_mentions_path_map(self, capsys: pytest.CaptureFixture[str]) -> None: + """--help output explains --path-map for users navigating CI artifacts.""" + with pytest.raises(SystemExit): + M.build_arg_parser().parse_args(["--help"]) + out = capsys.readouterr().out + assert "path-map" in out or "path_map" in out + + +# --------------------------------------------------------------------------- +# main() — integration tests +# --------------------------------------------------------------------------- + + +class TestMain: + """Integration tests for M.main().""" + + def _write_fixes(self, path: Path, diagnostics: list[dict], main_source: str = "") -> Path: + """Write a fixes YAML and return its path.""" + path.write_text(_fixes_yaml(diagnostics, main_source), encoding="utf-8") + return path + + def test_no_diagnostics_produces_empty_output( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """An empty fixes 
file produces no output and returns 0.""" + p = self._write_fixes(tmp_path / "empty.yaml", []) + with patch("sys.argv", ["prog", str(p)]): + rc = M.main() + assert rc == 0 + assert capsys.readouterr().out == "" + + def test_single_diagnostic_written_to_stdout( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """One diagnostic with a real source file prints a gcc-style line.""" + src = tmp_path / "main.cpp" + src.write_bytes(b"int x;\n") + diag = _diag(file_path=str(src), file_offset=0) + p = self._write_fixes(tmp_path / "fixes.yaml", [diag], str(src)) + with patch("sys.argv", ["prog", str(p), "--workspace-root", str(tmp_path)]): + rc = M.main() + assert rc == 0 + out = capsys.readouterr().out + assert "readability-identifier-naming" in out + # gcc format: path:line:col: severity: message [check] + assert ": warning:" in out + + def test_output_written_to_file( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """With -o, output is written to a file and a summary is printed to stdout.""" + src = tmp_path / "main.cpp" + src.write_bytes(b"int x;\n") + diag = _diag(file_path=str(src), file_offset=0) + p = self._write_fixes(tmp_path / "fixes.yaml", [diag], str(src)) + out_file = tmp_path / "problems.txt" + with patch( + "sys.argv", + ["prog", str(p), "-o", str(out_file), "--workspace-root", str(tmp_path)], + ): + rc = M.main() + assert rc == 0 + assert out_file.exists() + content = out_file.read_text(encoding="utf-8") + assert "readability-identifier-naming" in content + # The summary line uses "diagnostic(s)" (with the parenthetical s) + stdout = capsys.readouterr().out + assert "diagnostic" in stdout + + def test_stdout_output_ends_with_newline( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """When writing to stdout with content, output ends with newline.""" + src = tmp_path / "main.cpp" + src.write_bytes(b"int x;\n") + diag = _diag(file_path=str(src), file_offset=0) + p = 
self._write_fixes(tmp_path / "fixes.yaml", [diag], str(src)) + with patch("sys.argv", ["prog", str(p), "--workspace-root", str(tmp_path)]): + M.main() + out = capsys.readouterr().out + assert out.endswith("\n") + + def test_path_map_applied(self, tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: + """--path-map translates paths in the output.""" + src = tmp_path / "main.cpp" + src.write_bytes(b"int x;\n") + fake_path = "/__w/phlex/phlex/phlex-src/main.cpp" + p = self._write_fixes( + tmp_path / "fixes.yaml", + [_diag(file_path=fake_path, file_offset=0)], + fake_path, + ) + with patch( + "sys.argv", + [ + "prog", + str(p), + "--path-map", + f"/__w/phlex/phlex/phlex-src={tmp_path}", + "--workspace-root", + str(tmp_path), + ], + ): + M.main() + out = capsys.readouterr().out + assert "/__w/phlex" not in out + + def test_stdin_input( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """Without a positional arg, the fixes YAML is read from stdin.""" + src = tmp_path / "main.cpp" + src.write_bytes(b"void f();\n") + yaml_text = _fixes_yaml([_diag(file_path=str(src), file_offset=0)], str(src)) + monkeypatch.setattr(sys, "stdin", StringIO(yaml_text)) + with patch("sys.argv", ["prog", "--workspace-root", str(tmp_path)]): + rc = M.main() + assert rc == 0 + + def test_diagnostic_without_file_path_skipped( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """A diagnostic with no FilePath and no MainSourceFile is skipped.""" + raw = yaml.dump( + { + "MainSourceFile": "", + "Diagnostics": [ + { + "DiagnosticName": "some-check", + "Level": "warning", + "DiagnosticMessage": {"FilePath": "", "FileOffset": 0, "Message": "oops"}, + } + ], + } + ) + yaml_path = tmp_path / "fixes.yaml" + yaml_path.write_text(raw, encoding="utf-8") + with patch("sys.argv", ["prog", str(yaml_path), "--workspace-root", str(tmp_path)]): + rc = M.main() + assert rc == 0 + assert capsys.readouterr().out == "" + + def 
test_invalid_level_defaults_to_warning( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """An unrecognised severity level falls back to 'warning'.""" + src = tmp_path / "main.cpp" + src.write_bytes(b"int x;\n") + diag = _diag(level="fatal", file_path=str(src), file_offset=0) + p = self._write_fixes(tmp_path / "fixes.yaml", [diag], str(src)) + with patch("sys.argv", ["prog", str(p), "--workspace-root", str(tmp_path)]): + M.main() + out = capsys.readouterr().out + assert ": warning:" in out diff --git a/scripts/test/test_codeql_reset_dismissed_alerts.py b/scripts/test/test_codeql_reset_dismissed_alerts.py new file mode 100644 index 000000000..730d84723 --- /dev/null +++ b/scripts/test/test_codeql_reset_dismissed_alerts.py @@ -0,0 +1,468 @@ +"""Tests for codeql_reset_dismissed_alerts.py. + +Coverage strategy +----------------- +All GitHub REST API calls are intercepted with unittest.mock so no network +requests are made. + +Unit tests cover: +- _token(): token lookup from environment variables +- _request(): request construction, success, and HTTPError handling +- _paginate_alerts(): single-page and multi-page iteration +- _to_alert(): parsing a raw alert dict into an Alert dataclass +- reopen_alert(): dry-run and live modes +- parse_args(): argument parsing + +Integration tests exercise main() end-to-end matching the documented usage: + + GITHUB_TOKEN=ghp_... python3 scripts/codeql_reset_dismissed_alerts.py \ + --owner Framework-R-D --repo phlex [--dry-run] +""" + +from __future__ import annotations + +import json +import urllib.error +import urllib.request +from io import BytesIO +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +# sys.path is set up by scripts/test/conftest.py. 
+import codeql_reset_dismissed_alerts as M # noqa: E402 + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _raw_alert( + number: int = 1, + html_url: str = "https://github.com/owner/repo/security/code-scanning/1", + rule_id: str = "py/sql-injection", + dismissed_reason: str | None = "won't fix", +) -> dict[str, Any]: + """Return a minimal raw alert dict as returned by the GitHub API.""" + return { + "number": number, + "html_url": html_url, + "rule": {"id": rule_id}, + "dismissed_reason": dismissed_reason, + } + + +def _fake_urlopen(response_body: bytes, status: int = 200): + """Return a context-manager mock that yields a response-like object.""" + mock_response = MagicMock() + mock_response.read.return_value = response_body + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + return mock_response + + +# --------------------------------------------------------------------------- +# _token +# --------------------------------------------------------------------------- + + +class TestToken: + """Tests for M._token.""" + + def test_github_token_env_used(self, monkeypatch: pytest.MonkeyPatch) -> None: + """GITHUB_TOKEN is picked up from the environment.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_abc123") + monkeypatch.delenv("GH_TOKEN", raising=False) + assert M._token() == "ghp_abc123" + + def test_gh_token_env_fallback(self, monkeypatch: pytest.MonkeyPatch) -> None: + """GH_TOKEN is used when GITHUB_TOKEN is absent.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.setenv("GH_TOKEN", "ghp_fallback") + assert M._token() == "ghp_fallback" + + def test_no_token_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Missing both token env vars raises GitHubAPIError.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GH_TOKEN", raising=False) + with 
pytest.raises(M.GitHubAPIError, match="GITHUB_TOKEN"): + M._token() + + +# --------------------------------------------------------------------------- +# _request +# --------------------------------------------------------------------------- + + +class TestRequest: + """Tests for M._request.""" + + def test_get_request_returns_parsed_json(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A successful GET returns the decoded JSON payload.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + body = json.dumps([{"number": 1}]).encode() + with patch("urllib.request.urlopen", return_value=_fake_urlopen(body)): + result = M._request("GET", "/repos/o/r/code-scanning/alerts") + assert result == [{"number": 1}] + + def test_empty_response_returns_empty_dict(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A 204-style response (empty body) returns {}.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + with patch("urllib.request.urlopen", return_value=_fake_urlopen(b"")): + result = M._request("PATCH", "/repos/o/r/code-scanning/alerts/1") + assert result == {} + + def test_http_error_raises_github_api_error(self, monkeypatch: pytest.MonkeyPatch) -> None: + """An HTTPError from urllib is converted to GitHubAPIError.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + error = urllib.error.HTTPError( + url="https://api.github.com/test", + code=403, + msg="Forbidden", + hdrs={}, # type: ignore[arg-type] + fp=BytesIO(b'{"message":"Forbidden"}'), + ) + with patch("urllib.request.urlopen", side_effect=error): + with pytest.raises(M.GitHubAPIError, match="403"): + M._request("GET", "/repos/o/r/code-scanning/alerts") + + def test_query_params_appended_to_url(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Query parameters are included in the request URL.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + captured_urls: list[str] = [] + + def fake_urlopen(req): + captured_urls.append(req.full_url) + return _fake_urlopen(b"[]") + + params = {"state": "dismissed", "page": 1} + with 
patch("urllib.request.urlopen", side_effect=fake_urlopen): + M._request("GET", "/repos/o/r/code-scanning/alerts", params=params) + + assert "state=dismissed" in captured_urls[0] + assert "page=1" in captured_urls[0] + + def test_post_sends_json_body(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A payload dict is serialised to JSON and sent as the request body.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + captured_data: list[bytes | None] = [] + + def fake_urlopen(req): + captured_data.append(req.data) + return _fake_urlopen(b"{}") + + with patch("urllib.request.urlopen", side_effect=fake_urlopen): + M._request("PATCH", "/repos/o/r/code-scanning/alerts/1", payload={"state": "open"}) + + assert captured_data[0] == b'{"state": "open"}' + + +# --------------------------------------------------------------------------- +# _paginate_alerts +# --------------------------------------------------------------------------- + + +class TestPaginateAlerts: + """Tests for M._paginate_alerts.""" + + def test_single_page_yields_all_alerts(self, monkeypatch: pytest.MonkeyPatch) -> None: + """All alerts on a single page are yielded and pagination stops.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + page1 = [_raw_alert(1), _raw_alert(2)] + + def fake_request(method, path, params=None, payload=None): + if params and params.get("page") == 1: + return page1 + return [] + + with patch("codeql_reset_dismissed_alerts._request", side_effect=fake_request): + results = list(M._paginate_alerts("owner", "repo")) + + assert len(results) == 2 + + def test_multi_page_yields_all_alerts(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Alerts spread across two pages are all yielded.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + page1 = [_raw_alert(i) for i in range(1, 4)] + page2 = [_raw_alert(i) for i in range(4, 6)] + + def fake_request(method, path, params=None, payload=None): + page = params.get("page", 1) if params else 1 + if page == 1: + return page1 + if page == 2: + return 
page2 + return [] + + with patch("codeql_reset_dismissed_alerts._request", side_effect=fake_request): + results = list(M._paginate_alerts("owner", "repo")) + + assert len(results) == 5 + + def test_unexpected_response_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + """A non-list API response raises GitHubAPIError.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + with patch("codeql_reset_dismissed_alerts._request", return_value={"error": "oops"}): + with pytest.raises(M.GitHubAPIError, match="expected a JSON list"): + list(M._paginate_alerts("owner", "repo")) + + +# --------------------------------------------------------------------------- +# _to_alert +# --------------------------------------------------------------------------- + + +class TestToAlert: + """Tests for M._to_alert.""" + + def test_full_alert_parsed(self) -> None: + """All fields of a raw alert dict are correctly mapped.""" + raw = _raw_alert( + 42, "https://github.com/o/r/security/code-scanning/42", "js/xss", "used in tests" + ) + alert = M._to_alert(raw) + assert alert.number == 42 + assert alert.html_url == "https://github.com/o/r/security/code-scanning/42" + assert alert.rule_id == "js/xss" + assert alert.dismissed_reason == "used in tests" + + def test_none_dismissed_reason(self) -> None: + """dismissed_reason=None is stored as None, not 'None'.""" + raw = _raw_alert(dismissed_reason=None) + alert = M._to_alert(raw) + assert alert.dismissed_reason is None + + def test_missing_rule_key(self) -> None: + """A raw dict without a 'rule' key produces an empty rule_id.""" + raw = {"number": 5, "html_url": "", "dismissed_reason": None} + alert = M._to_alert(raw) + assert alert.rule_id == "" + + +# --------------------------------------------------------------------------- +# reopen_alert +# --------------------------------------------------------------------------- + + +class TestReopenAlert: + """Tests for M.reopen_alert.""" + + def _make_alert(self, number: int = 1) -> M.Alert: + """Return a 
minimal Alert object.""" + return M.Alert(number=number, html_url="", rule_id="py/xss", dismissed_reason=None) + + def test_dry_run_does_not_call_api( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """In dry-run mode, no API call is made.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + with patch("codeql_reset_dismissed_alerts._request") as mock_req: + M.reopen_alert("owner", "repo", self._make_alert(), dry_run=True) + mock_req.assert_not_called() + assert "DRY RUN" in capsys.readouterr().out + + def test_live_mode_calls_patch( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """In live mode, PATCH /code-scanning/alerts/ is called.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + with patch("codeql_reset_dismissed_alerts._request") as mock_req: + mock_req.return_value = {} + M.reopen_alert("owner", "repo", self._make_alert(7), dry_run=False) + mock_req.assert_called_once_with( + "PATCH", + "/repos/owner/repo/code-scanning/alerts/7", + payload={"state": "open"}, + ) + assert "Reopened alert #7" in capsys.readouterr().out + + +# --------------------------------------------------------------------------- +# parse_args +# --------------------------------------------------------------------------- + + +class TestParseArgs: + """Tests for M.parse_args.""" + + def test_required_args_parsed(self) -> None: + """--owner and --repo are required and are parsed correctly.""" + args = M.parse_args(["--owner", "Framework-R-D", "--repo", "phlex"]) + assert args.owner == "Framework-R-D" + assert args.repo == "phlex" + assert args.dry_run is False + + def test_dry_run_flag(self) -> None: + """--dry-run sets dry_run=True.""" + args = M.parse_args(["--owner", "o", "--repo", "r", "--dry-run"]) + assert args.dry_run is True + + def test_missing_owner_exits(self) -> None: + """Missing --owner causes SystemExit.""" + with pytest.raises(SystemExit): + M.parse_args(["--repo", "phlex"]) + + def 
test_missing_repo_exits(self) -> None: + """Missing --repo causes SystemExit.""" + with pytest.raises(SystemExit): + M.parse_args(["--owner", "org"]) + + +# --------------------------------------------------------------------------- +# main() +# --------------------------------------------------------------------------- + + +class TestMain: + """Integration tests for M.main().""" + + def test_no_alerts_returns_zero( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """No dismissed alerts prints a message and returns 0.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + with patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter([])): + rc = M.main(["--owner", "org", "--repo", "repo"]) + assert rc == 0 + assert "No dismissed" in capsys.readouterr().out + + def test_alerts_listed_and_reopened( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """Dismissed alerts are listed and then reopened.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + raw_alerts = [_raw_alert(1, rule_id="py/xss"), _raw_alert(2, rule_id="js/xss")] + with ( + patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter(raw_alerts)), + patch("codeql_reset_dismissed_alerts._request", return_value={}) as mock_req, + ): + rc = M.main(["--owner", "org", "--repo", "repo"]) + assert rc == 0 + # Two PATCH calls (one per alert) + assert mock_req.call_count == 2 + out = capsys.readouterr().out + assert "2 dismissed alert" in out + assert "All dismissed alerts reopened" in out + + def test_dry_run_no_api_calls( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """--dry-run lists alerts without making PATCH calls.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + raw_alerts = [_raw_alert(1)] + with ( + patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter(raw_alerts)), + patch("codeql_reset_dismissed_alerts._request") as mock_req, + ): + rc 
= M.main(["--owner", "org", "--repo", "repo", "--dry-run"]) + assert rc == 0 + mock_req.assert_not_called() + out = capsys.readouterr().out + assert "DRY RUN" in out + assert "Dry run complete" in out + + def test_api_error_on_list_returns_one( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """A GitHubAPIError while listing alerts returns exit code 1.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + with patch( + "codeql_reset_dismissed_alerts._paginate_alerts", + side_effect=M.GitHubAPIError("network failure"), + ): + rc = M.main(["--owner", "org", "--repo", "repo"]) + assert rc == 1 + assert "Error" in capsys.readouterr().err + + def test_api_error_on_reopen_returns_one( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """A GitHubAPIError while reopening an alert returns exit code 1.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + raw_alerts = [_raw_alert(1)] + with ( + patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter(raw_alerts)), + patch( + "codeql_reset_dismissed_alerts._request", + side_effect=M.GitHubAPIError("patch failed"), + ), + ): + rc = M.main(["--owner", "org", "--repo", "repo"]) + assert rc == 1 + assert "Failed to reopen" in capsys.readouterr().err + + def test_dismissed_reason_printed( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """The dismissal reason is included in the listing output.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + raw_alerts = [_raw_alert(1, dismissed_reason="used in tests")] + with ( + patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter(raw_alerts)), + patch("codeql_reset_dismissed_alerts._request", return_value={}), + ): + M.main(["--owner", "org", "--repo", "repo"]) + out = capsys.readouterr().out + assert "used in tests" in out + + def test_no_token_returns_one( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> 
None: + """Missing token env var causes main to return 1.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GH_TOKEN", raising=False) + with patch( + "codeql_reset_dismissed_alerts._paginate_alerts", + side_effect=M.GitHubAPIError("Set GITHUB_TOKEN"), + ): + rc = M.main(["--owner", "org", "--repo", "repo"]) + assert rc == 1 + + def test_alert_url_printed_in_listing( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """The html_url of each alert appears in the listing output.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + raw_alerts = [ + _raw_alert( + 1, + html_url="https://github.com/org/repo/security/code-scanning/1", + rule_id="py/xss", + ) + ] + with ( + patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter(raw_alerts)), + patch("codeql_reset_dismissed_alerts._request", return_value={}), + ): + M.main(["--owner", "org", "--repo", "repo"]) + out = capsys.readouterr().out + assert "https://github.com/org/repo/security/code-scanning/1" in out + + def test_dry_run_completion_message( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """Dry-run completion message says 'no changes made'.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + raw_alerts = [_raw_alert(1)] + with ( + patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter(raw_alerts)), + patch("codeql_reset_dismissed_alerts._request"), + ): + M.main(["--owner", "org", "--repo", "repo", "--dry-run"]) + out = capsys.readouterr().out + assert "no changes made" in out + + def test_live_completion_message( + self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] + ) -> None: + """Live-run completion message says 'All dismissed alerts reopened.'.""" + monkeypatch.setenv("GITHUB_TOKEN", "tok") + raw_alerts = [_raw_alert(1)] + with ( + patch("codeql_reset_dismissed_alerts._paginate_alerts", return_value=iter(raw_alerts)), + 
patch("codeql_reset_dismissed_alerts._request", return_value={}), + ): + M.main(["--owner", "org", "--repo", "repo"]) + out = capsys.readouterr().out + assert "All dismissed alerts reopened" in out diff --git a/scripts/test/test_coverage_scripts.py b/scripts/test/test_coverage_scripts.py new file mode 100644 index 000000000..dc93d4e64 --- /dev/null +++ b/scripts/test/test_coverage_scripts.py @@ -0,0 +1,797 @@ +"""Tests for coverage-related scripts. + +Covers: +- normalize_coverage_lcov.py: _relative_subpath, _is_repo_content, normalize, main +- normalize_coverage_xml.py: _relative_subpath, normalize, main +- export_llvm_lcov.py: build_parser, main +- create_coverage_symlinks.py: should_link, iter_source_files, create_symlinks, + parse_args, main +""" + +from __future__ import annotations + +import subprocess +import textwrap +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +# sys.path is set up by scripts/test/conftest.py. 
+import create_coverage_symlinks as ccs # noqa: E402 +import export_llvm_lcov as ell # noqa: E402 +import normalize_coverage_lcov as ncl # noqa: E402 +import normalize_coverage_xml as ncx # noqa: E402 + +# =========================================================================== +# normalize_coverage_lcov +# =========================================================================== + + +class TestNclRelativeSubpath: + """_relative_subpath from normalize_coverage_lcov.""" + + def test_direct_subpath(self, tmp_path: Path) -> None: + """Direct subpath.""" + base = tmp_path / "repo" + path = base / "src" / "foo.cpp" + base.mkdir() + result = ncl._relative_subpath(path, base) + assert result == Path("src/foo.cpp") + + def test_path_outside_base_returns_none(self, tmp_path: Path) -> None: + """Path outside base returns none.""" + base = tmp_path / "repo" + path = tmp_path / "other" / "foo.cpp" + result = ncl._relative_subpath(path, base) + assert result is None + + def test_none_base_returns_none(self, tmp_path: Path) -> None: + """None base returns none.""" + assert ncl._relative_subpath(tmp_path / "foo", None) is None + + def test_resolved_symlink_path(self, tmp_path: Path) -> None: + """Resolved symlink path.""" + real = tmp_path / "real" + real.mkdir() + link = tmp_path / "link" + link.symlink_to(real) + child = real / "bar.cpp" + child.write_text("", encoding="utf-8") + # path is under link (symlink), base is real — should still resolve + result = ncl._relative_subpath(link / "bar.cpp", real) + assert result is not None + + +class TestNclIsRepoContent: + """_is_repo_content from normalize_coverage_lcov.""" + + def test_phlex_prefix_accepted(self) -> None: + """Phlex prefix accepted.""" + assert ncl._is_repo_content(Path("phlex/src/foo.cpp")) + + def test_plugins_prefix_accepted(self) -> None: + """Plugins prefix accepted.""" + assert ncl._is_repo_content(Path("plugins/py/bar.py")) + + def test_form_prefix_accepted(self) -> None: + """Form prefix accepted.""" + 
assert ncl._is_repo_content(Path("form/src/baz.c")) + + def test_build_clang_prefix_accepted(self) -> None: + """Build clang prefix accepted.""" + assert ncl._is_repo_content(Path("build-clang/CMakeFiles/foo.cpp")) + + def test_coverage_generated_in_parts_accepted(self) -> None: + """Coverage generated in parts accepted.""" + assert ncl._is_repo_content(Path("some/.coverage-generated/foo.cpp")) + + def test_unknown_prefix_rejected(self) -> None: + """Unknown prefix rejected.""" + assert not ncl._is_repo_content(Path("external/lib/foo.h")) + + def test_empty_path_rejected(self) -> None: + """Empty path rejected.""" + assert not ncl._is_repo_content(Path("")) + + +class TestNclNormalize: + """normalize() from normalize_coverage_lcov.""" + + def _make_lcov(self, sf_path: str, extra_lines: str = "") -> str: + return textwrap.dedent( + f"""\ + TN: + SF:{sf_path} + FN:1,main + FNDA:1,main + FNF:1 + FNH:1 + DA:1,1 + LF:1 + LH:1 + {extra_lines}end_of_record + """ + ) + + def test_absolute_path_rewritten_relative(self, tmp_path: Path) -> None: + """Absolute path rewritten relative.""" + repo = tmp_path / "repo" + repo.mkdir() + src = repo / "phlex" / "src" / "foo.cpp" + src.parent.mkdir(parents=True) + src.write_text("int main(){}", encoding="utf-8") + + report = tmp_path / "coverage.info" + report.write_text(self._make_lcov(str(src)), encoding="utf-8") + + missing, external = ncl.normalize(report, repo) + assert missing == [] + assert external == [] + content = report.read_text() + assert "SF:phlex/src/foo.cpp" in content + + def test_external_file_excluded_from_output(self, tmp_path: Path) -> None: + """External file excluded from output.""" + repo = tmp_path / "repo" + repo.mkdir() + report = tmp_path / "coverage.info" + report.write_text(self._make_lcov("/usr/include/stdio.h"), encoding="utf-8") + + missing, external = ncl.normalize(report, repo) + assert len(external) == 1 + # Record must be dropped (no SF: for the external file) + content = report.read_text() + assert 
"SF:" not in content + + def test_missing_file_in_repo_reported(self, tmp_path: Path) -> None: + """Missing file in repo reported.""" + repo = tmp_path / "repo" + repo.mkdir() + (repo / "phlex").mkdir() + report = tmp_path / "coverage.info" + # file is within the repo prefix but does not exist on disk + absent = str(repo / "phlex" / "absent.cpp") + report.write_text(self._make_lcov(absent), encoding="utf-8") + + missing, external = ncl.normalize(report, repo) + assert len(missing) == 1 + assert "phlex/absent.cpp" in missing[0] + + def test_relative_sf_resolved_against_coverage_root(self, tmp_path: Path) -> None: + """Relative sf resolved against coverage root.""" + repo = tmp_path / "repo" + repo.mkdir() + coverage_root = repo / "phlex" + coverage_root.mkdir() + src = coverage_root / "foo.cpp" + src.write_text("", encoding="utf-8") + + report = tmp_path / "coverage.info" + # SF path is relative to coverage_root + report.write_text(self._make_lcov("foo.cpp"), encoding="utf-8") + + missing, external = ncl.normalize(report, repo, coverage_root=coverage_root) + assert missing == [] + content = report.read_text() + assert "SF:phlex/foo.cpp" in content + + def test_absolute_paths_flag(self, tmp_path: Path) -> None: + """Absolute paths flag.""" + repo = tmp_path / "repo" + repo.mkdir() + src = repo / "phlex" / "x.cpp" + src.parent.mkdir() + src.write_text("", encoding="utf-8") + + report = tmp_path / "coverage.info" + report.write_text(self._make_lcov(str(src)), encoding="utf-8") + + ncl.normalize(report, repo, absolute_paths=True) + content = report.read_text() + # path must be absolute + assert content.startswith("TN:") or "SF:/" in content + assert "SF:/" in content + + def test_record_without_sf_preserved(self, tmp_path: Path) -> None: + """Record without sf preserved.""" + repo = tmp_path / "repo" + repo.mkdir() + report = tmp_path / "coverage.info" + # A record with no SF line (unusual but defensively handled) + report.write_text("TN:\nend_of_record\n", 
encoding="utf-8") + + missing, external = ncl.normalize(report, repo) + assert missing == [] + assert external == [] + + def test_empty_sf_value_preserved(self, tmp_path: Path) -> None: + """Empty sf value preserved.""" + repo = tmp_path / "repo" + repo.mkdir() + report = tmp_path / "coverage.info" + report.write_text("TN:\nSF:\nend_of_record\n", encoding="utf-8") + + missing, external = ncl.normalize(report, repo) + assert missing == [] + assert external == [] + + def test_trailing_lines_without_end_of_record_handled(self, tmp_path: Path) -> None: + """Trailing lines without end of record handled.""" + repo = tmp_path / "repo" + repo.mkdir() + # No end_of_record terminator — should not crash + report = tmp_path / "coverage.info" + report.write_text("TN:\nSF:\nDA:1,1\n", encoding="utf-8") + ncl.normalize(report, repo) # must not raise + + +class TestNclMain: + """main() from normalize_coverage_lcov.""" + + def _make_report(self, tmp_path: Path, sf_path: str) -> Path: + report = tmp_path / "coverage.info" + report.write_text(f"TN:\nSF:{sf_path}\nend_of_record\n", encoding="utf-8") + return report + + def test_success_returns_zero(self, tmp_path: Path) -> None: + """Success returns zero.""" + repo = tmp_path / "repo" + repo.mkdir() + src = repo / "phlex" / "ok.cpp" + src.parent.mkdir() + src.write_text("", encoding="utf-8") + report = self._make_report(tmp_path, str(src)) + rc = ncl.main([str(report), "--repo-root", str(repo)]) + assert rc == 0 + + def test_missing_file_returns_nonzero(self, tmp_path: Path) -> None: + """Missing file returns nonzero.""" + repo = tmp_path / "repo" + repo.mkdir() + (repo / "phlex").mkdir() + report = self._make_report(tmp_path, str(repo / "phlex" / "missing.cpp")) + rc = ncl.main([str(report), "--repo-root", str(repo)]) + assert rc != 0 + + def test_external_file_warns_to_stderr( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """External file warns to stderr.""" + repo = tmp_path / "repo" + repo.mkdir() + 
report = self._make_report(tmp_path, "/usr/include/stdio.h") + ncl.main([str(report), "--repo-root", str(repo)]) + assert "outside the repository" in capsys.readouterr().err + + +# =========================================================================== +# normalize_coverage_xml +# =========================================================================== + + +def _make_xml( + filenames: list[str], + source: str | None = None, +) -> str: + sources = f"{source or '.'}" if source else "" + classes = "".join( + f'' + for fn in filenames + ) + return ( + '\n' + '\n' + f"{sources}\n" + f"{classes}\n" + "\n" + ) + + +class TestNcxRelativeSubpath: + """_relative_subpath from normalize_coverage_xml.""" + + def test_subpath_within_base(self, tmp_path: Path) -> None: + """Subpath within base.""" + base = tmp_path / "repo" + path = base / "src" / "foo.cpp" + result = ncx._relative_subpath(path, base) + assert result == Path("src/foo.cpp") + + def test_outside_base_returns_none(self, tmp_path: Path) -> None: + """Outside base returns none.""" + base = tmp_path / "repo" + path = tmp_path / "other" / "foo.cpp" + assert ncx._relative_subpath(path, base) is None + + def test_none_base_returns_none(self, tmp_path: Path) -> None: + """None base returns none.""" + assert ncx._relative_subpath(tmp_path / "foo", None) is None + + +class TestNcxNormalize: + """normalize() from normalize_coverage_xml.""" + + def test_absolute_path_rewritten_relative(self, tmp_path: Path) -> None: + """Absolute path rewritten relative.""" + repo = tmp_path / "repo" + repo.mkdir() + src = repo / "phlex" / "foo.cpp" + src.parent.mkdir() + src.write_text("", encoding="utf-8") + + report = tmp_path / "coverage.xml" + report.write_text(_make_xml([str(src)]), encoding="utf-8") + + missing, external = ncx.normalize(report, repo) + assert missing == [] + assert external == [] + tree = ET.parse(report) + filenames = [cls.get("filename") for cls in tree.findall(".//class")] + assert filenames == 
["phlex/foo.cpp"] + + def test_external_file_reported(self, tmp_path: Path) -> None: + """External file reported.""" + repo = tmp_path / "repo" + repo.mkdir() + report = tmp_path / "coverage.xml" + report.write_text(_make_xml(["/usr/include/stdio.h"]), encoding="utf-8") + + missing, external = ncx.normalize(report, repo) + assert len(external) == 1 + assert "stdio.h" in external[0] + + def test_missing_file_reported(self, tmp_path: Path) -> None: + """Missing file reported.""" + repo = tmp_path / "repo" + repo.mkdir() + (repo / "phlex").mkdir() + report = tmp_path / "coverage.xml" + # relative filename that does not exist on disk + report.write_text(_make_xml(["phlex/absent.cpp"]), encoding="utf-8") + + missing, external = ncx.normalize(report, repo) + assert len(missing) == 1 + + def test_source_element_normalized(self, tmp_path: Path) -> None: + """Source element normalized.""" + repo = tmp_path / "repo" + repo.mkdir() + report = tmp_path / "coverage.xml" + report.write_text(_make_xml([], source="/old/path"), encoding="utf-8") + + ncx.normalize(report, repo) + tree = ET.parse(report) + sources = tree.findall("sources/source") + assert len(sources) == 1 + assert sources[0].text == str(repo) + + def test_custom_source_dir(self, tmp_path: Path) -> None: + """Custom source dir.""" + repo = tmp_path / "repo" + repo.mkdir() + custom_src = tmp_path / "custom" + report = tmp_path / "coverage.xml" + report.write_text(_make_xml([]), encoding="utf-8") + + ncx.normalize(report, repo, source_dir=custom_src) + tree = ET.parse(report) + sources = tree.findall("sources/source") + assert sources[0].text == str(custom_src) + + def test_relative_filename_within_coverage_root(self, tmp_path: Path) -> None: + """Relative filename within coverage root.""" + repo = tmp_path / "repo" + repo.mkdir() + coverage_root = repo / "phlex" + coverage_root.mkdir() + src = coverage_root / "foo.cpp" + src.write_text("", encoding="utf-8") + + report = tmp_path / "coverage.xml" + # filename is 
relative to coverage_root + report.write_text(_make_xml(["foo.cpp"]), encoding="utf-8") + + missing, external = ncx.normalize(report, repo, coverage_root=coverage_root) + assert missing == [] + + def test_path_map_applied(self, tmp_path: Path) -> None: + """Path map applied.""" + repo = tmp_path / "repo" + repo.mkdir() + from_dir = tmp_path / "generated" + from_dir.mkdir() + to_dir = repo / "build-clang" + to_dir.mkdir() + src = from_dir / "auto.cpp" + src.write_text("", encoding="utf-8") + + report = tmp_path / "coverage.xml" + report.write_text(_make_xml([str(src)]), encoding="utf-8") + + missing, external = ncx.normalize(report, repo, path_maps=[(from_dir, to_dir)]) + assert external == [] + + def test_no_classes_produces_no_errors(self, tmp_path: Path) -> None: + """No classes produces no errors.""" + repo = tmp_path / "repo" + repo.mkdir() + report = tmp_path / "coverage.xml" + report.write_text(_make_xml([]), encoding="utf-8") + missing, external = ncx.normalize(report, repo) + assert missing == [] + assert external == [] + + +class TestNcxMain: + """main() from normalize_coverage_xml.""" + + def _report(self, tmp_path: Path, filenames: list[str]) -> Path: + report = tmp_path / "coverage.xml" + report.write_text(_make_xml(filenames), encoding="utf-8") + return report + + def test_success_returns_zero(self, tmp_path: Path) -> None: + """Success returns zero.""" + repo = tmp_path / "repo" + repo.mkdir() + src = repo / "phlex" / "ok.cpp" + src.parent.mkdir() + src.write_text("", encoding="utf-8") + report = self._report(tmp_path, [str(src)]) + rc = ncx.main([str(report), "--repo-root", str(repo)]) + assert rc == 0 + + def test_external_returns_nonzero(self, tmp_path: Path) -> None: + """External returns nonzero.""" + repo = tmp_path / "repo" + repo.mkdir() + report = self._report(tmp_path, ["/usr/include/stdio.h"]) + rc = ncx.main([str(report), "--repo-root", str(repo)]) + assert rc != 0 + + def test_missing_file_returns_nonzero(self, tmp_path: Path) -> None: + 
"""Missing file returns nonzero.""" + repo = tmp_path / "repo" + repo.mkdir() + (repo / "phlex").mkdir() + report = self._report(tmp_path, [str(repo / "phlex" / "missing.cpp")]) + rc = ncx.main([str(report), "--repo-root", str(repo)]) + assert rc != 0 + + def test_invalid_path_map_raises(self, tmp_path: Path) -> None: + """Invalid path map raises.""" + repo = tmp_path / "repo" + repo.mkdir() + report = self._report(tmp_path, []) + with pytest.raises(SystemExit): + ncx.main([str(report), "--repo-root", str(repo), "--path-map", "noequalssign"]) + + def test_path_map_parsed(self, tmp_path: Path) -> None: + """Path map parsed.""" + repo = tmp_path / "repo" + repo.mkdir() + src = repo / "phlex" / "ok.cpp" + src.parent.mkdir() + src.write_text("", encoding="utf-8") + report = self._report(tmp_path, [str(src)]) + # valid path map: should not raise + rc = ncx.main( + [ + str(report), + "--repo-root", + str(repo), + "--path-map", + f"{tmp_path}={repo}", + ] + ) + assert rc == 0 + + +# =========================================================================== +# export_llvm_lcov +# =========================================================================== + + +class TestEllBuildParser: + """build_parser() from export_llvm_lcov.""" + + def test_parser_returns_parser(self) -> None: + """Parser returns parser.""" + import argparse + + p = ell.build_parser() + assert isinstance(p, argparse.ArgumentParser) + + def test_required_args_parsed(self) -> None: + """Required args parsed.""" + p = ell.build_parser() + args = p.parse_args(["out.info", "/usr/bin/llvm-cov", "export", "-instr-profile=foo"]) + assert args.output == "out.info" + assert args.llvm_cov == "/usr/bin/llvm-cov" + assert "export" in args.llvm_cov_args + + def test_no_llvm_cov_args_produces_empty_list(self) -> None: + """No llvm cov args produces empty list.""" + p = ell.build_parser() + args = p.parse_args(["out.info", "/usr/bin/llvm-cov"]) + assert args.llvm_cov_args == [] + + +class TestEllMain: + """main() from 
export_llvm_lcov.""" + + def test_success_writes_output_file(self, tmp_path: Path) -> None: + """Success writes output file.""" + out = tmp_path / "out.info" + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=0) + rc = ell.main([str(out), "llvm-cov", "export", "-foo"]) + assert rc == 0 + assert mock_run.called + # The output file must be created (even if empty when subprocess is mocked). + assert out.exists() + + def test_creates_parent_directories(self, tmp_path: Path) -> None: + """Creates parent directories.""" + out = tmp_path / "nested" / "deep" / "out.info" + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=0) + ell.main([str(out), "llvm-cov", "export", "-foo"]) + assert out.parent.exists() + + def test_subprocess_error_propagates_returncode(self, tmp_path: Path) -> None: + """Subprocess error propagates returncode.""" + out = tmp_path / "out.info" + error = subprocess.CalledProcessError(returncode=42, cmd=["llvm-cov"]) + with patch("subprocess.run", side_effect=error): + rc = ell.main([str(out), "llvm-cov", "export", "-foo"]) + assert rc == 42 + + def test_missing_llvm_cov_args_exits_with_error(self, tmp_path: Path) -> None: + """Missing llvm cov args exits with error.""" + out = tmp_path / "out.info" + with pytest.raises(SystemExit): + ell.main([str(out), "llvm-cov"]) + + def test_command_includes_llvm_cov_args(self, tmp_path: Path) -> None: + """Command includes llvm cov args.""" + out = tmp_path / "out.info" + captured: list[Any] = [] + + def _run(cmd: list[str], **kwargs: Any) -> MagicMock: + captured.append(cmd) + return MagicMock(returncode=0) + + with patch("subprocess.run", side_effect=_run): + ell.main([str(out), "llvm-cov", "export", "-instr-profile=foo", "binary"]) + + assert captured[0] == ["llvm-cov", "export", "-instr-profile=foo", "binary"] + + +# =========================================================================== +# create_coverage_symlinks +# 
=========================================================================== + + +class TestCcsShouldLink: + """should_link() from create_coverage_symlinks.""" + + def test_cpp_file_accepted(self, tmp_path: Path) -> None: + """Cpp file accepted.""" + f = tmp_path / "foo.cpp" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_h_file_accepted(self, tmp_path: Path) -> None: + """H file accepted.""" + f = tmp_path / "foo.h" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_c_file_accepted(self, tmp_path: Path) -> None: + """C file accepted.""" + f = tmp_path / "foo.c" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_icc_file_accepted(self, tmp_path: Path) -> None: + """Icc file accepted.""" + f = tmp_path / "foo.icc" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_tcc_file_accepted(self, tmp_path: Path) -> None: + """Tcc file accepted.""" + f = tmp_path / "foo.tcc" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_i_file_accepted(self, tmp_path: Path) -> None: + """I file accepted.""" + f = tmp_path / "foo.i" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_ii_file_accepted(self, tmp_path: Path) -> None: + """Ii file accepted.""" + f = tmp_path / "foo.ii" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_txt_file_rejected(self, tmp_path: Path) -> None: + """Txt file rejected.""" + f = tmp_path / "readme.txt" + f.write_text("", encoding="utf-8") + assert not ccs.should_link(f) + + def test_py_file_rejected(self, tmp_path: Path) -> None: + """Py file rejected.""" + f = tmp_path / "script.py" + f.write_text("", encoding="utf-8") + assert not ccs.should_link(f) + + def test_directory_rejected(self, tmp_path: Path) -> None: + """Directory rejected.""" + d = tmp_path / "dir" + d.mkdir() + assert not ccs.should_link(d) + + def test_nonexistent_file_rejected(self, tmp_path: Path) -> 
None: + """Nonexistent file rejected.""" + assert not ccs.should_link(tmp_path / "ghost.cpp") + + def test_cxx_extension_accepted(self, tmp_path: Path) -> None: + """Cxx extension accepted.""" + f = tmp_path / "bar.cxx" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + def test_hpp_extension_accepted(self, tmp_path: Path) -> None: + """Hpp extension accepted.""" + f = tmp_path / "bar.hpp" + f.write_text("", encoding="utf-8") + assert ccs.should_link(f) + + +class TestCcsIterSourceFiles: + """iter_source_files() from create_coverage_symlinks.""" + + def test_yields_cpp_files(self, tmp_path: Path) -> None: + """Yields cpp files.""" + (tmp_path / "a.cpp").write_text("", encoding="utf-8") + (tmp_path / "b.h").write_text("", encoding="utf-8") + (tmp_path / "c.txt").write_text("", encoding="utf-8") + result = list(ccs.iter_source_files(tmp_path)) + names = {p.name for p in result} + assert "a.cpp" in names + assert "b.h" in names + assert "c.txt" not in names + + def test_recurses_into_subdirectories(self, tmp_path: Path) -> None: + """Recurses into subdirectories.""" + sub = tmp_path / "sub" + sub.mkdir() + (sub / "nested.cpp").write_text("", encoding="utf-8") + result = list(ccs.iter_source_files(tmp_path)) + assert any(p.name == "nested.cpp" for p in result) + + def test_empty_directory_yields_nothing(self, tmp_path: Path) -> None: + """Empty directory yields nothing.""" + assert list(ccs.iter_source_files(tmp_path)) == [] + + +class TestCcsCreateSymlinks: + """create_symlinks() from create_coverage_symlinks.""" + + def test_symlinks_created_for_cpp_files(self, tmp_path: Path) -> None: + """Symlinks created for cpp files.""" + build = tmp_path / "build" + build.mkdir() + (build / "foo.cpp").write_text("int main(){}", encoding="utf-8") + output = tmp_path / "links" + + ccs.create_symlinks(build, output) + + link = output / "foo.cpp" + assert link.is_symlink() + assert link.resolve() == (build / "foo.cpp").resolve() + + def 
test_non_source_files_not_linked(self, tmp_path: Path) -> None: + """Non source files not linked.""" + build = tmp_path / "build" + build.mkdir() + (build / "readme.txt").write_text("hello", encoding="utf-8") + output = tmp_path / "links" + + ccs.create_symlinks(build, output) + assert not (output / "readme.txt").exists() + + def test_output_dir_recreated_if_exists(self, tmp_path: Path) -> None: + """Output dir recreated if exists.""" + build = tmp_path / "build" + build.mkdir() + (build / "a.cpp").write_text("", encoding="utf-8") + output = tmp_path / "links" + output.mkdir() + # Pre-existing stale file in output + (output / "stale.cpp").write_text("", encoding="utf-8") + + ccs.create_symlinks(build, output) + assert not (output / "stale.cpp").exists() + assert (output / "a.cpp").is_symlink() + + def test_nested_paths_preserved(self, tmp_path: Path) -> None: + """Nested paths preserved.""" + build = tmp_path / "build" + sub = build / "sub" + sub.mkdir(parents=True) + (sub / "nested.cpp").write_text("", encoding="utf-8") + output = tmp_path / "links" + + ccs.create_symlinks(build, output) + assert (output / "sub" / "nested.cpp").is_symlink() + + def test_existing_symlink_replaced(self, tmp_path: Path) -> None: + """Existing symlink replaced.""" + build = tmp_path / "build" + build.mkdir() + src = build / "foo.cpp" + src.write_text("v2", encoding="utf-8") + output = tmp_path / "links" + output.mkdir() + # Create an old symlink pointing somewhere else + old_target = tmp_path / "old.cpp" + old_target.write_text("v1", encoding="utf-8") + (output / "foo.cpp").symlink_to(old_target) + + ccs.create_symlinks(build, output) + # symlink must now point to the build source, not old_target + assert (output / "foo.cpp").resolve() == src.resolve() + + +class TestCcsParseArgs: + """parse_args() from create_coverage_symlinks.""" + + def test_required_args_parsed(self, tmp_path: Path) -> None: + """Required args parsed.""" + argv = ["prog", "--build-root", str(tmp_path), 
"--output-root", str(tmp_path)] + args = ccs.parse_args(argv) + assert args.build_root == str(tmp_path) + assert args.output_root == str(tmp_path) + + def test_missing_required_arg_raises(self) -> None: + """Missing required arg raises.""" + with pytest.raises(SystemExit): + ccs.parse_args(["prog", "--build-root", "/foo"]) + + +class TestCcsMain: + """main() from create_coverage_symlinks.""" + + def test_success_returns_zero(self, tmp_path: Path) -> None: + """Success returns zero.""" + build = tmp_path / "build" + build.mkdir() + (build / "foo.cpp").write_text("", encoding="utf-8") + output = tmp_path / "out" + rc = ccs.main(["prog", "--build-root", str(build), "--output-root", str(output)]) + assert rc == 0 + assert (output / "foo.cpp").is_symlink() + + def test_nonexistent_build_root_returns_one(self, tmp_path: Path) -> None: + """Nonexistent build root returns one.""" + rc = ccs.main( + [ + "prog", + "--build-root", + str(tmp_path / "no_such"), + "--output-root", + str(tmp_path / "out"), + ] + ) + assert rc == 1 diff --git a/scripts/test/test_fix_header_guards.py b/scripts/test/test_fix_header_guards.py new file mode 100644 index 000000000..94fd54428 --- /dev/null +++ b/scripts/test/test_fix_header_guards.py @@ -0,0 +1,469 @@ +"""Tests for fix_header_guards.py. + +Coverage strategy +----------------- +Unit tests cover compute_expected_guard, check_header_guard, and fix_header_guard +using temporary files. + +Integration tests exercise main() end-to-end, matching how the script is +invoked from .github/workflows/header-guards-check.yaml and +.github/workflows/header-guards-fix.yaml: + + # Check mode + python3 scripts/fix_header_guards.py --check --root . phlex plugins form + + # Fix mode + python3 scripts/fix_header_guards.py --root . phlex plugins form +""" + +from __future__ import annotations + +import textwrap +from pathlib import Path +from unittest.mock import patch + +import pytest + +# sys.path is set up by scripts/test/conftest.py. 
+import fix_header_guards as M # noqa: E402 + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_header( + tmp_path: Path, + subdir: str, + filename: str, + content: str, +) -> Path: + """Write content to tmp_path// and return the path.""" + parent = tmp_path / subdir + parent.mkdir(parents=True, exist_ok=True) + f = parent / filename + f.write_text(content, encoding="utf-8") + return f + + +def _correct_guard_content(guard: str) -> str: + """Return a minimal header file whose guard matches the given macro.""" + return textwrap.dedent( + f"""\ + #ifndef {guard} + #define {guard} + + // content + + #endif // {guard} + """ + ) + + +def _wrong_guard_content(guard: str, wrong_macro: str = "WRONG_GUARD_HPP") -> str: + """Return a minimal header with an incorrect guard macro.""" + return textwrap.dedent( + f"""\ + #ifndef {wrong_macro} + #define {wrong_macro} + + // content + + #endif // {wrong_macro} + """ + ) + + +# --------------------------------------------------------------------------- +# compute_expected_guard +# --------------------------------------------------------------------------- + + +class TestComputeExpectedGuard: + """Tests for M.compute_expected_guard.""" + + def test_simple_two_level_path(self, tmp_path: Path) -> None: + """phlex/foo.hpp → PHLEX_FOO_HPP.""" + f = tmp_path / "phlex" / "foo.hpp" + assert M.compute_expected_guard(f, tmp_path) == "PHLEX_FOO_HPP" + + def test_three_level_path(self, tmp_path: Path) -> None: + """phlex/detail/bar.hpp → PHLEX_DETAIL_BAR_HPP.""" + f = tmp_path / "phlex" / "detail" / "bar.hpp" + assert M.compute_expected_guard(f, tmp_path) == "PHLEX_DETAIL_BAR_HPP" + + def test_four_level_path(self, tmp_path: Path) -> None: + """phlex/a/b/c.hpp → PHLEX_A_B_C_HPP.""" + f = tmp_path / "phlex" / "a" / "b" / "c.hpp" + assert M.compute_expected_guard(f, tmp_path) == "PHLEX_A_B_C_HPP" + + def 
test_h_extension(self, tmp_path: Path) -> None: + """A .h file produces an _H suffix.""" + f = tmp_path / "phlex" / "c_compat.h" + assert M.compute_expected_guard(f, tmp_path) == "PHLEX_C_COMPAT_H" + + def test_hyphen_in_subdir_converted_to_underscore(self, tmp_path: Path) -> None: + """Hyphens in directory or file names are replaced with underscores.""" + f = tmp_path / "my-dir" / "my-file.hpp" + assert M.compute_expected_guard(f, tmp_path) == "MY_DIR_MY_FILE_HPP" + + def test_hyphen_in_filename_converted_to_underscore(self, tmp_path: Path) -> None: + """Hyphens in the file stem are replaced with underscores.""" + f = tmp_path / "phlex" / "my-util.hpp" + assert M.compute_expected_guard(f, tmp_path) == "PHLEX_MY_UTIL_HPP" + + def test_uppercase_components_preserved(self, tmp_path: Path) -> None: + """Components are always uppercased regardless of input case.""" + f = tmp_path / "Phlex" / "FooBar.hpp" + assert M.compute_expected_guard(f, tmp_path) == "PHLEX_FOOBAR_HPP" + + def test_plugins_subdirectory(self, tmp_path: Path) -> None: + """plugins/python/module.hpp → PLUGINS_PYTHON_MODULE_HPP.""" + f = tmp_path / "plugins" / "python" / "module.hpp" + assert M.compute_expected_guard(f, tmp_path) == "PLUGINS_PYTHON_MODULE_HPP" + + +# --------------------------------------------------------------------------- +# check_header_guard +# --------------------------------------------------------------------------- + + +class TestCheckHeaderGuard: + """Tests for M.check_header_guard.""" + + def test_correct_guard_returns_true(self, tmp_path: Path) -> None: + """A file whose guard matches the expected macro is valid.""" + guard = "PHLEX_FOO_HPP" + f = _make_header(tmp_path, "phlex", "foo.hpp", _correct_guard_content(guard)) + valid, expected = M.check_header_guard(f, tmp_path) + assert valid is True + assert expected is None + + def test_wrong_guard_returns_false(self, tmp_path: Path) -> None: + """A file with a wrong guard name is invalid and returns the expected macro.""" + f 
= _make_header(tmp_path, "phlex", "foo.hpp", _wrong_guard_content("PHLEX_FOO_HPP")) + valid, expected = M.check_header_guard(f, tmp_path) + assert valid is False + assert expected == "PHLEX_FOO_HPP" + + def test_too_short_file_is_valid(self, tmp_path: Path) -> None: + """A file with fewer than 3 lines is treated as valid (no guard expected).""" + f = _make_header(tmp_path, "phlex", "empty.hpp", "// no guard\n") + valid, expected = M.check_header_guard(f, tmp_path) + assert valid is True + assert expected is None + + def test_no_ifndef_is_valid(self, tmp_path: Path) -> None: + """A file without #ifndef is treated as valid (pragmatic approach).""" + content = textwrap.dedent( + """\ + #pragma once + // header content + void foo(); + """ + ) + f = _make_header(tmp_path, "phlex", "pragma.hpp", content) + valid, expected = M.check_header_guard(f, tmp_path) + assert valid is True + + def test_missing_endif_comment_makes_guard_invalid(self, tmp_path: Path) -> None: + """If #endif has no comment, endif_macro is None and the guard is invalid.""" + content = textwrap.dedent( + """\ + #ifndef PHLEX_FOO_HPP + #define PHLEX_FOO_HPP + + void foo(); + + #endif + """ + ) + f = _make_header(tmp_path, "phlex", "foo.hpp", content) + valid, _ = M.check_header_guard(f, tmp_path) + # endif_macro is None, so ifndef==define==expected but endif_macro≠expected + assert valid is False + + def test_mismatched_ifndef_and_define(self, tmp_path: Path) -> None: + """When #ifndef and #define use different macros, the guard is invalid.""" + content = textwrap.dedent( + """\ + #ifndef PHLEX_FOO_HPP + #define PHLEX_BAR_HPP + + void foo(); + + #endif // PHLEX_FOO_HPP + """ + ) + f = _make_header(tmp_path, "phlex", "foo.hpp", content) + valid, _ = M.check_header_guard(f, tmp_path) + assert valid is False + + +# --------------------------------------------------------------------------- +# fix_header_guard +# --------------------------------------------------------------------------- + + +class 
TestFixHeaderGuard: + """Tests for M.fix_header_guard.""" + + def test_wrong_guard_is_fixed(self, tmp_path: Path) -> None: + """A file with a wrong guard is updated to the expected macro.""" + f = _make_header(tmp_path, "phlex", "bar.hpp", _wrong_guard_content("PHLEX_BAR_HPP")) + modified = M.fix_header_guard(f, tmp_path) + assert modified is True + content = f.read_text(encoding="utf-8") + assert "#ifndef PHLEX_BAR_HPP" in content + assert "#define PHLEX_BAR_HPP" in content + assert "#endif // PHLEX_BAR_HPP" in content + + def test_correct_guard_not_modified(self, tmp_path: Path) -> None: + """A file with the correct guard is not written.""" + guard = "PHLEX_BAR_HPP" + original = _correct_guard_content(guard) + f = _make_header(tmp_path, "phlex", "bar.hpp", original) + modified = M.fix_header_guard(f, tmp_path) + assert modified is False + assert f.read_text(encoding="utf-8") == original + + def test_short_file_not_modified(self, tmp_path: Path) -> None: + """A file with fewer than 3 lines is not touched.""" + f = _make_header(tmp_path, "phlex", "short.hpp", "// one line\n") + assert M.fix_header_guard(f, tmp_path) is False + + def test_no_ifndef_not_modified(self, tmp_path: Path) -> None: + """A file without a guard structure is not modified.""" + content = textwrap.dedent( + """\ + #pragma once + void foo(); + // end + """ + ) + f = _make_header(tmp_path, "phlex", "prag.hpp", content) + assert M.fix_header_guard(f, tmp_path) is False + + def test_only_ifndef_fixed(self, tmp_path: Path) -> None: + """Only the #ifndef line is replaced when it has the wrong macro.""" + content = textwrap.dedent( + """\ + #ifndef WRONG + #define PHLEX_OK_HPP + + // body + + #endif // PHLEX_OK_HPP + """ + ) + f = _make_header(tmp_path, "phlex", "ok.hpp", content) + M.fix_header_guard(f, tmp_path) + lines = f.read_text(encoding="utf-8").splitlines() + assert lines[0] == "#ifndef PHLEX_OK_HPP" + + def test_endif_line_fixed(self, tmp_path: Path) -> None: + """The #endif comment is 
updated when it has the wrong macro.""" + content = textwrap.dedent( + """\ + #ifndef PHLEX_END_HPP + #define PHLEX_END_HPP + + void foo(); + + #endif // OLD_MACRO + """ + ) + f = _make_header(tmp_path, "phlex", "end.hpp", content) + M.fix_header_guard(f, tmp_path) + last_line = f.read_text(encoding="utf-8").splitlines()[-1] + assert last_line == "#endif // PHLEX_END_HPP" + + def test_fix_preserves_body_content(self, tmp_path: Path) -> None: + """The body content between the guard and #endif is preserved unchanged.""" + content = textwrap.dedent( + """\ + #ifndef WRONG_GUARD + #define WRONG_GUARD + + class Foo { + int x; + }; + + #endif // WRONG_GUARD + """ + ) + f = _make_header(tmp_path, "phlex", "body.hpp", content) + M.fix_header_guard(f, tmp_path) + body = f.read_text(encoding="utf-8") + assert "class Foo {" in body + assert " int x;" in body + + +# --------------------------------------------------------------------------- +# main() — integration tests +# --------------------------------------------------------------------------- + + +class TestMain: + """Integration tests for M.main() (check and fix modes).""" + + # ------------------------------------------------------------------ + # Check mode + # ------------------------------------------------------------------ + + def test_check_mode_all_correct_returns_normally( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """All correct guards → main() returns without sys.exit.""" + guard = "PHLEX_GOOD_HPP" + _make_header(tmp_path, "phlex", "good.hpp", _correct_guard_content(guard)) + argv = ["prog", "--check", "--root", str(tmp_path), str(tmp_path / "phlex")] + with patch("sys.argv", argv): + M.main() # must not raise + assert "All header guards are correct" in capsys.readouterr().out + + def test_check_mode_wrong_guard_exits_one( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """A file with a wrong guard causes main() to sys.exit(1).""" + 
_make_header(tmp_path, "phlex", "bad.hpp", _wrong_guard_content("PHLEX_BAD_HPP")) + argv = ["prog", "--check", "--root", str(tmp_path), str(tmp_path / "phlex")] + with patch("sys.argv", argv): + with pytest.raises(SystemExit) as exc_info: + M.main() + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "Found" in out + assert "bad.hpp" in out + + def test_check_mode_reports_expected_guard( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """The expected guard name is shown in the check-mode failure message.""" + _make_header(tmp_path, "phlex", "thing.hpp", _wrong_guard_content("PHLEX_THING_HPP")) + argv = ["prog", "--check", "--root", str(tmp_path), str(tmp_path / "phlex")] + with patch("sys.argv", argv): + with pytest.raises(SystemExit): + M.main() + assert "PHLEX_THING_HPP" in capsys.readouterr().out + + def test_check_mode_multiple_files( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Multiple files are checked; only the bad ones are reported.""" + guard_a = "PHLEX_A_HPP" + _make_header(tmp_path, "phlex", "a.hpp", _correct_guard_content(guard_a)) + _make_header(tmp_path, "phlex", "b.hpp", _wrong_guard_content("PHLEX_B_HPP")) + argv = ["prog", "--check", "--root", str(tmp_path), str(tmp_path / "phlex")] + with patch("sys.argv", argv): + with pytest.raises(SystemExit) as exc_info: + M.main() + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "b.hpp" in out + assert "a.hpp" not in out + + def test_check_mode_skips_non_header_files( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Non-.hpp/.h files are skipped in directory scan mode.""" + phlex_dir = tmp_path / "phlex" + phlex_dir.mkdir() + (phlex_dir / "source.cpp").write_text("int x;\n", encoding="utf-8") + with patch("sys.argv", ["prog", "--check", "--root", str(tmp_path), str(phlex_dir)]): + M.main() # must not raise — .cpp is not checked + assert "All header guards are correct" in 
capsys.readouterr().out + + # ------------------------------------------------------------------ + # Fix mode + # ------------------------------------------------------------------ + + def test_fix_mode_fixes_wrong_guard( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Fix mode rewrites the wrong guard and prints the file name.""" + f = _make_header(tmp_path, "phlex", "fix_me.hpp", _wrong_guard_content("PHLEX_FIX_ME_HPP")) + with patch("sys.argv", ["prog", "--root", str(tmp_path), str(tmp_path / "phlex")]): + M.main() + content = f.read_text(encoding="utf-8") + assert "#ifndef PHLEX_FIX_ME_HPP" in content + assert "Fixed" in capsys.readouterr().out + + def test_fix_mode_nothing_to_fix( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Fix mode prints 'No header guards needed fixing.' when nothing changed.""" + guard = "PHLEX_CLEAN_HPP" + _make_header(tmp_path, "phlex", "clean.hpp", _correct_guard_content(guard)) + with patch("sys.argv", ["prog", "--root", str(tmp_path), str(tmp_path / "phlex")]): + M.main() + assert "No header guards needed fixing" in capsys.readouterr().out + + def test_fix_mode_multiple_directories( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Multiple directory arguments are all processed.""" + f1 = _make_header(tmp_path, "phlex", "p.hpp", _wrong_guard_content("PHLEX_P_HPP")) + f2 = _make_header(tmp_path, "plugins", "q.hpp", _wrong_guard_content("PLUGINS_Q_HPP")) + with patch( + "sys.argv", + [ + "prog", + "--root", + str(tmp_path), + str(tmp_path / "phlex"), + str(tmp_path / "plugins"), + ], + ): + M.main() + assert "#ifndef PHLEX_P_HPP" in f1.read_text(encoding="utf-8") + assert "#ifndef PLUGINS_Q_HPP" in f2.read_text(encoding="utf-8") + + def test_fix_mode_direct_file_argument( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """A direct file path argument (not a directory) is processed.""" + f = _make_header(tmp_path, "phlex", 
"direct.hpp", _wrong_guard_content("PHLEX_DIRECT_HPP")) + with patch("sys.argv", ["prog", "--root", str(tmp_path), str(f)]): + M.main() + assert "#ifndef PHLEX_DIRECT_HPP" in f.read_text(encoding="utf-8") + + # ------------------------------------------------------------------ + # Workflow-level invocation tests (matching CI commands) + # ------------------------------------------------------------------ + + def test_workflow_check_invocation_all_correct( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Simulate: python3 scripts/fix_header_guards.py --check --root . phlex plugins form.""" + for subdir, stem, ext in [ + ("phlex", "alpha", "hpp"), + ("plugins", "beta", "hpp"), + ("form", "gamma", "h"), + ]: + guard = f"{subdir.upper()}_{stem.upper()}_{ext.upper()}" + _make_header(tmp_path, subdir, f"{stem}.{ext}", _correct_guard_content(guard)) + + subdirs = [str(tmp_path / d) for d in ("phlex", "plugins", "form")] + with patch("sys.argv", ["prog", "--check", "--root", str(tmp_path)] + subdirs): + M.main() # must not raise + assert "All header guards are correct" in capsys.readouterr().out + + def test_workflow_fix_invocation( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Simulate: python3 scripts/fix_header_guards.py --root . 
phlex plugins form.""" + bad_files = [ + ("phlex", "x.hpp", "PHLEX_X_HPP"), + ("form", "y.h", "FORM_Y_H"), + ] + for subdir, fname, expected_guard in bad_files: + _make_header(tmp_path, subdir, fname, _wrong_guard_content(expected_guard)) + + plugins_dir = tmp_path / "plugins" + plugins_dir.mkdir() + subdirs = [str(tmp_path / d) for d in ("phlex", "plugins", "form")] + with patch("sys.argv", ["prog", "--root", str(tmp_path)] + subdirs): + M.main() + out = capsys.readouterr().out + assert "Fixed" in out diff --git a/scripts/test/test_sarif_alerts.py b/scripts/test/test_sarif_alerts.py new file mode 100644 index 000000000..ee2ba2741 --- /dev/null +++ b/scripts/test/test_sarif_alerts.py @@ -0,0 +1,513 @@ +"""Tests for sarif-alerts.py. + +The script is a user-callable CLI tool that reads one or more SARIF files (or +directories of SARIF files) and prints a human-readable summary. Tests cover: + +- _collect_sarif_paths: file vs directory expansion +- _process_sarif: level/baseline filtering, location extraction, message + truncation, error handling +- main(): argument parsing, missing files, directory input, exit codes +""" + +from __future__ import annotations + +import importlib.util +import json +import os +from collections.abc import Callable, Iterator +from pathlib import Path +from typing import Any + +import pytest + +# --------------------------------------------------------------------------- +# Import the module. The file is named "sarif-alerts.py" (with a hyphen), +# which is not a valid Python identifier, so importlib is required. +# sys.path is set up by scripts/test/conftest.py. 
+# --------------------------------------------------------------------------- +_spec = importlib.util.spec_from_file_location( + "sarif_alerts", Path(__file__).parent.parent / "sarif-alerts.py" +) +assert _spec is not None and _spec.loader is not None +sarif_alerts = importlib.util.module_from_spec(_spec) +# exec_module is defined on ExecutionLoader (a subclass of Loader); the assert +# above confirms loader is not None, and spec_from_file_location always returns +# a SourceFileLoader which does implement exec_module. +assert hasattr(_spec.loader, "exec_module") +_spec.loader.exec_module(sarif_alerts) # type: ignore[union-attr] + +# Typed aliases so Pylance can reason about call sites instead of treating +# these as Unknown (the module was loaded dynamically via importlib). +# pylint: disable=protected-access +# pylint: disable-next=line-too-long +_collect: Callable[[list[Path]], list[Path]] = sarif_alerts._collect_sarif_paths # type: ignore[attr-defined] +_process: Callable[..., Iterator[str]] = sarif_alerts._process_sarif # type: ignore[attr-defined] +# pylint: enable=protected-access +_main: Callable[[list[str] | None], int] = sarif_alerts.main # type: ignore[attr-defined] + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_result( + rule_id: str = "py/sql-injection", + level: str = "error", + baseline_state: str = "new", + uri: str = "src/app.py", + start_line: int | None = 10, + message: str = "Possible SQL injection.", +) -> dict[str, Any]: + region: dict[str, Any] = {} + if start_line is not None: + region["startLine"] = start_line + return { + "ruleId": rule_id, + "level": level, + "baselineState": baseline_state, + "message": {"text": message}, + "locations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": uri}, + "region": region, + } + } + ], + } + + +def _make_sarif(results: list[dict[str, Any]]) -> dict[str, 
Any]: + return { + "version": "2.1.0", + "runs": [{"tool": {"driver": {"name": "CodeQL"}}, "results": results}], + } + + +def _write_sarif(path: Path, results: list[dict[str, Any]]) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(_make_sarif(results)), encoding="utf-8") + return path + + +# --------------------------------------------------------------------------- +# _collect_sarif_paths +# --------------------------------------------------------------------------- + + +class TestCollectSarifPaths: + """Tests for TestCollectSarifPaths.""" + + def test_file_returned_as_is(self, tmp_path: Path) -> None: + """File returned as is.""" + f = tmp_path / "a.sarif" + f.write_text("{}", encoding="utf-8") + result = _collect([f]) + assert result == [f] + + def test_directory_expanded_to_sarif_files(self, tmp_path: Path) -> None: + """Directory expanded to sarif files.""" + (tmp_path / "a.sarif").write_text("{}", encoding="utf-8") + (tmp_path / "b.sarif").write_text("{}", encoding="utf-8") + (tmp_path / "note.txt").write_text("ignored", encoding="utf-8") + result = _collect([tmp_path]) + assert sorted(result) == sorted([tmp_path / "a.sarif", tmp_path / "b.sarif"]) + + def test_directory_recursive(self, tmp_path: Path) -> None: + """Directory recursive.""" + sub = tmp_path / "sub" + sub.mkdir() + (sub / "c.sarif").write_text("{}", encoding="utf-8") + result = _collect([tmp_path]) + assert tmp_path / "sub" / "c.sarif" in result + + def test_empty_directory_warns( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Empty directory warns.""" + _collect([tmp_path]) + assert "no .sarif files" in capsys.readouterr().err + + def test_mixed_files_and_dirs(self, tmp_path: Path) -> None: + """Mixed files and dirs.""" + f = tmp_path / "direct.sarif" + f.write_text("{}", encoding="utf-8") + sub = tmp_path / "sub" + sub.mkdir() + (sub / "nested.sarif").write_text("{}", encoding="utf-8") + result = _collect([f, sub]) + assert f 
in result + assert sub / "nested.sarif" in result + + def test_sorted_order_within_directory(self, tmp_path: Path) -> None: + """Sorted order within directory.""" + for name in ("c.sarif", "a.sarif", "b.sarif"): + (tmp_path / name).write_text("{}", encoding="utf-8") + result = _collect([tmp_path]) + assert result == sorted(result) + + +# --------------------------------------------------------------------------- +# _process_sarif +# --------------------------------------------------------------------------- + + +class TestProcessSarif: + """Tests for TestProcessSarif.""" + + def test_basic_result_formatted(self, tmp_path: Path) -> None: + """Basic result formatted.""" + f = _write_sarif(tmp_path / "r.sarif", [_make_result()]) + lines = list(_process(f)) + assert len(lines) == 1 + assert "py/sql-injection" in lines[0] + assert "error/new" in lines[0] + assert "src/app.py:10" in lines[0] + assert "Possible SQL injection." in lines[0] + + def test_uri_only_when_no_line(self, tmp_path: Path) -> None: + """Uri only when no line.""" + f = _write_sarif(tmp_path / "r.sarif", [_make_result(start_line=None)]) + lines = list(_process(f)) + assert "src/app.py" in lines[0] + assert ":None" not in lines[0] + + def test_no_locations_shows_unknown(self, tmp_path: Path) -> None: + """No locations shows unknown.""" + result = _make_result() + result["locations"] = [] + f = tmp_path / "r.sarif" + f.write_text(json.dumps(_make_sarif([result])), encoding="utf-8") + lines = list(_process(f)) + assert "(unknown location)" in lines[0] + + def test_min_level_filters_below_threshold(self, tmp_path: Path) -> None: + """Min level filters below threshold.""" + f = _write_sarif( + tmp_path / "r.sarif", + [ + _make_result(level="note"), + _make_result(level="warning"), + _make_result(level="error"), + ], + ) + lines = list(_process(f, min_level="warning")) + assert len(lines) == 2 + assert all("note" not in ln for ln in lines) + + def test_min_level_none_shows_all(self, tmp_path: Path) -> None: 
+ """Min level none shows all.""" + f = _write_sarif( + tmp_path / "r.sarif", + [_make_result(level="none"), _make_result(level="note")], + ) + lines = list(_process(f, min_level="none")) + assert len(lines) == 2 + + def test_baseline_filter_exact(self, tmp_path: Path) -> None: + """Baseline filter exact.""" + f = _write_sarif( + tmp_path / "r.sarif", + [ + _make_result(baseline_state="new"), + _make_result(baseline_state="absent"), + _make_result(baseline_state="unchanged"), + ], + ) + lines = list(_process(f, baseline_filter={"new"})) + assert len(lines) == 1 + assert "new" in lines[0] + + def test_baseline_filter_multiple(self, tmp_path: Path) -> None: + """Baseline filter multiple.""" + f = _write_sarif( + tmp_path / "r.sarif", + [ + _make_result(baseline_state="new"), + _make_result(baseline_state="absent"), + _make_result(baseline_state="unchanged"), + ], + ) + lines = list(_process(f, baseline_filter={"new", "absent"})) + assert len(lines) == 2 + + def test_baseline_filter_none_shows_all(self, tmp_path: Path) -> None: + """Baseline filter none shows all.""" + f = _write_sarif( + tmp_path / "r.sarif", + [_make_result(baseline_state="new"), _make_result(baseline_state="unchanged")], + ) + lines = list(_process(f, baseline_filter=None)) + assert len(lines) == 2 + + def test_message_truncated(self, tmp_path: Path) -> None: + """Message truncated.""" + long_msg = "x" * 300 + f = _write_sarif(tmp_path / "r.sarif", [_make_result(message=long_msg)]) + lines = list(_process(f, max_message=50)) + msg_part = lines[0].split("— ", 1)[1] + assert len(msg_part) <= 50 + assert msg_part.endswith("…") + + def test_message_whitespace_collapsed(self, tmp_path: Path) -> None: + """Message whitespace collapsed.""" + f = _write_sarif(tmp_path / "r.sarif", [_make_result(message="a b\n c")]) + lines = list(_process(f)) + assert "a b c" in lines[0] + + def test_empty_results_yields_nothing(self, tmp_path: Path) -> None: + """Empty results yields nothing.""" + f = 
_write_sarif(tmp_path / "r.sarif", []) + assert not list(_process(f)) + + def test_multiple_runs_merged(self, tmp_path: Path) -> None: + """Multiple runs merged.""" + sarif = { + "version": "2.1.0", + "runs": [ + { + "tool": {"driver": {"name": "CodeQL"}}, + "results": [_make_result(rule_id="py/r1")], + }, + { + "tool": {"driver": {"name": "CodeQL"}}, + "results": [_make_result(rule_id="cpp/r2")], + }, + ], + } + f = tmp_path / "r.sarif" + f.write_text(json.dumps(sarif), encoding="utf-8") + lines = list(_process(f)) + rule_ids = {ln.split()[0] for ln in lines} + assert rule_ids == {"py/r1", "cpp/r2"} + + def test_invalid_json_raises_value_error(self, tmp_path: Path) -> None: + """Invalid json raises value error.""" + f = tmp_path / "bad.sarif" + f.write_text("{not valid json}", encoding="utf-8") + with pytest.raises(ValueError, match="Invalid JSON"): + list(_process(f)) + + def test_non_object_json_raises_value_error(self, tmp_path: Path) -> None: + """Non object json raises value error.""" + f = tmp_path / "bad.sarif" + f.write_text("[1, 2, 3]", encoding="utf-8") + with pytest.raises(ValueError, match="Not a SARIF document"): + list(_process(f)) + + @pytest.mark.skipif(os.getuid() == 0, reason="root bypasses file permission checks") + def test_unreadable_file_raises_oserror(self, tmp_path: Path) -> None: + """Unreadable file raises oserror.""" + f = tmp_path / "locked.sarif" + f.write_text("{}", encoding="utf-8") + f.chmod(0o000) + try: + with pytest.raises(OSError, match="Cannot read"): + list(_process(f)) + finally: + f.chmod(0o644) + + def test_missing_message_field(self, tmp_path: Path) -> None: + """Missing message field.""" + result = _make_result() + del result["message"] + f = tmp_path / "r.sarif" + f.write_text(json.dumps(_make_sarif([result])), encoding="utf-8") + lines = list(_process(f)) + # Should still yield a line without crashing; message portion is empty + assert len(lines) == 1 + assert "— " in lines[0] + + def test_missing_rule_id(self, 
tmp_path: Path) -> None: + """Missing rule id.""" + result = _make_result() + del result["ruleId"] + f = tmp_path / "r.sarif" + f.write_text(json.dumps(_make_sarif([result])), encoding="utf-8") + lines = list(_process(f)) + assert "" in lines[0] + + def test_missing_level_defaults_to_none(self, tmp_path: Path) -> None: + """Missing level defaults to none.""" + result = _make_result() + del result["level"] + f = tmp_path / "r.sarif" + f.write_text(json.dumps(_make_sarif([result])), encoding="utf-8") + lines = list(_process(f, min_level="none")) + assert "none/" in lines[0] + + def test_missing_baseline_defaults_to_unchanged(self, tmp_path: Path) -> None: + """Missing baseline defaults to unchanged.""" + result = _make_result() + del result["baselineState"] + f = tmp_path / "r.sarif" + f.write_text(json.dumps(_make_sarif([result])), encoding="utf-8") + lines = list(_process(f)) + assert "/unchanged" in lines[0] + + +# --------------------------------------------------------------------------- +# main() +# --------------------------------------------------------------------------- + + +class TestMain: + """Tests for TestMain.""" + + def _sarif_file(self, tmp_path: Path, results: list[dict]) -> Path: + return _write_sarif(tmp_path / "results.sarif", results) + + def test_returns_zero_on_success(self, tmp_path: Path) -> None: + """Returns zero on success.""" + f = self._sarif_file(tmp_path, [_make_result()]) + rc = _main([str(f)]) + assert rc == 0 + + def test_missing_file_returns_one( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Missing file returns one.""" + rc = _main([str(tmp_path / "nonexistent.sarif")]) + assert rc == 1 + assert "Error" in capsys.readouterr().err + + def test_bad_json_returns_one( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Bad json returns one.""" + f = tmp_path / "bad.sarif" + f.write_text("{bad json}", encoding="utf-8") + rc = _main([str(f)]) + assert rc == 1 + assert "Error" in 
capsys.readouterr().err + + def test_output_contains_result_line( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Output contains result line.""" + f = self._sarif_file(tmp_path, [_make_result(rule_id="py/sql-injection")]) + _main([str(f)]) + out = capsys.readouterr().out + assert "py/sql-injection" in out + + def test_total_line_printed(self, tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: + """Total line printed.""" + f = self._sarif_file(tmp_path, [_make_result(), _make_result()]) + _main([str(f)]) + out = capsys.readouterr().out + assert "Total alerts: 2" in out + + def test_level_filter_applied( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Level filter applied.""" + f = self._sarif_file( + tmp_path, + [_make_result(level="note"), _make_result(level="error")], + ) + _main([str(f), "--level", "warning"]) + out = capsys.readouterr().out + assert "Total alerts: 1" in out + assert "note" not in out + + def test_baseline_filter_applied( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Baseline filter applied.""" + f = self._sarif_file( + tmp_path, + [_make_result(baseline_state="new"), _make_result(baseline_state="unchanged")], + ) + _main([str(f), "--baseline", "new"]) + out = capsys.readouterr().out + assert "Total alerts: 1" in out + assert "unchanged" not in out + + def test_baseline_filter_repeated( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Baseline filter repeated.""" + f = self._sarif_file( + tmp_path, + [ + _make_result(baseline_state="new"), + _make_result(baseline_state="absent"), + _make_result(baseline_state="unchanged"), + ], + ) + _main([str(f), "--baseline", "new", "--baseline", "absent"]) + out = capsys.readouterr().out + assert "Total alerts: 2" in out + + def test_directory_input(self, tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: + """Directory input.""" + sarif_dir = tmp_path / "sarif" + 
_write_sarif(sarif_dir / "a.sarif", [_make_result(rule_id="py/r1")]) + _write_sarif(sarif_dir / "b.sarif", [_make_result(rule_id="py/r2")]) + rc = _main([str(sarif_dir)]) + assert rc == 0 + out = capsys.readouterr().out + assert "py/r1" in out + assert "py/r2" in out + assert "Total alerts: 2" in out + + def test_multiple_files(self, tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: + """Multiple files.""" + f1 = _write_sarif(tmp_path / "a.sarif", [_make_result(rule_id="r1")]) + f2 = _write_sarif(tmp_path / "b.sarif", [_make_result(rule_id="r2")]) + _main([str(f1), str(f2)]) + out = capsys.readouterr().out + assert "Total alerts: 2" in out + + def test_header_line_printed_per_file( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Header line printed per file.""" + f = self._sarif_file(tmp_path, [_make_result()]) + _main([str(f)]) + out = capsys.readouterr().out + assert f"== {f} ==" in out + + def test_partial_failure_still_processes_good_files( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Partial failure still processes good files.""" + good = self._sarif_file(tmp_path, [_make_result(rule_id="good/rule")]) + bad = tmp_path / "bad.sarif" + bad.write_text("{broken}", encoding="utf-8") + rc = _main([str(bad), str(good)]) + assert rc == 1 # error due to bad file + out = capsys.readouterr().out + assert "good/rule" in out # good file was still processed + + def test_empty_sarif_shows_zero_total( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """Empty sarif shows zero total.""" + f = self._sarif_file(tmp_path, []) + _main([str(f)]) + assert "Total alerts: 0" in capsys.readouterr().out + + def test_max_message_arg(self, tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: + """Max message arg.""" + long_msg = "y" * 300 + f = self._sarif_file(tmp_path, [_make_result(message=long_msg)]) + _main([str(f), "--max-message", "30"]) + out = capsys.readouterr().out + # Take 
only the result line (first line after the "— " separator) + msg_part = out.split("— ", 1)[1].splitlines()[0] + assert len(msg_part) <= 30 + assert msg_part.endswith("…") + + def test_sys_not_imported_lazily(self) -> None: + """`sys` must be imported at module level. + + Missing-file errors must work when main() is called programmatically + (not via the ``__main__`` guard). + """ + assert hasattr(sarif_alerts, "__file__") + src = Path(sarif_alerts.__file__ or "").read_text(encoding="utf-8") + # 'import sys' must appear before any function definition that uses it + import_pos = src.index("import sys") + main_pos = src.index("def main(") + assert import_pos < main_pos, "'import sys' must appear at module level before main()"