Anthropic-Cybersecurity-Skills/skills/integrating-sast-into-github-actions-pipeline/scripts/agent.py

#!/usr/bin/env python3
"""Agent for running Semgrep SAST scans and generating SARIF for GitHub Advanced Security."""

import subprocess
import json
import argparse
import sys
import os


def run_semgrep_scan(target_dir, config="auto", output_format="json"):
    """Run Semgrep SAST scan on a code directory."""
    print(f"[*] Running Semgrep scan on {target_dir} (config={config})...")
    cmd = ["semgrep", "scan", "--config", config, "--json", "--no-git", target_dir]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode not in (0, 1):
        print(f"  [-] Semgrep error: {result.stderr[:300]}")
        return {}
    data = json.loads(result.stdout) if result.stdout else {}
    results = data.get("results", [])
    errors = data.get("errors", [])
    print(f"  Findings: {len(results)}, Errors: {len(errors)}")
    return data


def parse_semgrep_results(scan_data):
    """Parse Semgrep findings into structured format."""
    results = scan_data.get("results", [])
    findings = []
    severity_counts = {}
    for r in results:
        severity = r.get("extra", {}).get("severity", "WARNING")
        finding = {
            "rule_id": r.get("check_id", "unknown"),
            "message": r.get("extra", {}).get("message", ""),
            "severity": severity,
            "file": r.get("path", ""),
            "line": r.get("start", {}).get("line", 0),
            "cwe": r.get("extra", {}).get("metadata", {}).get("cwe", []),
            "owasp": r.get("extra", {}).get("metadata", {}).get("owasp", []),
        }
        findings.append(finding)
        severity_counts[severity] = severity_counts.get(severity, 0) + 1
    print(f"\n[*] Severity breakdown:")
    for sev, count in sorted(severity_counts.items()):
        print(f"  {sev}: {count}")
    return findings


def generate_sarif(findings, output_path):
    """Convert Semgrep findings to SARIF 2.1.0 format."""
    rules, results = [], []
    seen_rules = set()
    for f in findings:
        rid = f["rule_id"]
        if rid not in seen_rules:
            rules.append({"id": rid, "shortDescription": {"text": f["message"][:200]},
                           "defaultConfiguration": {"level": "warning"}})
            seen_rules.add(rid)
        results.append({
            "ruleId": rid, "message": {"text": f["message"][:500]},
            "level": "error" if f["severity"] == "ERROR" else "warning",
            "locations": [{"physicalLocation": {
                "artifactLocation": {"uri": f["file"]},
                "region": {"startLine": f["line"]}}}],
        })
    sarif = {"$schema": "https://json.schemastore.org/sarif-2.1.0.json", "version": "2.1.0",
             "runs": [{"tool": {"driver": {"name": "Semgrep", "rules": rules}},
                        "results": results}]}
    with open(output_path, "w") as f:
        json.dump(sarif, f, indent=2)
    print(f"[*] SARIF report: {output_path}")


def apply_quality_gate(findings, fail_on="ERROR"):
    """Apply quality gate based on severity threshold."""
    severity_order = {"INFO": 0, "WARNING": 1, "ERROR": 2}
    threshold = severity_order.get(fail_on, 2)
    blocking = [f for f in findings if severity_order.get(f["severity"], 0) >= threshold]
    if blocking:
        print(f"\n[!] QUALITY GATE FAILED: {len(blocking)} findings at {fail_on}+")
        for b in blocking[:5]:
            print(f"  {b['file']}:{b['line']} - {b['rule_id']}")
        return False
    print(f"\n[+] Quality gate passed (threshold: {fail_on})")
    return True


def generate_github_actions_workflow(config="auto"):
    """Generate a GitHub Actions SAST workflow YAML."""
    workflow = f"""name: SAST Scan
on:
  pull_request:
    branches: [main]
  push:
    branches: [main]
jobs:
  semgrep:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: returntocorp/semgrep-action@v1
        with:
          config: {config}
          generateSarif: "1"
      - uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: semgrep.sarif
        if: always()
"""
    print("[*] Generated GitHub Actions workflow:")
    print(workflow)
    return workflow


def main():
    parser = argparse.ArgumentParser(description="SAST GitHub Actions Pipeline Agent")
    parser.add_argument("action", choices=["scan", "parse", "sarif", "gate", "gen-workflow"])
    parser.add_argument("--target", default=".", help="Directory to scan")
    parser.add_argument("--config", default="auto", help="Semgrep config (auto, p/ci, p/owasp)")
    parser.add_argument("--report", help="Existing Semgrep JSON report")
    parser.add_argument("--fail-on", default="ERROR", choices=["INFO", "WARNING", "ERROR"])
    parser.add_argument("-o", "--output", default=".")
    args = parser.parse_args()

    os.makedirs(args.output, exist_ok=True)
    if args.action == "scan":
        data = run_semgrep_scan(args.target, args.config)
        findings = parse_semgrep_results(data)
        generate_sarif(findings, os.path.join(args.output, "semgrep.sarif"))
        apply_quality_gate(findings, args.fail_on)
    elif args.action == "parse":
        with open(args.report) as f:
            data = json.load(f)
        parse_semgrep_results(data)
    elif args.action == "sarif":
        with open(args.report) as f:
            data = json.load(f)
        findings = parse_semgrep_results(data)
        generate_sarif(findings, os.path.join(args.output, "semgrep.sarif"))
    elif args.action == "gate":
        with open(args.report) as f:
            data = json.load(f)
        findings = parse_semgrep_results(data)
        passed = apply_quality_gate(findings, args.fail_on)
        sys.exit(0 if passed else 1)
    elif args.action == "gen-workflow":
        generate_github_actions_workflow(args.config)


if __name__ == "__main__":
    main()