#!/usr/bin/env python3 """Agent for running Semgrep SAST scans and generating SARIF for GitHub Advanced Security.""" import subprocess import json import argparse import sys import os def run_semgrep_scan(target_dir, config="auto", output_format="json"): """Run Semgrep SAST scan on a code directory.""" print(f"[*] Running Semgrep scan on {target_dir} (config={config})...") cmd = ["semgrep", "scan", "--config", config, "--json", "--no-git", target_dir] result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) if result.returncode not in (0, 1): print(f" [-] Semgrep error: {result.stderr[:300]}") return {} data = json.loads(result.stdout) if result.stdout else {} results = data.get("results", []) errors = data.get("errors", []) print(f" Findings: {len(results)}, Errors: {len(errors)}") return data def parse_semgrep_results(scan_data): """Parse Semgrep findings into structured format.""" results = scan_data.get("results", []) findings = [] severity_counts = {} for r in results: severity = r.get("extra", {}).get("severity", "WARNING") finding = { "rule_id": r.get("check_id", "unknown"), "message": r.get("extra", {}).get("message", ""), "severity": severity, "file": r.get("path", ""), "line": r.get("start", {}).get("line", 0), "cwe": r.get("extra", {}).get("metadata", {}).get("cwe", []), "owasp": r.get("extra", {}).get("metadata", {}).get("owasp", []), } findings.append(finding) severity_counts[severity] = severity_counts.get(severity, 0) + 1 print(f"\n[*] Severity breakdown:") for sev, count in sorted(severity_counts.items()): print(f" {sev}: {count}") return findings def generate_sarif(findings, output_path): """Convert Semgrep findings to SARIF 2.1.0 format.""" rules, results = [], [] seen_rules = set() for f in findings: rid = f["rule_id"] if rid not in seen_rules: rules.append({"id": rid, "shortDescription": {"text": f["message"][:200]}, "defaultConfiguration": {"level": "warning"}}) seen_rules.add(rid) results.append({ "ruleId": rid, "message": {"text": f["message"][:500]}, "level": "error" if f["severity"] == "ERROR" else "warning", "locations": [{"physicalLocation": { "artifactLocation": {"uri": f["file"]}, "region": {"startLine": f["line"]}}}], }) sarif = {"$schema": "https://json.schemastore.org/sarif-2.1.0.json", "version": "2.1.0", "runs": [{"tool": {"driver": {"name": "Semgrep", "rules": rules}}, "results": results}]} with open(output_path, "w") as f: json.dump(sarif, f, indent=2) print(f"[*] SARIF report: {output_path}") def apply_quality_gate(findings, fail_on="ERROR"): """Apply quality gate based on severity threshold.""" severity_order = {"INFO": 0, "WARNING": 1, "ERROR": 2} threshold = severity_order.get(fail_on, 2) blocking = [f for f in findings if severity_order.get(f["severity"], 0) >= threshold] if blocking: print(f"\n[!] QUALITY GATE FAILED: {len(blocking)} findings at {fail_on}+") for b in blocking[:5]: print(f" {b['file']}:{b['line']} - {b['rule_id']}") return False print(f"\n[+] Quality gate passed (threshold: {fail_on})") return True def generate_github_actions_workflow(config="auto"): """Generate a GitHub Actions SAST workflow YAML.""" workflow = f"""name: SAST Scan on: pull_request: branches: [main] push: branches: [main] jobs: semgrep: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: returntocorp/semgrep-action@v1 with: config: {config} generateSarif: "1" - uses: github/codeql-action/upload-sarif@v3 with: sarif_file: semgrep.sarif if: always() """ print("[*] Generated GitHub Actions workflow:") print(workflow) return workflow def main(): parser = argparse.ArgumentParser(description="SAST GitHub Actions Pipeline Agent") parser.add_argument("action", choices=["scan", "parse", "sarif", "gate", "gen-workflow"]) parser.add_argument("--target", default=".", help="Directory to scan") parser.add_argument("--config", default="auto", help="Semgrep config (auto, p/ci, p/owasp)") parser.add_argument("--report", help="Existing Semgrep JSON report") parser.add_argument("--fail-on", default="ERROR", choices=["INFO", "WARNING", "ERROR"]) parser.add_argument("-o", "--output", default=".") args = parser.parse_args() os.makedirs(args.output, exist_ok=True) if args.action == "scan": data = run_semgrep_scan(args.target, args.config) findings = parse_semgrep_results(data) generate_sarif(findings, os.path.join(args.output, "semgrep.sarif")) apply_quality_gate(findings, args.fail_on) elif args.action == "parse": with open(args.report) as f: data = json.load(f) parse_semgrep_results(data) elif args.action == "sarif": with open(args.report) as f: data = json.load(f) findings = parse_semgrep_results(data) generate_sarif(findings, os.path.join(args.output, "semgrep.sarif")) elif args.action == "gate": with open(args.report) as f: data = json.load(f) findings = parse_semgrep_results(data) passed = apply_quality_gate(findings, args.fail_on) sys.exit(0 if passed else 1) elif args.action == "gen-workflow": generate_github_actions_workflow(args.config) if __name__ == "__main__": main()