Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

153 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""Agent for running Semgrep SAST scans and generating SARIF for GitHub Advanced Security."""
import subprocess
import json
import argparse
import sys
import os
def run_semgrep_scan(target_dir, config="auto", output_format="json"):
"""Run Semgrep SAST scan on a code directory."""
print(f"[*] Running Semgrep scan on {target_dir} (config={config})...")
cmd = ["semgrep", "scan", "--config", config, "--json", "--no-git", target_dir]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
if result.returncode not in (0, 1):
print(f" [-] Semgrep error: {result.stderr[:300]}")
return {}
data = json.loads(result.stdout) if result.stdout else {}
results = data.get("results", [])
errors = data.get("errors", [])
print(f" Findings: {len(results)}, Errors: {len(errors)}")
return data
def parse_semgrep_results(scan_data):
"""Parse Semgrep findings into structured format."""
results = scan_data.get("results", [])
findings = []
severity_counts = {}
for r in results:
severity = r.get("extra", {}).get("severity", "WARNING")
finding = {
"rule_id": r.get("check_id", "unknown"),
"message": r.get("extra", {}).get("message", ""),
"severity": severity,
"file": r.get("path", ""),
"line": r.get("start", {}).get("line", 0),
"cwe": r.get("extra", {}).get("metadata", {}).get("cwe", []),
"owasp": r.get("extra", {}).get("metadata", {}).get("owasp", []),
}
findings.append(finding)
severity_counts[severity] = severity_counts.get(severity, 0) + 1
print(f"\n[*] Severity breakdown:")
for sev, count in sorted(severity_counts.items()):
print(f" {sev}: {count}")
return findings
def generate_sarif(findings, output_path):
"""Convert Semgrep findings to SARIF 2.1.0 format."""
rules, results = [], []
seen_rules = set()
for f in findings:
rid = f["rule_id"]
if rid not in seen_rules:
rules.append({"id": rid, "shortDescription": {"text": f["message"][:200]},
"defaultConfiguration": {"level": "warning"}})
seen_rules.add(rid)
results.append({
"ruleId": rid, "message": {"text": f["message"][:500]},
"level": "error" if f["severity"] == "ERROR" else "warning",
"locations": [{"physicalLocation": {
"artifactLocation": {"uri": f["file"]},
"region": {"startLine": f["line"]}}}],
})
sarif = {"$schema": "https://json.schemastore.org/sarif-2.1.0.json", "version": "2.1.0",
"runs": [{"tool": {"driver": {"name": "Semgrep", "rules": rules}},
"results": results}]}
with open(output_path, "w") as f:
json.dump(sarif, f, indent=2)
print(f"[*] SARIF report: {output_path}")
def apply_quality_gate(findings, fail_on="ERROR"):
"""Apply quality gate based on severity threshold."""
severity_order = {"INFO": 0, "WARNING": 1, "ERROR": 2}
threshold = severity_order.get(fail_on, 2)
blocking = [f for f in findings if severity_order.get(f["severity"], 0) >= threshold]
if blocking:
print(f"\n[!] QUALITY GATE FAILED: {len(blocking)} findings at {fail_on}+")
for b in blocking[:5]:
print(f" {b['file']}:{b['line']} - {b['rule_id']}")
return False
print(f"\n[+] Quality gate passed (threshold: {fail_on})")
return True
def generate_github_actions_workflow(config="auto"):
"""Generate a GitHub Actions SAST workflow YAML."""
workflow = f"""name: SAST Scan
on:
pull_request:
branches: [main]
push:
branches: [main]
jobs:
semgrep:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: returntocorp/semgrep-action@v1
with:
config: {config}
generateSarif: "1"
- uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: semgrep.sarif
if: always()
"""
print("[*] Generated GitHub Actions workflow:")
print(workflow)
return workflow
def main():
parser = argparse.ArgumentParser(description="SAST GitHub Actions Pipeline Agent")
parser.add_argument("action", choices=["scan", "parse", "sarif", "gate", "gen-workflow"])
parser.add_argument("--target", default=".", help="Directory to scan")
parser.add_argument("--config", default="auto", help="Semgrep config (auto, p/ci, p/owasp)")
parser.add_argument("--report", help="Existing Semgrep JSON report")
parser.add_argument("--fail-on", default="ERROR", choices=["INFO", "WARNING", "ERROR"])
parser.add_argument("-o", "--output", default=".")
args = parser.parse_args()
os.makedirs(args.output, exist_ok=True)
if args.action == "scan":
data = run_semgrep_scan(args.target, args.config)
findings = parse_semgrep_results(data)
generate_sarif(findings, os.path.join(args.output, "semgrep.sarif"))
apply_quality_gate(findings, args.fail_on)
elif args.action == "parse":
with open(args.report) as f:
data = json.load(f)
parse_semgrep_results(data)
elif args.action == "sarif":
with open(args.report) as f:
data = json.load(f)
findings = parse_semgrep_results(data)
generate_sarif(findings, os.path.join(args.output, "semgrep.sarif"))
elif args.action == "gate":
with open(args.report) as f:
data = json.load(f)
findings = parse_semgrep_results(data)
passed = apply_quality_gate(findings, args.fail_on)
sys.exit(0 if passed else 1)
elif args.action == "gen-workflow":
generate_github_actions_workflow(args.config)
if __name__ == "__main__":
main()