Files
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

154 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""Agent for running Semgrep SAST scans and generating SARIF for GitHub Advanced Security."""
import subprocess
import json
import argparse
import sys
import os
from datetime import datetime
def run_semgrep_scan(target_dir, config="auto", output_format="json"):
    """Run a Semgrep SAST scan on a code directory and return the parsed report.

    Args:
        target_dir: Path handed to ``semgrep scan`` as the scan target.
        config: Value passed to ``--config`` (for example ``auto`` or ``p/ci``).
        output_format: Kept for backward compatibility only. The command always
            requests ``--json`` because the output is decoded with ``json.loads``.

    Returns:
        The decoded Semgrep JSON report as a dict. An empty dict is returned
        when Semgrep cannot be started, times out, exits with a status other
        than 0 or 1, or prints something that is not a JSON object.
    """
    print(f"[*] Running Semgrep scan on {target_dir} (config={config})...")
    # NOTE(review): the Semgrep CLI reference documents `--no-git-ignore`;
    # confirm that `--no-git` is accepted by the installed Semgrep version.
    cmd = ["semgrep", "scan", "--config", config, "--json", "--no-git", target_dir]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    except subprocess.TimeoutExpired:
        print(" [-] Semgrep error: scan timed out after 600 seconds")
        return {}
    except OSError as exc:
        # Covers a missing or non-executable `semgrep` binary.
        print(f" [-] Semgrep error: could not start semgrep: {exc}")
        return {}

    # Exit status 1 is accepted alongside 0 (Semgrep uses it to signal that
    # findings were reported); anything else is treated as a failed run.
    if result.returncode not in (0, 1):
        print(f" [-] Semgrep error: {result.stderr[:300]}")
        return {}

    if not result.stdout:
        data = {}
    else:
        try:
            data = json.loads(result.stdout)
        except json.JSONDecodeError as exc:
            print(f" [-] Semgrep error: output is not valid JSON: {exc}")
            return {}
    if not isinstance(data, dict):
        print(" [-] Semgrep error: unexpected JSON report structure")
        return {}

    results = data.get("results", [])
    errors = data.get("errors", [])
    print(f" Findings: {len(results)}, Errors: {len(errors)}")
    return data
def parse_semgrep_results(scan_data):
    """Flatten the ``results`` list of a Semgrep report into simple dicts.

    Each returned dict carries ``rule_id``, ``message``, ``severity``,
    ``file``, ``line``, ``cwe`` and ``owasp``. Missing fields fall back to
    ``"unknown"``, ``""``, ``"WARNING"``, ``""``, ``0``, ``[]`` and ``[]``
    respectively. A per-severity tally is printed before returning.
    """
    findings = []
    tally = {}
    for entry in scan_data.get("results", []):
        extra = entry.get("extra", {})
        metadata = extra.get("metadata", {})
        level = extra.get("severity", "WARNING")
        findings.append(
            {
                "rule_id": entry.get("check_id", "unknown"),
                "message": extra.get("message", ""),
                "severity": level,
                "file": entry.get("path", ""),
                "line": entry.get("start", {}).get("line", 0),
                "cwe": metadata.get("cwe", []),
                "owasp": metadata.get("owasp", []),
            }
        )
        tally[level] = tally.get(level, 0) + 1

    print("\n[*] Severity breakdown:")
    for level in sorted(tally):
        print(f" {level}: {tally[level]}")
    return findings
def generate_sarif(findings, output_path):
    """Write findings to ``output_path`` as a SARIF 2.1.0 document.

    Args:
        findings: Iterable of dicts with the keys ``rule_id``, ``message``,
            ``severity``, ``file`` and ``line``.
        output_path: Destination file; it is overwritten if it exists.

    Severities are mapped to SARIF levels: ERROR/HIGH/CRITICAL become
    ``error``, WARNING/MEDIUM become ``warning`` and INFO/LOW become ``note``.
    Any other severity falls back to ``warning``.
    """
    level_by_severity = {
        "CRITICAL": "error",
        "HIGH": "error",
        "ERROR": "error",
        "MEDIUM": "warning",
        "WARNING": "warning",
        "LOW": "note",
        "INFO": "note",
    }
    rules, results = [], []
    seen_rules = set()
    for finding in findings:
        rule_id = finding["rule_id"]
        # SARIF message text should not be empty; fall back to the rule id.
        message = finding["message"] or rule_id
        level = level_by_severity.get(str(finding["severity"]).upper(), "warning")
        # SARIF line numbers are 1-based, so a missing/zero line is clamped to 1.
        line = finding["line"]
        start_line = line if isinstance(line, int) and line >= 1 else 1

        if rule_id not in seen_rules:
            # The rule descriptor is built from the first finding seen for it.
            rules.append({
                "id": rule_id,
                "shortDescription": {"text": message[:200]},
                "defaultConfiguration": {"level": level},
            })
            seen_rules.add(rule_id)
        results.append({
            "ruleId": rule_id,
            "message": {"text": message[:500]},
            "level": level,
            "locations": [{"physicalLocation": {
                "artifactLocation": {"uri": finding["file"]},
                "region": {"startLine": start_line},
            }}],
        })

    sarif = {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [{
            "tool": {"driver": {"name": "Semgrep", "rules": rules}},
            "results": results,
        }],
    }
    with open(output_path, "w", encoding="utf-8") as handle:
        json.dump(sarif, handle, indent=2)
    print(f"[*] SARIF report: {output_path}")
def apply_quality_gate(findings, fail_on="ERROR"):
    """Decide whether the findings pass a severity-based quality gate.

    Args:
        findings: Iterable of dicts with the keys ``severity``, ``file``,
            ``line`` and ``rule_id``.
        fail_on: Lowest severity that blocks. An unrecognised value is treated
            like ``ERROR``.

    Returns:
        ``False`` when at least one finding is at or above ``fail_on``
        (the first five are printed), otherwise ``True``.
    """
    # LOW/MEDIUM/HIGH/CRITICAL are ranked next to the legacy
    # INFO/WARNING/ERROR names so that such findings are not silently
    # treated as the lowest severity and waved through the gate.
    severity_order = {
        "INFO": 0, "LOW": 0,
        "WARNING": 1, "MEDIUM": 1,
        "ERROR": 2, "HIGH": 2,
        "CRITICAL": 3,
    }
    threshold = severity_order.get(str(fail_on).upper(), 2)
    blocking = [
        finding for finding in findings
        # Severities outside the table keep the original rank of 0.
        if severity_order.get(str(finding["severity"]).upper(), 0) >= threshold
    ]
    if blocking:
        print(f"\n[!] QUALITY GATE FAILED: {len(blocking)} findings at {fail_on}+")
        for b in blocking[:5]:
            print(f" {b['file']}:{b['line']} - {b['rule_id']}")
        return False
    print(f"\n[+] Quality gate passed (threshold: {fail_on})")
    return True
def generate_github_actions_workflow(config="auto"):
    """Build, print and return a GitHub Actions SAST workflow as YAML text.

    Args:
        config: Semgrep config value inserted into the action's ``config`` input.

    Returns:
        The workflow document as a string ending with a newline.
    """
    # The YAML is assembled line by line so that its nesting is explicit and
    # independent of how this function itself is indented.
    # NOTE(review): returntocorp/semgrep-action appears to be deprecated
    # upstream in favour of running `semgrep ci` in the official container;
    # confirm before relying on this template.
    lines = [
        "name: SAST Scan",
        "on:",
        "  pull_request:",
        "    branches: [main]",
        "  push:",
        "    branches: [main]",
        # upload-sarif needs write access to code-scanning results.
        "permissions:",
        "  contents: read",
        "  actions: read",
        "  security-events: write",
        "jobs:",
        "  semgrep:",
        "    runs-on: ubuntu-latest",
        "    steps:",
        "      - uses: actions/checkout@v4",
        "      - uses: returntocorp/semgrep-action@v1",
        "        with:",
        f"          config: {config}",
        '          generateSarif: "1"',
        "      - uses: github/codeql-action/upload-sarif@v3",
        "        with:",
        "          sarif_file: semgrep.sarif",
        "        if: always()",
    ]
    workflow = "\n".join(lines) + "\n"
    print("[*] Generated GitHub Actions workflow:")
    print(workflow)
    return workflow
def main():
    """Command-line entry point for the SAST pipeline agent.

    Actions:
        scan: run Semgrep on ``--target``, write ``semgrep.sarif`` into
            ``--output`` and apply the quality gate.
        parse: print the severity breakdown of an existing ``--report``.
        sarif: convert an existing ``--report`` to ``semgrep.sarif``.
        gate: apply the quality gate to an existing ``--report``.
        gen-workflow: print a GitHub Actions workflow.

    Exit status: 1 when the quality gate fails (``scan`` and ``gate``),
    2 for usage errors or when ``scan`` yields no report.
    """
    parser = argparse.ArgumentParser(description="SAST GitHub Actions Pipeline Agent")
    parser.add_argument("action", choices=["scan", "parse", "sarif", "gate", "gen-workflow"])
    parser.add_argument("--target", default=".", help="Directory to scan")
    parser.add_argument("--config", default="auto", help="Semgrep config (auto, p/ci, p/owasp)")
    parser.add_argument("--report", help="Existing Semgrep JSON report")
    parser.add_argument("--fail-on", default="ERROR", choices=["INFO", "WARNING", "ERROR"])
    parser.add_argument("-o", "--output", default=".")
    args = parser.parse_args()

    # These actions read an existing report; fail with a usage message rather
    # than a TypeError from open(None).
    if args.action in ("parse", "sarif", "gate") and not args.report:
        parser.error(f"--report is required for the '{args.action}' action")

    os.makedirs(args.output, exist_ok=True)
    sarif_path = os.path.join(args.output, "semgrep.sarif")

    if args.action == "gen-workflow":
        generate_github_actions_workflow(args.config)
        return

    if args.action == "scan":
        data = run_semgrep_scan(args.target, args.config)
        if not data:
            # An empty report means the scan did not run properly; do not
            # write an empty SARIF file or report a passing gate for it.
            print("[-] Scan produced no report; aborting", file=sys.stderr)
            sys.exit(2)
        findings = parse_semgrep_results(data)
        generate_sarif(findings, sarif_path)
        # Propagate the gate result so a CI step running `scan` can fail.
        if not apply_quality_gate(findings, args.fail_on):
            sys.exit(1)
        return

    # parse / sarif / gate all start from an existing JSON report.
    with open(args.report, encoding="utf-8") as handle:
        data = json.load(handle)
    findings = parse_semgrep_results(data)

    if args.action == "sarif":
        generate_sarif(findings, sarif_path)
    elif args.action == "gate":
        passed = apply_quality_gate(findings, args.fail_on)
        sys.exit(0 if passed else 1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()