mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-12 22:24:56 +03:00
381 lines
13 KiB
Python
381 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
SAST Pipeline Orchestration Script

Runs CodeQL and Semgrep scans, aggregates SARIF results, evaluates quality gates,
and produces a consolidated report. Designed to be invoked from GitHub Actions
or any CI/CD platform.

Usage:
    python process.py --repo-path /path/to/repo --output report.json
    python process.py --repo-path . --severity-threshold high --fail-on-findings
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass
class ScanFinding:
    """One normalized finding extracted from a scanner's SARIF output."""

    # Required fields -------------------------------------------------------
    rule_id: str      # SARIF ``ruleId`` of the rule that fired
    severity: str     # normalized label: critical / high / medium / low / none
    message: str      # human-readable text of the result
    file_path: str    # URI of the affected artifact
    start_line: int   # first line of the reported region (0 when absent)
    end_line: int     # last line of the reported region
    tool: str         # name of the scanner the finding is attributed to

    # Optional classification metadata; empty string means "not available" ---
    cwe: str = ""
    owasp: str = ""
    fingerprint: str = ""
|
|
|
|
|
|
@dataclass
class ScanResult:
    """Outcome of one scanner invocation (or of one parsed SARIF file)."""

    # Name of the scanner that produced this result.
    tool: str
    # Findings collected for this scan; every instance gets its own list.
    findings: list = field(default_factory=list)
    # Number of rules listed in the scanner's SARIF output.
    rules_evaluated: int = 0
    # Wall-clock duration of the scan in seconds.
    scan_duration_seconds: float = 0.0
    # Process return code; the scan runners use -1 for timeout / missing binary.
    exit_code: int = 0
    # Failure description; empty when nothing went wrong.
    error_message: str = ""
|
|
|
|
|
|
# Rank of each severity label, listed from most to least severe; a lower rank
# means a more severe finding.
SEVERITY_ORDER = {
    label: rank
    for rank, label in enumerate(("critical", "high", "medium", "low", "note", "none"))
}
|
|
|
|
|
|
def run_semgrep(repo_path: str, config: str = "auto", extra_configs: Optional[list] = None) -> ScanResult:
    """Run a Semgrep scan of ``repo_path`` and return its parsed results.

    Semgrep is invoked as ``semgrep ci`` with the working directory set to
    ``repo_path`` and is asked to write SARIF to ``semgrep-results.sarif``
    inside that directory.

    Args:
        repo_path: Directory to scan; also where the SARIF file is written.
        config: Value of the first ``--config`` option.
        extra_configs: Optional additional values, each passed as its own
            ``--config`` option.

    Returns:
        A ``ScanResult`` for tool ``"semgrep"``. Failures are reported through
        ``exit_code`` / ``error_message`` instead of being raised:
        ``exit_code`` is ``-1`` on timeout, missing binary or unreadable SARIF
        output, otherwise it is the process return code.
    """
    result = ScanResult(tool="semgrep")
    sarif_output = os.path.join(repo_path, "semgrep-results.sarif")

    # Exactly one output-format flag is passed. Requesting ``--json`` together
    # with ``--sarif`` asks Semgrep for two competing formats for the single
    # ``--output`` file, which must contain SARIF for parse_sarif to work.
    cmd = [
        "semgrep", "ci",
        "--config", config,
        "--sarif",
        "--output", sarif_output,
        "--quiet",
    ]
    for cfg in extra_configs or []:
        cmd.extend(["--config", cfg])

    start_time = datetime.now(timezone.utc)
    try:
        proc = subprocess.run(
            cmd,
            cwd=repo_path,
            capture_output=True,
            text=True,
            timeout=600,
        )
    except subprocess.TimeoutExpired:
        result.error_message = "Semgrep scan timed out after 600 seconds"
        result.exit_code = -1
        return result
    except FileNotFoundError:
        result.error_message = "semgrep binary not found. Install with: pip install semgrep"
        result.exit_code = -1
        return result
    finally:
        # Record the elapsed time on every path, including failures.
        result.scan_duration_seconds = (datetime.now(timezone.utc) - start_time).total_seconds()

    result.exit_code = proc.returncode
    # Return codes 0 and 1 are treated as a completed scan; anything else is
    # reported as an error with the head of stderr.
    if proc.returncode not in (0, 1):
        result.error_message = proc.stderr[:500]
        return result

    if not os.path.exists(sarif_output):
        return result

    try:
        result.findings = parse_sarif(sarif_output, "semgrep")
        with open(sarif_output, "r", encoding="utf-8") as f:
            sarif_data = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        result.findings = []
        result.exit_code = -1
        result.error_message = f"Could not read Semgrep SARIF output: {exc}"
        return result

    # Sum over all runs so a multi-run SARIF file is not reduced to the rule
    # count of its last run.
    result.rules_evaluated = sum(
        len(run.get("tool", {}).get("driver", {}).get("rules", []))
        for run in sarif_data.get("runs", [])
    )
    return result
|
|
|
|
|
|
def run_codeql_query(repo_path: str, language: str, database_path: str) -> ScanResult:
    """Run ``codeql database analyze`` on an existing database and parse the SARIF.

    The ``{language}-security-extended`` query suite is executed with the
    working directory set to ``repo_path``; SARIF is written to
    ``codeql-{language}-results.sarif`` inside that directory.

    Args:
        repo_path: Working directory for the CodeQL process and location of
            the SARIF output file.
        language: Language name used to select the query suite and to build
            the tool name ``codeql-{language}``.
        database_path: Path to the previously created CodeQL database.

    Returns:
        A ``ScanResult`` for tool ``codeql-{language}``. Failures are reported
        through ``exit_code`` / ``error_message`` instead of being raised:
        ``exit_code`` is ``-1`` on timeout, missing binary or unreadable SARIF
        output, otherwise it is the process return code.
    """
    tool_name = f"codeql-{language}"
    result = ScanResult(tool=tool_name)
    sarif_output = os.path.join(repo_path, f"codeql-{language}-results.sarif")

    cmd = [
        "codeql", "database", "analyze",
        database_path,
        f"codeql/{language}-queries:codeql-suites/{language}-security-extended.qls",
        "--format=sarifv2.1.0",
        f"--output={sarif_output}",
        "--threads=0",
    ]

    start_time = datetime.now(timezone.utc)
    try:
        proc = subprocess.run(
            cmd,
            cwd=repo_path,
            capture_output=True,
            text=True,
            timeout=1200,
        )
    except subprocess.TimeoutExpired:
        result.error_message = "CodeQL analysis timed out after 1200 seconds"
        result.exit_code = -1
        return result
    except FileNotFoundError:
        result.error_message = "codeql binary not found. Install from https://github.com/github/codeql-cli-binaries"
        result.exit_code = -1
        return result
    finally:
        # Record the elapsed time on every path, including failures.
        result.scan_duration_seconds = (datetime.now(timezone.utc) - start_time).total_seconds()

    result.exit_code = proc.returncode
    if proc.returncode != 0:
        # Stop here, as run_semgrep does: a SARIF file found on disk after a
        # failed analysis may be left over from an earlier run, and its
        # findings must not be reported as the outcome of this one.
        result.error_message = proc.stderr[:500]
        return result

    if not os.path.exists(sarif_output):
        return result

    try:
        result.findings = parse_sarif(sarif_output, tool_name)
        with open(sarif_output, "r", encoding="utf-8") as f:
            sarif_data = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        result.findings = []
        result.exit_code = -1
        result.error_message = f"Could not read CodeQL SARIF output: {exc}"
        return result

    # Sum over all runs so a multi-run SARIF file is not reduced to the rule
    # count of its last run.
    result.rules_evaluated = sum(
        len(run.get("tool", {}).get("driver", {}).get("rules", []))
        for run in sarif_data.get("runs", [])
    )
    return result
|
|
|
|
|
|
def _severity_from_score(raw_score, fallback: str) -> str:
    """Map a ``security-severity`` score to a severity label, else ``fallback``."""
    if not raw_score:
        return fallback
    try:
        score = float(raw_score)
    except (TypeError, ValueError):
        # A malformed score must not abort parsing of the whole file.
        return fallback
    if score >= 9.0:
        return "critical"
    if score >= 7.0:
        return "high"
    if score >= 4.0:
        return "medium"
    return "low"


def parse_sarif(sarif_path: str, tool_name: str) -> list:
    """Parse a SARIF file and return its results as ``ScanFinding`` objects.

    Severity is taken from the result's ``level`` (falling back to the rule's
    ``defaultConfiguration.level``, then ``"warning"``) and mapped
    error/warning/note/none -> high/medium/low/none. When the rule carries a
    usable ``security-severity`` property, that score overrides the label:
    >= 9.0 critical, >= 7.0 high, >= 4.0 medium, otherwise low.

    Args:
        sarif_path: Path of the SARIF JSON file to read.
        tool_name: Value stored in each finding's ``tool`` field.

    Returns:
        A list of ``ScanFinding`` objects, in file order across all runs.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    level_to_severity = {"error": "high", "warning": "medium", "note": "low", "none": "none"}
    findings = []

    with open(sarif_path, "r", encoding="utf-8") as f:
        sarif_data = json.load(f)

    for run in sarif_data.get("runs", []):
        # Index the rule metadata once per run so each result is a dictionary
        # lookup instead of another scan over the complete rule list.
        rules_map = {}
        for rule in run.get("tool", {}).get("driver", {}).get("rules", []):
            properties = rule.get("properties", {})
            tags = [t for t in properties.get("tags", []) if isinstance(t, str)]

            cwe_tags = [t for t in tags if t.startswith("CWE")]
            if not cwe_tags:
                # Tags of the form "external/cwe/cwe-079" never start with
                # "CWE"; keep their last path segment, upper-cased.
                cwe_tags = [
                    t.rsplit("/", 1)[-1].upper()
                    for t in tags
                    if t.lower().startswith("external/cwe/")
                ]
            owasp_tags = [t for t in tags if "owasp" in t.lower()]

            rules_map[rule.get("id", "")] = {
                "cwe": cwe_tags[0] if cwe_tags else "",
                "owasp": owasp_tags[0] if owasp_tags else "",
                "severity": rule.get("defaultConfiguration", {}).get("level", "warning"),
                "security_severity": properties.get("security-severity"),
            }

        for result in run.get("results", []):
            rule_id = result.get("ruleId", "unknown")
            rule_info = rules_map.get(rule_id, {})

            level = result.get("level", rule_info.get("severity", "warning"))
            severity = _severity_from_score(
                rule_info.get("security_severity"),
                level_to_severity.get(level, "medium"),
            )

            # Only the first location of a result is reported.
            locations = result.get("locations", [{}])
            physical = locations[0].get("physicalLocation", {}) if locations else {}
            artifact = physical.get("artifactLocation", {})
            region = physical.get("region", {})
            start_line = region.get("startLine", 0)

            findings.append(ScanFinding(
                rule_id=rule_id,
                severity=severity,
                message=result.get("message", {}).get("text", ""),
                file_path=artifact.get("uri", "unknown"),
                start_line=start_line,
                end_line=region.get("endLine", start_line),
                tool=tool_name,
                cwe=rule_info.get("cwe", ""),
                owasp=rule_info.get("owasp", ""),
                fingerprint=str(result.get("fingerprints", {}).get("primaryLocationLineHash", "")),
            ))

    return findings
|
|
|
|
|
|
def evaluate_quality_gate(findings: list, severity_threshold: str) -> dict:
    """Decide whether ``findings`` pass the gate at ``severity_threshold``.

    A finding blocks the gate when its rank in ``SEVERITY_ORDER`` is at or
    above the threshold's rank (a numerically lower rank is more severe). An
    unrecognized threshold is ranked 1 and an unrecognized finding severity
    is ranked 5.

    Returns:
        A dict with ``passed``, ``threshold``, ``total_findings``,
        ``blocking_findings``, ``severity_counts`` and ``blocking_details``
        (one summary dict per blocking finding, message cut to 200 chars).
    """
    threshold_level = SEVERITY_ORDER.get(severity_threshold.lower(), 1)

    severity_counts = {}
    blocking_details = []
    for finding in findings:
        label = finding.severity.lower()
        severity_counts[label] = severity_counts.get(label, 0) + 1

        # Less severe than the threshold: counted above, but not blocking.
        if SEVERITY_ORDER.get(label, 5) > threshold_level:
            continue

        blocking_details.append({
            "rule_id": finding.rule_id,
            "severity": finding.severity,
            "file": finding.file_path,
            "line": finding.start_line,
            "tool": finding.tool,
            "message": finding.message[:200],
        })

    return {
        "passed": not blocking_details,
        "threshold": severity_threshold,
        "total_findings": len(findings),
        "blocking_findings": len(blocking_details),
        "severity_counts": severity_counts,
        "blocking_details": blocking_details,
    }
|
|
|
|
|
|
def generate_report(scan_results: list, quality_gate: dict, repo_path: str) -> dict:
    """Assemble the consolidated report dictionary.

    Args:
        scan_results: Per-tool scan results whose findings are merged.
        quality_gate: Gate evaluation, embedded in the report unchanged.
        repo_path: Repository path recorded in the report metadata.

    Returns:
        A dict with ``report_metadata``, ``scan_summary`` (one entry per scan
        result), ``quality_gate``, ``top_cwes`` (up to ten ``(cwe, count)``
        pairs, most frequent first) and ``findings`` (ordered by severity
        rank, message cut to 300 chars).
    """
    merged = []
    for scan in scan_results:
        merged.extend(scan.findings)

    # Tally findings per CWE, ignoring findings without one.
    cwe_counts = {}
    for finding in merged:
        if not finding.cwe:
            continue
        cwe_counts[finding.cwe] = cwe_counts.get(finding.cwe, 0) + 1
    ranked_cwes = sorted(cwe_counts.items(), key=lambda pair: pair[1], reverse=True)

    metadata = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "repository": repo_path,
        "report_version": "1.0.0",
    }

    summary = []
    for scan in scan_results:
        # Return codes 0 and 1 both count as a successful scan.
        status = "success" if scan.exit_code in (0, 1) else "error"
        summary.append({
            "tool": scan.tool,
            "findings_count": len(scan.findings),
            "rules_evaluated": scan.rules_evaluated,
            "duration_seconds": scan.scan_duration_seconds,
            "status": status,
            "error": scan.error_message,
        })

    by_severity = sorted(merged, key=lambda item: SEVERITY_ORDER.get(item.severity.lower(), 5))
    finding_rows = []
    for finding in by_severity:
        finding_rows.append({
            "rule_id": finding.rule_id,
            "severity": finding.severity,
            "tool": finding.tool,
            "file": finding.file_path,
            "line": finding.start_line,
            "cwe": finding.cwe,
            "owasp": finding.owasp,
            "message": finding.message[:300],
        })

    return {
        "report_metadata": metadata,
        "scan_summary": summary,
        "quality_gate": quality_gate,
        "top_cwes": ranked_cwes[:10],
        "findings": finding_rows,
    }
|
|
|
|
|
|
def main():
    """Command-line entry point: scan (or load SARIF), report, apply the gate.

    With ``--sarif-only`` the given SARIF files are parsed and no scanner is
    run. Otherwise Semgrep runs unless ``--skip-semgrep`` is given, and CodeQL
    runs only when it is not skipped and both ``--codeql-language`` and
    ``--codeql-db-path`` are provided. The consolidated report is written as
    JSON to ``--output``.

    Exits with status 1 when ``--fail-on-findings`` is set and the quality
    gate fails; otherwise returns normally.
    """
    parser = argparse.ArgumentParser(description="SAST Pipeline Orchestration")
    parser.add_argument("--repo-path", required=True, help="Path to the repository to scan")
    parser.add_argument("--output", default="sast-report.json", help="Output report file path")
    parser.add_argument("--severity-threshold", default="high",
                        choices=["critical", "high", "medium", "low"],
                        help="Minimum severity to block pipeline")
    parser.add_argument("--fail-on-findings", action="store_true",
                        help="Exit with non-zero code if quality gate fails")
    parser.add_argument("--semgrep-config", default="auto",
                        help="Semgrep configuration (default: auto)")
    parser.add_argument("--semgrep-extra-configs", nargs="*",
                        help="Additional Semgrep config paths")
    parser.add_argument("--skip-semgrep", action="store_true", help="Skip Semgrep scan")
    parser.add_argument("--skip-codeql", action="store_true", help="Skip CodeQL scan")
    parser.add_argument("--codeql-language", default=None, help="Language for CodeQL analysis")
    parser.add_argument("--codeql-db-path", default=None, help="Path to CodeQL database")
    parser.add_argument("--sarif-only", nargs="*",
                        help="Only parse existing SARIF files instead of running scans")
    args = parser.parse_args()

    repo_path = os.path.abspath(args.repo_path)
    scan_results = []

    if args.sarif_only:
        for sarif_file in args.sarif_only:
            # The file name without its extension stands in for the tool name.
            tool_name = Path(sarif_file).stem
            findings = parse_sarif(sarif_file, tool_name)
            scan_results.append(ScanResult(tool=tool_name, findings=findings))
    else:
        if not args.skip_semgrep:
            print("[*] Running Semgrep scan...")
            semgrep_result = run_semgrep(
                repo_path,
                config=args.semgrep_config,
                extra_configs=args.semgrep_extra_configs,
            )
            scan_results.append(semgrep_result)
            print(f" Found {len(semgrep_result.findings)} findings in {semgrep_result.scan_duration_seconds:.1f}s")

            if semgrep_result.error_message:
                print(f" Warning: {semgrep_result.error_message}")

        if not args.skip_codeql and args.codeql_language and args.codeql_db_path:
            print(f"[*] Running CodeQL analysis for {args.codeql_language}...")
            codeql_result = run_codeql_query(repo_path, args.codeql_language, args.codeql_db_path)
            scan_results.append(codeql_result)
            print(f" Found {len(codeql_result.findings)} findings in {codeql_result.scan_duration_seconds:.1f}s")

            # Surface CodeQL failures the same way Semgrep failures are.
            if codeql_result.error_message:
                print(f" Warning: {codeql_result.error_message}")

    all_findings = []
    for sr in scan_results:
        all_findings.extend(sr.findings)

    quality_gate = evaluate_quality_gate(all_findings, args.severity_threshold)
    report = generate_report(scan_results, quality_gate, repo_path)

    output_path = os.path.abspath(args.output)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)
    print(f"\n[*] Report written to {output_path}")

    if quality_gate["passed"]:
        print(f"[PASS] Quality gate passed. {quality_gate['total_findings']} findings, none blocking.")
    else:
        print(f"[FAIL] Quality gate failed. {quality_gate['blocking_findings']} blocking findings:")
        for detail in quality_gate["blocking_details"]:
            print(f" - [{detail['severity'].upper()}] {detail['rule_id']} in {detail['file']}:{detail['line']}")

    # A scan that errored contributes no findings, so the gate can pass on an
    # unscanned repository. Say so explicitly; the exit status is deliberately
    # left unchanged. NOTE(review): consider failing closed here.
    incomplete = [sr.tool for sr in scan_results if sr.exit_code not in (0, 1)]
    if incomplete:
        print(f"[WARN] Scans that did not complete: {', '.join(incomplete)}. Quality gate result may be incomplete.")

    if args.fail_on_findings and not quality_gate["passed"]:
        sys.exit(1)


if __name__ == "__main__":
    main()
|