#!/usr/bin/env python3 """Security audit scanner for common vulnerabilities. Scans source files for hardcoded secrets, eval() usage, SQL string concatenation, and sensitive data in console output. Outputs JSON. Usage: python security-audit.py ./src python security-audit.py ./src --severity high --format pretty """ import argparse import json import os import re import sys from dataclasses import asdict, dataclass, field from pathlib import Path SCAN_EXTENSIONS = { ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php", ".env", ".yaml", ".yml", ".toml", ".json", } SKIP_DIRS = { "node_modules", ".git", "__pycache__", ".venv", "venv", "dist", "build", ".next", ".nuxt", "vendor", } @dataclass class Finding: file: str line: int rule: str severity: str message: str snippet: str @dataclass class AuditReport: scanned_files: int = 0 findings: list = field(default_factory=list) summary: dict = field(default_factory=dict) # --- Detection Rules --- SECRET_PATTERNS = [ (r'(?i)(api[_-]?key|apikey)\s*[=:]\s*["\'][A-Za-z0-9_\-]{16,}["\']', "Possible API key"), (r'(?i)(secret|password|passwd|pwd)\s*[=:]\s*["\'][^"\']{8,}["\']', "Possible hardcoded secret"), (r'(?i)(aws_access_key_id|aws_secret_access_key)\s*[=:]\s*["\'][^"\']+["\']', "AWS credential"), (r'(?i)bearer\s+[A-Za-z0-9_\-\.]{20,}', "Possible bearer token"), (r'(?i)(ghp_|gho_|github_pat_)[A-Za-z0-9_]{20,}', "GitHub token"), (r'(?i)(sk-|pk_live_|pk_test_|sk_live_|sk_test_)[A-Za-z0-9]{20,}', "API secret key"), (r'-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----', "Private key in source"), ] EVAL_PATTERNS = [ (r'\beval\s*\(', "eval() usage detected"), (r'\bexec\s*\(', "exec() usage detected (Python)"), (r'new\s+Function\s*\(', "new Function() usage (dynamic code)"), (r'\bchild_process\.exec\s*\(', "child_process.exec (command injection risk)"), (r'subprocess\.call\s*\([^)]*shell\s*=\s*True', "subprocess with shell=True"), (r'os\.system\s*\(', "os.system() usage (command injection risk)"), ] SQL_PATTERNS = [ (r'(?i)(SELECT|INSERT|UPDATE|DELETE|DROP)\s+.*([\+]|\.format\(|f["\']|%\s)', "SQL string concatenation"), (r'(?i)execute\s*\(\s*f["\']', "SQL f-string in execute()"), (r'(?i)\.query\s*\(\s*`[^`]*\$\{', "SQL template literal injection"), (r'(?i)\.raw\s*\(\s*f["\']', "Raw SQL with f-string"), ] SENSITIVE_LOG_PATTERNS = [ (r'console\.log\s*\(.*(?i)(password|secret|token|key|credential)', "Sensitive data in console.log"), (r'print\s*\(.*(?i)(password|secret|token|key|credential)', "Sensitive data in print()"), (r'logger?\.(info|debug|warn)\s*\(.*(?i)(password|secret|token)', "Sensitive data in logger"), ] RULES = [ ("hardcoded-secret", "high", SECRET_PATTERNS), ("dangerous-eval", "high", EVAL_PATTERNS), ("sql-injection", "high", SQL_PATTERNS), ("sensitive-logging", "medium", SENSITIVE_LOG_PATTERNS), ] def should_scan(path: Path) -> bool: if path.suffix not in SCAN_EXTENSIONS: return False for part in path.parts: if part in SKIP_DIRS: return False return True def scan_file(filepath: Path) -> list[Finding]: findings = [] try: content = filepath.read_text(encoding="utf-8", errors="ignore") except (OSError, PermissionError): return findings lines = content.splitlines() for line_num, line in enumerate(lines, start=1): stripped = line.strip() if stripped.startswith(("#", "//", "*", "/*")): continue for rule_name, severity, patterns in RULES: for pattern, message in patterns: if re.search(pattern, line): findings.append(Finding( file=str(filepath), line=line_num, rule=rule_name, severity=severity, message=message, snippet=line.strip()[:120], )) return findings def scan_directory(target: Path, severity_filter: str | None = None) -> AuditReport: report = AuditReport() severity_order = {"high": 3, "medium": 2, "low": 1} min_severity = severity_order.get(severity_filter, 0) if severity_filter else 0 for root, dirs, files in os.walk(target): dirs[:] = [d for d in dirs if d not in SKIP_DIRS] for fname in files: fpath = Path(root) / fname if not should_scan(fpath): continue report.scanned_files += 1 for finding in scan_file(fpath): if severity_order.get(finding.severity, 0) >= min_severity: report.findings.append(finding) report.summary = { "total": len(report.findings), "high": sum(1 for f in report.findings if f.severity == "high"), "medium": sum(1 for f in report.findings if f.severity == "medium"), "low": sum(1 for f in report.findings if f.severity == "low"), "by_rule": {}, } for f in report.findings: report.summary["by_rule"][f.rule] = report.summary["by_rule"].get(f.rule, 0) + 1 return report def main(): parser = argparse.ArgumentParser( description="Scan source files for common security issues.", epilog="Example: python security-audit.py ./src --severity high", ) parser.add_argument("target", help="Directory or file to scan") parser.add_argument( "--severity", choices=["low", "medium", "high"], help="Minimum severity to report (default: all)", ) parser.add_argument( "--format", choices=["json", "pretty"], default="json", help="Output format (default: json)", ) args = parser.parse_args() target = Path(args.target) if not target.exists(): print(f"Error: {target} does not exist", file=sys.stderr) sys.exit(1) report = scan_directory(target, args.severity) output = { "scanned_files": report.scanned_files, "summary": report.summary, "findings": [asdict(f) for f in report.findings], } if args.format == "pretty": print(f"\nScanned {report.scanned_files} files\n") print(f"Findings: {report.summary['total']} total " f"({report.summary['high']} high, {report.summary['medium']} medium)") print("-" * 60) for f in report.findings: print(f"[{f.severity.upper()}] {f.file}:{f.line}") print(f" Rule: {f.rule}") print(f" {f.message}") print(f" > {f.snippet}") print() else: print(json.dumps(output, indent=2)) sys.exit(1 if report.summary.get("high", 0) > 0 else 0) if __name__ == "__main__": main()