Files
Anthropic-Cybersecurity-Skills/skills/detecting-sql-injection-via-waf-logs/scripts/agent.py
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

233 lines
9.2 KiB
Python

#!/usr/bin/env python3
"""SQL Injection WAF Log Analysis Agent - Detects SQLi attacks from ModSecurity and WAF logs."""
import argparse
import json
import logging
import re
from collections import defaultdict
from datetime import datetime, timezone
from urllib.parse import unquote_plus
# Root logging setup: timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
# Module-scoped logger used by the functions in this file.
logger = logging.getLogger(__name__)
# SQL injection signature table. Each row is
# (regex source, attack-type label, severity); every regex carries its own
# inline (?i) flag. Row order is preserved by anything that iterates the table.
SQLI_PATTERNS = [
    # --- UNION and boolean tautology probes ---
    (r"(?i)\bUNION\s+(?:ALL\s+)?SELECT\b", "UNION-based", "critical"),
    (r"(?i)\bOR\s+['\"]?\d+['\"]?\s*=\s*['\"]?\d+", "Tautology (OR 1=1)", "high"),
    (r"(?i)\bAND\s+['\"]?\d+['\"]?\s*=\s*['\"]?\d+", "Tautology (AND 1=1)", "high"),
    # --- Time-based blind injection ---
    (r"(?i)\bSLEEP\s*\(\s*\d+\s*\)", "Time-based blind (SLEEP)", "critical"),
    (r"(?i)\bBENCHMARK\s*\(", "Time-based blind (BENCHMARK)", "critical"),
    (r"(?i)\bWAITFOR\s+DELAY\b", "Time-based blind (WAITFOR)", "critical"),
    # --- Stacked statements and schema discovery ---
    (r"(?i)['\"]\s*;\s*(?:DROP|DELETE|UPDATE|INSERT)\b", "Stacked query", "critical"),
    (r"(?i)\bINFORMATION_SCHEMA\b", "Schema enumeration", "high"),
    # --- File system access ---
    (r"(?i)\bLOAD_FILE\s*\(", "File read (LOAD_FILE)", "critical"),
    (r"(?i)\bINTO\s+(?:OUT|DUMP)FILE\b", "File write (INTO OUTFILE)", "critical"),
    # --- Data extraction helpers ---
    (r"(?i)\bCONCAT\s*\(.*\bSELECT\b", "Nested SELECT in CONCAT", "high"),
    (r"(?i)\bGROUP_CONCAT\s*\(", "Data extraction (GROUP_CONCAT)", "high"),
    # --- Error-based extraction ---
    (r"(?i)\bEXTRACTVALUE\s*\(", "Error-based (EXTRACTVALUE)", "high"),
    (r"(?i)\bUPDATEXML\s*\(", "Error-based (UPDATEXML)", "high"),
    # --- Query truncation and encoding tricks ---
    (r"(?i)(?:--|#|/\*)\s*$", "Comment termination", "medium"),
    (r"(?i)\bCHAR\s*\(\s*\d+(?:\s*,\s*\d+)*\s*\)", "CHAR() encoding bypass", "medium"),
    (r"(?i)0x[0-9a-f]{6,}", "Hex encoding bypass", "medium"),
]
# OWASP Core Rule Set SQL injection rule ids (REQUEST-942 family) mapped to a
# short human-readable description used in report output.
# 942170 and 942270 were previously mislabelled: per the CRS rule messages,
# 942170 targets benchmark/sleep injections with conditional queries and
# 942270 is the basic "union ... select ... from" rule.
MODSEC_RULE_MAP = {
    "942100": "SQL Injection via libinjection",
    "942110": "SQL Injection (common keywords)",
    "942120": "SQL Injection operator",
    "942130": "SQL Injection tautology",
    "942140": "SQL Injection (DB names)",
    "942150": "SQL Injection (functions)",
    "942160": "SQL Injection blind test (sleep/benchmark)",
    "942170": "SQL Injection (benchmark/sleep with conditional queries)",
    "942180": "SQL Injection bypass (basic auth)",
    "942190": "SQL Injection (MSSQL exec)",
    "942200": "SQL Injection (MySQL comment/space obfuscation)",
    "942210": "SQL Injection (chained)",
    "942220": "SQL Injection (integer overflow)",
    "942230": "SQL Injection (conditional)",
    "942240": "SQL Injection (MySQL charset switch)",
    "942250": "SQL Injection (MATCH AGAINST)",
    "942260": "SQL Injection bypass (basic auth 2)",
    "942270": "SQL Injection (basic UNION SELECT)",
    "942280": "SQL Injection (pg_sleep/waitfor)",
    "942290": "SQL Injection (MongoDB)",
}
def parse_modsecurity_audit_log(log_file):
    """Parse a ModSecurity serial audit log into per-transaction entries.

    A new entry starts at every ``A`` part marker; subsequent part markers
    switch the section that following lines are appended to.

    Args:
        log_file: Path to the audit log. Bytes that are not valid UTF-8 are
            dropped while reading.

    Returns:
        A list of dicts, one per transaction, each holding ``"id"`` (the
        boundary token taken from the ``A`` marker) and ``"sections"``, a
        mapping of part letter to the list of lines seen in that part.

    Raises:
        OSError: If ``log_file`` cannot be opened.
    """
    # Part markers appear as "--<id>-B--" or, with three-dash boundaries,
    # "---<id>---B--". The A-marker test below already accepts both, so the
    # marker for every other part must accept both as well; otherwise all
    # lines of a three-dash log end up in section A.
    section_marker = re.compile(r"^-{2,3}\w+-{1,3}[A-Z]--$")
    entries = []
    current_entry = {}
    current_section = None
    with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
        for raw_line in f:
            line = raw_line.rstrip()
            if line.startswith("--") and line.endswith("-A--"):
                if current_entry:
                    entries.append(current_entry)
                # Slice off the fixed "--" prefix and "-A--" suffix. The old
                # str.strip("-A-") removed a *set of characters* and therefore
                # also ate leading/trailing "A"s that belong to the id.
                current_entry = {"id": line[2:-4].strip("-"), "sections": {}}
                current_section = "A"
            elif section_marker.match(line):
                # The part letter always sits just before the closing "--".
                current_section = line[-3]
            elif current_section:
                sections = current_entry.setdefault("sections", {})
                sections.setdefault(current_section, []).append(line)
    if current_entry:
        entries.append(current_entry)
    logger.info("Parsed %d ModSecurity audit log entries", len(entries))
    return entries
def parse_json_waf_log(log_file):
    """Parse newline-delimited JSON WAF logs (AWS WAF, Cloudflare).

    Args:
        log_file: Path to a file holding one JSON document per line. Bytes
            that are not valid UTF-8 are dropped while reading.

    Returns:
        A list of the JSON *objects* (dicts) found, in file order. Blank
        lines are ignored; lines that are not valid JSON, or that decode to
        something other than an object, are skipped and counted in a
        warning.

    Raises:
        OSError: If ``log_file`` cannot be opened.
    """
    entries = []
    skipped = 0
    with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            text = line.strip()
            if not text:
                continue  # blank separator lines are not malformed records
            try:
                record = json.loads(text)
            except json.JSONDecodeError:
                skipped += 1
                continue
            # Downstream analysis calls .get() on every entry, so a bare
            # number, string or array would crash it; keep objects only.
            if isinstance(record, dict):
                entries.append(record)
            else:
                skipped += 1
    if skipped:
        logger.warning("Skipped %d malformed or non-object JSON lines", skipped)
    logger.info("Parsed %d JSON WAF log entries", len(entries))
    return entries
def classify_sqli(payload):
    """Classify SQL injection type and severity from a payload string.

    The payload is matched both as given and after URL-decoding (up to two
    rounds, to cover double encoding). Logged request URIs are normally
    percent-encoded, and the signatures rely on literal whitespace and
    quotes that never appear in the encoded form.

    Args:
        payload: Request URI / query string to inspect. Falsy values yield
            no matches; non-string values are converted with ``str()``.

    Returns:
        A list of ``{"type": ..., "severity": ...}`` dicts, one per matching
        signature, in ``SQLI_PATTERNS`` order. Empty when nothing matches.
    """
    if not payload:
        return []
    text = payload if isinstance(payload, str) else str(payload)

    # Build the set of views to test: raw, decoded once, decoded twice.
    candidates = [text]
    decoded = text
    for _ in range(2):
        further = unquote_plus(decoded)
        if further == decoded:
            break  # nothing left to decode
        decoded = further
        candidates.append(decoded)

    matches = []
    for pattern, attack_type, severity in SQLI_PATTERNS:
        # One hit on any view is enough; this also keeps each type unique.
        if any(re.search(pattern, candidate) for candidate in candidates):
            matches.append({"type": attack_type, "severity": severity})
    return matches
def _extract_request_uri(request_line):
    """Return the request target from an HTTP request line, or "" if absent."""
    _method, _, remainder = request_line.partition(" ")
    if not remainder:
        return ""
    # Peel the protocol token off the right so that a target containing raw
    # (unencoded) spaces is kept whole instead of being cut at the first one.
    target, sep, version = remainder.rpartition(" ")
    if sep and version.upper().startswith("HTTP/"):
        return target.strip()
    return remainder.strip()


def analyze_modsecurity_entries(entries):
    """Analyze parsed ModSecurity entries for SQLi attacks.

    An entry becomes a finding when its ``H`` part references at least one
    rule id present in ``MODSEC_RULE_MAP``.

    Args:
        entries: Iterable of dicts with a ``"sections"`` mapping of part
            letter to list of lines.

    Returns:
        A list of finding dicts with keys ``source_ip``, ``request_uri``
        (truncated to 500 characters), ``rules_triggered``,
        ``sqli_classification`` and ``severity`` (``"critical"`` when any
        classification is critical, otherwise ``"high"``).
    """
    rule_id_re = re.compile(r"id\s*\"(\d+)\"")
    remote_addr_re = re.compile(r"Remote-Addr:\s*(\S+)")
    # Audit header line in part A:
    #   [timestamp] unique_id source_ip source_port dest_ip dest_port
    audit_header_re = re.compile(r"^\[[^\]]*\]\s+\S+\s+(\S+)\s+\d+\s+\S+\s+\d+")

    findings = []
    for entry in entries:
        sections = entry.get("sections", {})
        request_lines = sections.get("B", [])
        request_uri = _extract_request_uri(request_lines[0]) if request_lines else ""

        source_ip = ""
        rule_ids = []
        for line in sections.get("H", []):
            id_match = rule_id_re.search(line)
            if id_match:
                rule_ids.append(id_match.group(1))
            addr_match = remote_addr_re.search(line)
            if addr_match:
                source_ip = addr_match.group(1)

        # dict.fromkeys drops repeated ids while keeping first-seen order, so
        # a rule that fired on several arguments is reported once.
        sqli_rules = [rid for rid in dict.fromkeys(rule_ids) if rid in MODSEC_RULE_MAP]
        if not sqli_rules:
            continue

        if not source_ip:
            # No Remote-Addr in part H: fall back to the client address
            # recorded in the part A audit header.
            for line in sections.get("A", []):
                header_match = audit_header_re.match(line)
                if header_match:
                    source_ip = header_match.group(1)
                    break

        sqli_classes = classify_sqli(request_uri)
        is_critical = any(c["severity"] == "critical" for c in sqli_classes)
        findings.append({
            "source_ip": source_ip,
            "request_uri": request_uri[:500],
            "rules_triggered": [
                {"id": r, "desc": MODSEC_RULE_MAP.get(r, "Unknown")} for r in sqli_rules
            ],
            # The WAF matched even if no local signature did; say so.
            "sqli_classification": sqli_classes
            or [{"type": "WAF rule match", "severity": "high"}],
            "severity": "critical" if is_critical else "high",
        })
    return findings
def analyze_json_waf_entries(entries):
    """Analyze JSON WAF log entries for SQLi patterns.

    Reads the URI, query string, client IP and action from either the
    nested ``httpRequest`` object (``uri`` / ``args`` / ``clientIp``) or the
    flat ``ClientRequestURI`` / ``queryString`` / ``ClientIP`` / ``Action``
    keys, then classifies ``uri?args``.

    Args:
        entries: Iterable of decoded log records. Records that are not
            dicts are ignored.

    Returns:
        A list of finding dicts with keys ``source_ip``, ``request_uri``
        (truncated to 500 characters), ``action``, ``sqli_classification``
        and ``severity`` (the highest severity among the classifications).
    """
    severity_rank = {"critical": 3, "high": 2, "medium": 1}
    findings = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        http_request = entry.get("httpRequest")
        if not isinstance(http_request, dict):
            http_request = {}  # key absent or explicitly null in the log

        # Trailing `or ""` turns JSON nulls into empty strings so the
        # classifier always receives text.
        uri = http_request.get("uri") or entry.get("ClientRequestURI") or ""
        args = http_request.get("args") or entry.get("queryString") or ""
        source_ip = http_request.get("clientIp") or entry.get("ClientIP") or ""
        action = entry.get("action") or entry.get("Action") or ""

        payload = f"{uri}?{args}" if args else str(uri)
        sqli_classes = classify_sqli(payload)
        if not sqli_classes:
            continue
        findings.append({
            "source_ip": source_ip,
            "request_uri": payload[:500],
            "action": action,
            "sqli_classification": sqli_classes,
            "severity": max(
                (c["severity"] for c in sqli_classes),
                key=lambda s: severity_rank.get(s, 0),
            ),
        })
    return findings
def correlate_campaigns(findings, time_window_sec=300, min_requests=5):
    """Identify SQLi attack campaigns by source IP clustering.

    Args:
        findings: Finding dicts carrying ``source_ip`` and, optionally,
            ``sqli_classification``.
        time_window_sec: Currently unused. Findings carry no timestamp, so
            no time windowing is applied; the parameter is kept so existing
            callers keep working.
        min_requests: Minimum number of findings from one source IP for it
            to be reported as a campaign.

    Returns:
        A list of campaign dicts with keys ``source_ip``, ``request_count``,
        ``attack_types`` (sorted), ``severity`` (``"critical"`` when more
        than two attack types were seen, else ``"high"``) and
        ``classification`` (``"automated"`` above 20 requests, else
        ``"manual"``).
    """
    ip_groups = defaultdict(list)
    for finding in findings:
        ip = finding.get("source_ip")
        # Findings without a source address cannot be attributed; grouping
        # them under "" would fabricate a single campaign out of unrelated
        # requests.
        if ip:
            ip_groups[ip].append(finding)

    campaigns = []
    for ip, group in ip_groups.items():
        if len(group) < min_requests:
            continue
        # Sorted so the report is identical from run to run (set iteration
        # order for strings is not stable across interpreter runs).
        attack_types = sorted({
            c["type"] for f in group for c in f.get("sqli_classification", [])
        })
        campaigns.append({
            "source_ip": ip,
            "request_count": len(group),
            "attack_types": attack_types,
            "severity": "critical" if len(attack_types) > 2 else "high",
            "classification": "automated" if len(group) > 20 else "manual",
        })
        logger.warning(
            "SQLi campaign: %s (%d requests, %d attack types)",
            ip, len(group), len(attack_types),
        )
    return campaigns
def generate_report(findings, campaigns):
    """Generate the SQLi detection report and print a one-line summary.

    Args:
        findings: Finding dicts; ``severity`` and ``source_ip`` are read
            when present.
        campaigns: Campaign dicts, embedded unchanged.

    Returns:
        A dict with ``timestamp`` (timezone-aware UTC, ISO 8601),
        ``total_sqli_events``, ``critical_events``, ``unique_sources``
        (distinct non-empty source IPs), ``campaigns_detected``,
        ``campaigns`` and ``top_findings`` (at most 100 findings, most
        severe first, original order kept within a severity).
    """
    severity_order = {"critical": 0, "high": 1, "medium": 2}
    critical = [f for f in findings if f.get("severity") == "critical"]
    # sorted() is stable, so ties keep log order. Ranking before truncating
    # ensures critical events beyond position 100 are not dropped in favour
    # of earlier, less severe ones.
    prioritized = sorted(
        findings,
        key=lambda f: severity_order.get(f.get("severity"), len(severity_order)),
    )
    report = {
        # datetime.utcnow() is deprecated and returns a naive value; emit an
        # explicit UTC offset instead.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_sqli_events": len(findings),
        "critical_events": len(critical),
        "unique_sources": len({f["source_ip"] for f in findings if f.get("source_ip")}),
        "campaigns_detected": len(campaigns),
        "campaigns": campaigns,
        "top_findings": prioritized[:100],
    }
    print(f"SQLI REPORT: {len(findings)} events, {len(campaigns)} campaigns, {len(critical)} critical")
    return report
def main():
    """Command-line entry point: parse a WAF log and write a JSON report.

    Reads ``--log-file`` in the chosen ``--format``, derives findings and
    campaigns, and writes the report to ``--output``. Exits through
    ``argparse`` (status 2) with a short message if the log file cannot be
    read.
    """
    parser = argparse.ArgumentParser(description="SQL Injection WAF Log Analysis Agent")
    parser.add_argument("--log-file", required=True, help="WAF log file path")
    parser.add_argument("--format", choices=["modsecurity", "json"], default="modsecurity")
    parser.add_argument("--output", default="sqli_report.json")
    args = parser.parse_args()

    if args.format == "modsecurity":
        parse_log, analyze = parse_modsecurity_audit_log, analyze_modsecurity_entries
    else:
        parse_log, analyze = parse_json_waf_log, analyze_json_waf_entries

    try:
        entries = parse_log(args.log_file)
    except OSError as exc:
        # parser.error() prints usage plus the message and raises SystemExit,
        # giving the operator a readable failure instead of a traceback.
        parser.error(f"cannot read log file {args.log_file!r}: {exc}")

    findings = analyze(entries)
    campaigns = correlate_campaigns(findings)
    report = generate_report(findings, campaigns)
    # Explicit encoding so the output does not depend on the host locale.
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)
    logger.info("Report saved to %s", args.output)


if __name__ == "__main__":
    main()