#!/usr/bin/env python3
"""ModSecurity WAF audit log analysis and rule tuning agent."""

import argparse
import json
import re
from collections import Counter, defaultdict
from datetime import datetime, timezone

# Section boundary line of the serial audit log format, e.g. "--a1b2c3d4-A--".
SECTION_PATTERN = re.compile(r'^--([a-f0-9]+)-([A-Z])--$')

# Section A header: "[timestamp] unique_id source_ip source_port dest_ip dest_port".
# The timestamp contains a space ("09/Jan/2024:12:27:56 +0000"), so it has to
# be taken from inside the brackets rather than by whitespace splitting.
SECTION_A_PATTERN = re.compile(r'^\[([^\]]*)\]\s+(\S+)\s+(\S+)')

# One rule hit inside section H; "." does not cross newlines, so id/msg/severity
# must all appear on the same message line.
RULE_MATCH_PATTERN = re.compile(
    r'\[id "(\d+)"\].*?\[msg "([^"]+)"\].*?\[severity "([^"]+)"\]'
)

# First number following the "Inbound Anomaly Score" text in section H.
ANOMALY_SCORE_PATTERN = re.compile(r'Inbound Anomaly Score.*?(\d+)')

# Rule-id prefix (first three digits) -> human readable rule category.
CRS_CATEGORIES = {
    "911": "Method Enforcement",
    "913": "Scanner Detection",
    "920": "Protocol Enforcement",
    "921": "Protocol Attack",
    "930": "Local File Inclusion",
    "931": "Remote File Inclusion",
    "932": "Remote Code Execution",
    "933": "PHP Injection",
    "934": "Node.js Injection",
    "941": "XSS Attack",
    "942": "SQL Injection",
    "943": "Session Fixation",
    "944": "Java Attack",
    "949": "Inbound Blocking",
    "959": "Outbound Blocking",
}


def _read_raw_entries(log_path, max_entries):
    """Split the log into per-transaction dicts of raw section lines."""
    entries = []
    current = None
    current_section = None
    with open(log_path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            match = SECTION_PATTERN.match(line.strip())
            if match is None:
                # Ordinary content line: belongs to the section being read.
                if current is not None and current_section is not None:
                    current["sections"][current_section].append(line)
                continue

            tx_id, section = match.groups()
            if section == "A":
                # A new transaction starts: flush the previous one exactly once.
                if current is not None:
                    entries.append(current)
                    current = None
                    current_section = None
                if len(entries) >= max_entries:
                    break
                current = {"tx_id": tx_id, "sections": {}}
            elif current is None:
                # Section seen before any "A" header (e.g. a rotated log that
                # starts mid-transaction): nothing to attach it to, skip it.
                continue

            current_section = section
            current["sections"][section] = []

    # Flush the transaction still being read when the file ended. After a
    # max_entries break ``current`` is None, so nothing is appended twice.
    if current is not None:
        entries.append(current)
    return entries


def _build_record(entry):
    """Turn one raw transaction (tx_id + section lines) into a flat record."""
    sections = {
        name: "".join(lines) for name, lines in entry["sections"].items()
    }
    record = {"tx_id": entry["tx_id"]}

    header = sections.get("A", "").strip()
    if header:
        bracketed = SECTION_A_PATTERN.match(header)
        if bracketed:
            record["timestamp"] = bracketed.group(1)
            record["client_ip"] = bracketed.group(3)
        else:
            # Header without a bracketed timestamp: keep the simple
            # "<timestamp> <client_ip> ..." whitespace interpretation.
            parts = header.split()
            if len(parts) >= 3:
                record["timestamp"] = parts[0]
                record["client_ip"] = parts[1]

    request = sections.get("B", "").strip()
    if request:
        # The first line of section B is the request line: "METHOD URI PROTO".
        req_parts = request.split("\n")[0].split()
        if len(req_parts) >= 2:
            record["method"] = req_parts[0]
            record["uri"] = req_parts[1]

    section_h = sections.get("H", "")
    record["rules_matched"] = [
        {
            "rule_id": hit.group(1),
            "message": hit.group(2),
            "severity": hit.group(3),
        }
        for hit in RULE_MATCH_PATTERN.finditer(section_h)
    ]

    anomaly = ANOMALY_SCORE_PATTERN.search(section_h)
    if anomaly:
        record["anomaly_score"] = int(anomaly.group(1))
    return record


def parse_audit_log(log_path, max_entries=5000):
    """Parse ModSecurity serial audit log format.

    Args:
        log_path: Path of the audit log file. It is read as UTF-8 and
            undecodable bytes are replaced instead of raising.
        max_entries: Maximum number of transactions to return.

    Returns:
        A list with one dict per transaction. Every dict has ``tx_id`` and
        ``rules_matched`` (list of ``rule_id``/``message``/``severity``
        dicts). ``timestamp``, ``client_ip``, ``method``, ``uri`` and
        ``anomaly_score`` are present only when the corresponding section
        content could be parsed.

    Raises:
        OSError: If the log file cannot be opened.
    """
    return [
        _build_record(entry)
        for entry in _read_raw_entries(log_path, max_entries)
    ]


def analyze_rule_frequency(entries):
    """Analyze which rules fire most frequently for tuning.

    Args:
        entries: Records as returned by ``parse_audit_log``.

    Returns:
        A list of dicts (``rule_id``, ``count``, ``message``, ``category``)
        ordered by descending hit count; ties keep first-seen order. The
        message is the one from the last hit seen for that rule.
    """
    rule_counts = Counter()
    rule_msgs = {}
    for entry in entries:
        for rule in entry.get("rules_matched", []):
            rid = rule["rule_id"]
            rule_counts[rid] += 1
            rule_msgs[rid] = rule["message"]

    return [
        {
            "rule_id": rid,
            "count": count,
            "message": rule_msgs.get(rid, ""),
            "category": CRS_CATEGORIES.get(rid[:3], "Other"),
        }
        for rid, count in rule_counts.most_common()
    ]


def identify_false_positive_candidates(entries, threshold=50, min_unique_ips=10):
    """Identify rules that may be false positives based on frequency and pattern.

    A rule is a candidate when it fired at least ``threshold`` times and was
    triggered by more than ``min_unique_ips`` distinct client IPs.

    Args:
        entries: Records as returned by ``parse_audit_log``.
        threshold: Minimum number of hits for a rule to be considered.
        min_unique_ips: A rule must be hit by strictly more distinct client
            IPs than this value.

    Returns:
        A list of candidate dicts (``rule_id``, ``hit_count``,
        ``unique_ips``, ``unique_uris``, ``recommendation``, ``reason``).
    """
    rule_ips = defaultdict(set)
    rule_uris = defaultdict(set)
    rule_counts = Counter()
    for entry in entries:
        client_ip = entry.get("client_ip")
        uri = entry.get("uri")
        for rule in entry.get("rules_matched", []):
            rid = rule["rule_id"]
            rule_counts[rid] += 1
            # Records lacking an IP/URI must not count as one extra
            # "unique" value, so only known values are collected.
            if client_ip:
                rule_ips[rid].add(client_ip)
            if uri:
                rule_uris[rid].add(uri)

    candidates = []
    for rid, count in rule_counts.items():
        if count >= threshold and len(rule_ips[rid]) > min_unique_ips:
            candidates.append({
                "rule_id": rid,
                "hit_count": count,
                "unique_ips": len(rule_ips[rid]),
                "unique_uris": len(rule_uris[rid]),
                "recommendation": f"SecRuleRemoveById {rid}",
                "reason": "High frequency across many IPs — likely false positive",
            })
    return candidates


def generate_exclusion_rules(candidates):
    """Generate ModSecurity rule exclusion configuration.

    Args:
        candidates: Output of ``identify_false_positive_candidates``.

    Returns:
        Configuration text: a header comment, then for every candidate one
        comment line with its statistics and one ``SecRuleRemoveById`` line.
    """
    # NOTE(review): SecRuleRemoveById disables a rule everywhere; the
    # generated text is a starting point to review, not to deploy blindly.
    lines = ["# Auto-generated false positive exclusions"]
    for c in candidates:
        lines.append(f"# Rule {c['rule_id']}: {c['hit_count']} hits, "
                     f"{c['unique_ips']} unique IPs")
        lines.append(f"SecRuleRemoveById {c['rule_id']}")
    return "\n".join(lines)


def analyze_attack_summary(entries):
    """Summarize detected attacks by category and severity.

    Args:
        entries: Records as returned by ``parse_audit_log``.

    Returns:
        A dict with ``by_category`` (rule hits per category, descending),
        ``by_severity`` (rule hits per severity) and ``top_attackers`` (the
        20 client IPs with the most records whose anomaly score is >= 5).
    """
    category_counts = defaultdict(int)
    severity_counts = defaultdict(int)
    top_attackers = defaultdict(int)
    for entry in entries:
        for rule in entry.get("rules_matched", []):
            cat = CRS_CATEGORIES.get(rule["rule_id"][:3], "Other")
            category_counts[cat] += 1
            severity_counts[rule["severity"]] += 1
        # Counted once per record, independent of how many rules it matched.
        if entry.get("anomaly_score", 0) >= 5:
            top_attackers[entry.get("client_ip", "")] += 1

    return {
        "by_category": dict(sorted(category_counts.items(),
                                   key=lambda x: x[1], reverse=True)),
        "by_severity": dict(severity_counts),
        "top_attackers": dict(sorted(top_attackers.items(),
                                     key=lambda x: x[1], reverse=True)[:20]),
    }


def run_audit(args):
    """Execute ModSecurity audit log analysis.

    Prints a human readable report to stdout and returns the same data.

    Args:
        args: Namespace providing ``audit_log``, ``max_entries``, ``tune``
            and ``fp_threshold``.

    Returns:
        A dict with ``total_entries``, ``attack_summary`` and
        ``rule_frequency`` (top 20). When ``args.tune`` is set it also has
        ``false_positive_candidates`` and, if any candidate was found,
        ``exclusion_config``.
    """
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the printed value keeps its "<iso timestamp> UTC" shape.
    generated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    print(f"\n{'='*60}")
    print("  MODSECURITY AUDIT LOG ANALYSIS")
    print(f"  Generated: {generated} UTC")
    print(f"{'='*60}\n")

    report = {}

    entries = parse_audit_log(args.audit_log, args.max_entries)
    report["total_entries"] = len(entries)
    print(f"Parsed {len(entries)} audit log entries\n")

    attack_summary = analyze_attack_summary(entries)
    report["attack_summary"] = attack_summary
    print("--- ATTACK SUMMARY ---")
    for cat, count in list(attack_summary["by_category"].items())[:10]:
        print(f"  {cat}: {count}")
    print(f"\n  Severity: {attack_summary['by_severity']}")

    print("\n--- TOP ATTACKERS ---")
    for ip, count in list(attack_summary["top_attackers"].items())[:10]:
        print(f"  {ip}: {count} alerts")

    rule_freq = analyze_rule_frequency(entries)
    report["rule_frequency"] = rule_freq[:20]
    print("\n--- TOP FIRING RULES ---")
    for r in rule_freq[:15]:
        print(f"  [{r['rule_id']}] {r['count']}x — {r['message'][:60]}")

    if args.tune:
        fp_candidates = identify_false_positive_candidates(entries, args.fp_threshold)
        report["false_positive_candidates"] = fp_candidates
        print(f"\n--- FALSE POSITIVE CANDIDATES ({len(fp_candidates)}) ---")
        for c in fp_candidates[:10]:
            print(f"  Rule {c['rule_id']}: {c['hit_count']} hits, "
                  f"{c['unique_ips']} IPs — {c['reason']}")
        if fp_candidates:
            exclusions = generate_exclusion_rules(fp_candidates)
            report["exclusion_config"] = exclusions

    return report


def main():
    """Parse command line arguments, run the audit, optionally save JSON."""
    parser = argparse.ArgumentParser(description="ModSecurity Audit Log Agent")
    parser.add_argument("--audit-log", required=True,
                        help="Path to ModSecurity audit log file")
    parser.add_argument("--max-entries", type=int, default=5000,
                        help="Max log entries to parse (default: 5000)")
    parser.add_argument("--tune", action="store_true",
                        help="Identify false positive candidates for tuning")
    parser.add_argument("--fp-threshold", type=int, default=50,
                        help="Minimum hits for false positive candidate (default: 50)")
    parser.add_argument("--output", help="Save report to JSON file")
    args = parser.parse_args()

    report = run_audit(args)
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2, default=str)
        print(f"\n[+] Report saved to {args.output}")


if __name__ == "__main__":
    main()