Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

242 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""ModSecurity WAF audit log analysis and rule tuning agent."""
import json
import argparse
import re
from datetime import datetime
from collections import defaultdict
# Boundary line that opens a section in the audit log: "--<hex id>-<LETTER>--".
# Group 1 captures the lowercase-hex transaction id, group 2 the single
# uppercase section letter.
SECTION_PATTERN = re.compile(r'^--([a-f0-9]+)-([A-Z])--$')
# Human-readable category names keyed by a three-character rule-id prefix.
# Lookups elsewhere in this file use the first three characters of a rule id
# and fall back to "Other" when the prefix is not listed here.
CRS_CATEGORIES = {
    "911": "Method Enforcement",
    "913": "Scanner Detection",
    "920": "Protocol Enforcement",
    "921": "Protocol Attack",
    "930": "Local File Inclusion",
    "931": "Remote File Inclusion",
    "932": "Remote Code Execution",
    "933": "PHP Injection",
    "934": "Node.js Injection",
    "941": "XSS Attack",
    "942": "SQL Injection",
    "943": "Session Fixation",
    "944": "Java Attack",
    "949": "Inbound Blocking",
    "959": "Outbound Blocking",
}
# Section A header: "[timestamp] unique_id source_ip source_port dest_ip dest_port".
# The bracketed timestamp contains a space, so it cannot be split on whitespace.
_AUDIT_HEADER_RE = re.compile(r'^\[([^\]]+)\]\s+(\S+)\s+(\S+)')
# One rule hit inside section H: id, msg and severity tags on the same line.
_RULE_MATCH_RE = re.compile(
    r'\[id "(\d+)"\].*?\[msg "([^"]+)"\].*?\[severity "([^"]+)"\]'
)
# First number following the "Inbound Anomaly Score" text in section H.
_ANOMALY_SCORE_RE = re.compile(r'Inbound Anomaly Score.*?(\d+)')


def _read_transactions(log_path, max_entries):
    """Split the log file into at most ``max_entries`` raw transactions."""
    transactions = []
    current = None
    current_section = None
    with open(log_path, "r", errors="replace") as f:
        for line in f:
            boundary = SECTION_PATTERN.match(line.strip())
            if boundary is None:
                # Body line: belongs to whichever section is currently open.
                if current is not None and current_section is not None:
                    current["sections"][current_section].append(line)
                continue
            tx_id, section = boundary.groups()
            if section == "A":
                if current is not None:
                    transactions.append(current)
                    # Clear it so the post-loop flush cannot append it twice.
                    current = None
                if len(transactions) >= max_entries:
                    break
                current = {"tx_id": tx_id, "sections": {}}
            elif current is None:
                # Section marker before any "A" header (e.g. a log that
                # starts mid-transaction): nothing to attach it to.
                current_section = None
                continue
            current_section = section
            # Collect lines in a list; joined once below.
            current["sections"][section] = []
    if current is not None:
        transactions.append(current)
    for tx in transactions:
        tx["sections"] = {
            name: "".join(chunks) for name, chunks in tx["sections"].items()
        }
    return transactions


def _build_record(entry):
    """Extract the summary fields of one raw transaction into a flat dict."""
    sections = entry["sections"]
    record = {"tx_id": entry["tx_id"]}

    header = sections.get("A", "").strip()
    if header:
        header_match = _AUDIT_HEADER_RE.match(header)
        if header_match:
            record["timestamp"] = header_match.group(1)
            # group(2) is the unique transaction id; the source IP follows it.
            record["client_ip"] = header_match.group(3)
        else:
            # Non-bracketed header: keep the legacy positional interpretation.
            parts = header.split()
            if len(parts) >= 3:
                record["timestamp"] = parts[0]
                record["client_ip"] = parts[1]

    request = sections.get("B", "")
    if request:
        request_line = request.strip().split("\n")[0].split()
        if len(request_line) >= 2:
            record["method"] = request_line[0]
            record["uri"] = request_line[1]

    trailer = sections.get("H", "")
    record["rules_matched"] = [
        {
            "rule_id": hit.group(1),
            "message": hit.group(2),
            "severity": hit.group(3),
        }
        for hit in _RULE_MATCH_RE.finditer(trailer)
    ]
    anomaly = _ANOMALY_SCORE_RE.search(trailer)
    if anomaly:
        record["anomaly_score"] = int(anomaly.group(1))
    return record


def parse_audit_log(log_path, max_entries=5000):
    """Parse a ModSecurity serial audit log into per-transaction records.

    The file is split on ``--<id>-<LETTER>--`` boundary lines; a new
    transaction starts at every ``A`` section. Section markers that appear
    before the first ``A`` header are ignored.

    Args:
        log_path: Path of the audit log file. Undecodable bytes are replaced.
        max_entries: Upper bound on the number of transactions returned.
            Reading stops as soon as this many have been collected.

    Returns:
        A list of dicts, one per transaction, each with ``tx_id`` and
        ``rules_matched`` (list of ``rule_id``/``message``/``severity``
        dicts). ``timestamp``, ``client_ip``, ``method``, ``uri`` and
        ``anomaly_score`` are present only when the corresponding section
        content could be parsed.

    Raises:
        OSError: If the log file cannot be opened.
    """
    return [_build_record(entry) for entry in _read_transactions(log_path, max_entries)]
def analyze_rule_frequency(entries):
    """Rank matched rules by hit count, most frequent first.

    Args:
        entries: Parsed audit records; each may carry a ``rules_matched``
            list of dicts with ``rule_id`` and ``message``.

    Returns:
        A list of dicts with ``rule_id``, ``count``, ``message`` (the last
        message seen for that rule) and ``category`` (looked up by the first
        three characters of the rule id, ``"Other"`` when unknown). Rules
        with equal counts keep first-seen order.
    """
    hits = {}
    last_message = {}
    all_matches = (
        rule
        for entry in entries
        for rule in entry.get("rules_matched", [])
    )
    for rule in all_matches:
        rule_id = rule["rule_id"]
        hits[rule_id] = hits.get(rule_id, 0) + 1
        last_message[rule_id] = rule["message"]

    # sorted() is stable, so ties stay in insertion (first-seen) order.
    ranked = sorted(hits.items(), key=lambda pair: pair[1], reverse=True)
    return [
        {
            "rule_id": rule_id,
            "count": hit_count,
            "message": last_message.get(rule_id, ""),
            "category": CRS_CATEGORIES.get(rule_id[:3], "Other"),
        }
        for rule_id, hit_count in ranked
    ]
def identify_false_positive_candidates(entries, threshold=50, ip_threshold=10):
    """Flag rules that fire often and from many distinct clients.

    A rule becomes a candidate when it matched at least ``threshold`` times
    and was triggered from more than ``ip_threshold`` distinct client IPs.
    Entries without a ``client_ip`` or ``uri`` still count as hits but do not
    contribute a value to the uniqueness tallies.

    Args:
        entries: Parsed audit records; each may carry ``client_ip``, ``uri``
            and a ``rules_matched`` list of dicts with ``rule_id``.
        threshold: Minimum number of hits (inclusive).
        ip_threshold: Number of distinct client IPs that must be exceeded
            (exclusive bound).

    Returns:
        A list of candidate dicts with ``rule_id``, ``hit_count``,
        ``unique_ips``, ``unique_uris``, ``recommendation`` and ``reason``,
        in first-seen rule order.
    """
    rule_counts = defaultdict(int)
    rule_ips = defaultdict(set)
    rule_uris = defaultdict(set)
    for entry in entries:
        client_ip = entry.get("client_ip")
        uri = entry.get("uri")
        for rule in entry.get("rules_matched", []):
            rid = rule["rule_id"]
            rule_counts[rid] += 1
            # A missing value must not be tallied as one more distinct
            # IP/URI, otherwise incomplete records inflate the spread.
            if client_ip:
                rule_ips[rid].add(client_ip)
            if uri:
                rule_uris[rid].add(uri)

    return [
        {
            "rule_id": rid,
            "hit_count": count,
            "unique_ips": len(rule_ips[rid]),
            "unique_uris": len(rule_uris[rid]),
            "recommendation": f"SecRuleRemoveById {rid}",
            "reason": "High frequency across many IPs — likely false positive",
        }
        for rid, count in rule_counts.items()
        if count >= threshold and len(rule_ips[rid]) > ip_threshold
    ]
def generate_exclusion_rules(candidates):
    """Render candidates as a block of ``SecRuleRemoveById`` directives.

    Args:
        candidates: Dicts carrying ``rule_id``, ``hit_count`` and
            ``unique_ips``.

    Returns:
        A newline-joined string: a header comment, then for each candidate a
        comment line with its statistics followed by the removal directive.
    """
    config_lines = ["# Auto-generated false positive exclusions"]
    for candidate in candidates:
        rule_id = candidate["rule_id"]
        hits = candidate["hit_count"]
        unique_ips = candidate["unique_ips"]
        config_lines.extend((
            f"# Rule {rule_id}: {hits} hits, {unique_ips} unique IPs",
            f"SecRuleRemoveById {rule_id}",
        ))
    return "\n".join(config_lines)
def analyze_attack_summary(entries):
    """Tally rule matches by category and severity, and rank noisy clients.

    Args:
        entries: Parsed audit records; each may carry ``rules_matched``,
            ``anomaly_score`` and ``client_ip``.

    Returns:
        A dict with ``by_category`` (counts sorted descending),
        ``by_severity`` (counts in first-seen order) and ``top_attackers``
        (up to 20 client IPs with the most entries whose anomaly score is at
        least 5, sorted descending).
    """
    per_category = {}
    per_severity = {}
    per_client = {}

    for entry in entries:
        for rule in entry.get("rules_matched", []):
            label = CRS_CATEGORIES.get(rule["rule_id"][:3], "Other")
            per_category[label] = per_category.get(label, 0) + 1
            severity = rule["severity"]
            per_severity[severity] = per_severity.get(severity, 0) + 1
        # Counted once per entry, independent of how many rules matched.
        if entry.get("anomaly_score", 0) >= 5:
            source = entry.get("client_ip", "")
            per_client[source] = per_client.get(source, 0) + 1

    def most_frequent_first(tally):
        # Stable sort: equal counts keep first-seen order.
        return sorted(tally.items(), key=lambda pair: pair[1], reverse=True)

    return {
        "by_category": dict(most_frequent_first(per_category)),
        "by_severity": dict(per_severity),
        "top_attackers": dict(most_frequent_first(per_client)[:20]),
    }
def run_audit(args):
    """Run the full audit-log analysis, print a text report and return it.

    Args:
        args: Parsed CLI namespace. Reads ``audit_log``, ``max_entries``,
            ``tune`` and ``fp_threshold``.

    Returns:
        A dict with ``total_entries``, ``attack_summary`` and
        ``rule_frequency`` (top 20). When ``args.tune`` is set it also holds
        ``false_positive_candidates`` and, if any were found,
        ``exclusion_config``.
    """
    # Imported locally because the file header only imports `datetime`.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the printed ISO string keeps its old shape.
    generated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    print(f"\n{'='*60}")
    print(" MODSECURITY AUDIT LOG ANALYSIS")
    print(f" Generated: {generated} UTC")
    print(f"{'='*60}\n")

    report = {}
    entries = parse_audit_log(args.audit_log, args.max_entries)
    report["total_entries"] = len(entries)
    print(f"Parsed {len(entries)} audit log entries\n")

    attack_summary = analyze_attack_summary(entries)
    report["attack_summary"] = attack_summary
    print("--- ATTACK SUMMARY ---")
    for cat, count in list(attack_summary["by_category"].items())[:10]:
        print(f" {cat}: {count}")
    print(f"\n Severity: {attack_summary['by_severity']}")
    print("\n--- TOP ATTACKERS ---")
    for ip, count in list(attack_summary["top_attackers"].items())[:10]:
        print(f" {ip}: {count} alerts")

    rule_freq = analyze_rule_frequency(entries)
    # The report keeps the top 20 rules; the console shows only the top 15.
    report["rule_frequency"] = rule_freq[:20]
    print("\n--- TOP FIRING RULES ---")
    for r in rule_freq[:15]:
        print(f" [{r['rule_id']}] {r['count']}x — {r['message'][:60]}")

    if args.tune:
        fp_candidates = identify_false_positive_candidates(entries, args.fp_threshold)
        report["false_positive_candidates"] = fp_candidates
        print(f"\n--- FALSE POSITIVE CANDIDATES ({len(fp_candidates)}) ---")
        for c in fp_candidates[:10]:
            print(f" Rule {c['rule_id']}: {c['hit_count']} hits, "
                  f"{c['unique_ips']} IPs — {c['reason']}")
        if fp_candidates:
            # Stored in the report only; not echoed to the console.
            report["exclusion_config"] = generate_exclusion_rules(fp_candidates)
    return report
def main():
    """Command-line entry point: parse options, run the audit, save the report.

    The JSON report is written only when ``--output`` is given.
    """
    option_specs = (
        ("--audit-log", {"required": True,
                         "help": "Path to ModSecurity audit log file"}),
        ("--max-entries", {"type": int, "default": 5000,
                           "help": "Max log entries to parse (default: 5000)"}),
        ("--tune", {"action": "store_true",
                    "help": "Identify false positive candidates for tuning"}),
        ("--fp-threshold", {"type": int, "default": 50,
                            "help": "Minimum hits for false positive candidate (default: 50)"}),
        ("--output", {"help": "Save report to JSON file"}),
    )
    cli = argparse.ArgumentParser(description="ModSecurity Audit Log Agent")
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    audit_report = run_audit(options)
    if not options.output:
        return
    with open(options.output, "w") as handle:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(audit_report, handle, indent=2, default=str)
    print(f"\n[+] Report saved to {options.output}")


if __name__ == "__main__":
    main()