#!/usr/bin/env python3
"""Agent for performing false positive reduction analysis in SIEM environments."""

import json
import argparse
import csv
from datetime import datetime
from collections import Counter


def analyze_alerts(csv_file, threshold=5):
    """Analyze SIEM alert CSV to identify false positive patterns.

    Args:
        csv_file: Path to a CSV alert export with a header row. For each
            field the first column that is present wins:
            rule (``rule_name``/``Rule``/``alert_name``),
            source (``src_ip``/``source_ip``/``Source``),
            destination (``dst_ip``/``dest_ip``/``Destination``),
            severity (``severity``/``Severity``),
            status (``status``/``Status``/``disposition``),
            timestamp (``timestamp``/``Time``).
        threshold: Minimum per-rule false-positive rate, in percent, at which
            a rule is reported as noisy.

    Returns:
        A dict with ``total_alerts``, ``false_positives``, ``fp_rate_pct``
        (rounded to one decimal), ``noisy_rules`` (sorted by descending FP
        rate) and ``top_fp_sources`` (at most ten entries).
    """
    # Status values (compared case-insensitively) that count as a false positive.
    fp_statuses = {"false_positive", "fp", "closed_fp", "benign"}

    # newline="" is what the csv module expects so it can handle line endings
    # (including newlines embedded in quoted fields) itself.
    with open(csv_file, "r", encoding="utf-8", errors="replace", newline="") as f:
        alerts = [
            {
                "rule": row.get("rule_name", row.get("Rule", row.get("alert_name", ""))),
                "source": row.get("src_ip", row.get("source_ip", row.get("Source", ""))),
                "dest": row.get("dst_ip", row.get("dest_ip", row.get("Destination", ""))),
                "severity": row.get("severity", row.get("Severity", "")),
                # DictReader fills the missing trailing fields of a short row
                # with None, so guard before lower-casing.
                "status": (
                    row.get("status", row.get("Status", row.get("disposition", ""))) or ""
                ).lower(),
                "timestamp": row.get("timestamp", row.get("Time", "")),
            }
            for row in csv.DictReader(f)
        ]

    total = len(alerts)
    fp_alerts = [a for a in alerts if a["status"] in fp_statuses]
    fp_rate = len(fp_alerts) / total * 100 if total else 0

    rule_counts = Counter(a["rule"] for a in alerts)
    fp_by_rule = Counter(a["rule"] for a in fp_alerts)
    noisy_rules = []
    for rule, count in rule_counts.most_common():
        fp_count = fp_by_rule.get(rule, 0)
        rate = fp_count / count * 100 if count else 0
        # A rule is noisy when its FP rate reaches the threshold, or when it
        # produced at least ten false positives regardless of rate.
        if rate >= threshold or fp_count >= 10:
            noisy_rules.append({
                "rule": rule,
                "total": count,
                "false_positives": fp_count,
                "fp_rate": round(rate, 1),
            })

    source_fp = Counter(a["source"] for a in fp_alerts)
    top_fp_sources = [{"source": s, "fp_count": c} for s, c in source_fp.most_common(10)]
    return {
        "total_alerts": total,
        "false_positives": len(fp_alerts),
        "fp_rate_pct": round(fp_rate, 1),
        "noisy_rules": sorted(noisy_rules, key=lambda x: x["fp_rate"], reverse=True),
        "top_fp_sources": top_fp_sources,
    }


def generate_tuning_recommendations(csv_file):
    """Generate SIEM rule tuning recommendations from alert analysis.

    Runs ``analyze_alerts`` on ``csv_file`` with its default threshold and
    assigns each noisy rule an action based on its false-positive rate:
    ``DISABLE`` (>= 90%), ``ADD_WHITELIST`` (>= 70%), ``TUNE_THRESHOLD``
    (>= 50%), otherwise ``REVIEW``.

    Args:
        csv_file: Path to the SIEM alert export CSV.

    Returns:
        A dict with ``generated`` (naive ISO-8601 UTC timestamp),
        ``overall_fp_rate``, ``rules_to_tune``, ``recommendations`` (one dict
        per noisy rule holding the action, reason and the rule's statistics)
        and ``top_fp_sources``.
    """
    # Imported locally so this function does not depend on the module-level
    # import list being extended.
    from datetime import timezone

    analysis = analyze_alerts(csv_file)
    recommendations = []
    for rule in analysis["noisy_rules"]:
        if rule["fp_rate"] >= 90:
            action = "DISABLE"
            reason = f"FP rate {rule['fp_rate']}% — rule generates almost exclusively false positives"
        elif rule["fp_rate"] >= 70:
            action = "ADD_WHITELIST"
            reason = f"FP rate {rule['fp_rate']}% — add source/destination whitelists"
        elif rule["fp_rate"] >= 50:
            action = "TUNE_THRESHOLD"
            reason = f"FP rate {rule['fp_rate']}% — increase detection threshold or add conditions"
        else:
            action = "REVIEW"
            reason = f"FP rate {rule['fp_rate']}% with {rule['false_positives']} FPs — manual review needed"
        # "rule" is listed first so it leads the output; **rule then appends
        # the remaining statistics (total, false_positives, fp_rate).
        recommendations.append({"rule": rule["rule"], "action": action, "reason": reason, **rule})

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the emitted string keeps its previous format.
    generated = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    return {
        "generated": generated,
        "overall_fp_rate": analysis["fp_rate_pct"],
        "rules_to_tune": len(recommendations),
        "recommendations": recommendations,
        "top_fp_sources": analysis["top_fp_sources"],
    }


def simulate_tuning_impact(csv_file, rules_to_disable=None, sources_to_whitelist=None):
    """Simulate the impact of proposed tuning changes on alert volume.

    Args:
        csv_file: Path to the SIEM alert export CSV (with a header row).
        rules_to_disable: Iterable of rule names whose alerts are dropped.
            ``None`` means no rule is disabled.
        sources_to_whitelist: Iterable of source values whose alerts are
            dropped. ``None`` means no source is whitelisted.

    Returns:
        A dict with ``original_alerts``, ``remaining_alerts``, ``suppressed``
        (counts ``by_rule`` and ``by_source``; a row matching both is counted
        under ``by_rule`` only), ``reduction_pct`` and ``new_fp_rate_pct``
        (both rounded to one decimal).
    """
    # Status values (compared case-insensitively) that count as a false positive.
    fp_statuses = {"false_positive", "fp", "closed_fp", "benign"}

    # newline="" is what the csv module expects so it can handle line endings
    # (including newlines embedded in quoted fields) itself.
    with open(csv_file, "r", encoding="utf-8", errors="replace", newline="") as f:
        rows = list(csv.DictReader(f))

    # Build the lookup sets once instead of scanning a list for every row.
    disabled_rules = frozenset(rules_to_disable or ())
    whitelisted_sources = frozenset(sources_to_whitelist or ())

    original = len(rows)
    remaining = []
    suppressed = {"by_rule": 0, "by_source": 0}
    for row in rows:
        rule = row.get("rule_name", row.get("Rule", row.get("alert_name", "")))
        source = row.get("src_ip", row.get("source_ip", row.get("Source", "")))
        if rule in disabled_rules:
            suppressed["by_rule"] += 1
            continue
        if source in whitelisted_sources:
            suppressed["by_source"] += 1
            continue
        remaining.append(row)

    reduction = (1 - len(remaining) / original) * 100 if original else 0

    fp_remaining = 0
    for row in remaining:
        # Same column fallback as analyze_alerts (including "disposition").
        # DictReader fills the missing trailing fields of a short row with
        # None, so guard before lower-casing.
        status = row.get("status", row.get("Status", row.get("disposition", "")))
        if (status or "").lower() in fp_statuses:
            fp_remaining += 1
    new_fp_rate = fp_remaining / len(remaining) * 100 if remaining else 0

    return {
        "original_alerts": original,
        "remaining_alerts": len(remaining),
        "suppressed": suppressed,
        "reduction_pct": round(reduction, 1),
        "new_fp_rate_pct": round(new_fp_rate, 1),
    }


def main():
    """Command-line entry point: run the chosen subcommand and print JSON.

    Subcommands are ``analyze``, ``tune`` and ``simulate``. When no
    subcommand is given, the help text is printed and nothing else happens.
    """
    cli = argparse.ArgumentParser(description="SIEM False Positive Reduction Agent")
    commands = cli.add_subparsers(dest="command")

    analyze_cmd = commands.add_parser("analyze", help="Analyze alert false positive patterns")
    analyze_cmd.add_argument("--csv", required=True, help="SIEM alert export CSV")
    analyze_cmd.add_argument("--threshold", type=float, default=5, help="Min FP rate to flag")

    tune_cmd = commands.add_parser("tune", help="Generate tuning recommendations")
    tune_cmd.add_argument("--csv", required=True)

    simulate_cmd = commands.add_parser("simulate", help="Simulate tuning impact")
    simulate_cmd.add_argument("--csv", required=True)
    simulate_cmd.add_argument("--disable-rules", nargs="*", default=[])
    simulate_cmd.add_argument("--whitelist-sources", nargs="*", default=[])

    opts = cli.parse_args()

    # Map each subcommand name to a thunk that runs it with the parsed options.
    handlers = {
        "analyze": lambda: analyze_alerts(opts.csv, opts.threshold),
        "tune": lambda: generate_tuning_recommendations(opts.csv),
        "simulate": lambda: simulate_tuning_impact(
            opts.csv, opts.disable_rules, opts.whitelist_sources
        ),
    }
    handler = handlers.get(opts.command)
    if handler is None:
        # No subcommand supplied on the command line.
        cli.print_help()
        return

    print(json.dumps(handler(), indent=2, default=str))


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()