Add 5 new cybersecurity skills with full implementations

- implementing-vulnerability-management-with-greenbone: python-gvm GMP API, scan task creation, XML report parsing - detecting-email-account-compromise: Microsoft Graph inbox rules, impossible travel detection, OAuth grant analysis - performing-threat-intelligence-sharing-with-misp: PyMISP event creation, attribute management, sharing validation - analyzing-cobaltstrike-malleable-c2-profiles: dissect.cobaltstrike C2Profile parsing, Suricata rule generation - hunting-for-registry-run-key-persistence: Sysmon Event 13 analysis, T1547.001 detection, Sigma rule generation
2026-08-03 09:20:18 +03:00 · 2026-03-11 00:41:59 +01:00
parent e77d55ad50
commit 757f1c8eae
16 changed files with 1439 additions and 0 deletions
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+"""SIEM use case tuning agent - analyzes alert data to reduce false positives and optimize detection rules."""
+
+import json
+import csv
+import math
+import argparse
+from collections import defaultdict
+from datetime import datetime
+
+
+def load_alert_data(filepath):
+    """Load alert/notable event export (CSV with columns: rule_name, timestamp, disposition, source, user)."""
+    alerts = []
+    with open(filepath, newline="", encoding="utf-8") as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            alerts.append({
+                "rule_name": row.get("rule_name", row.get("search_name", "")),
+                "timestamp": row.get("timestamp", row.get("_time", "")),
+                "disposition": row.get("disposition", row.get("status", "unknown")),
+                "source": row.get("source", row.get("src", "")),
+                "user": row.get("user", row.get("dest_user", "")),
+                "severity": row.get("severity", "medium"),
+            })
+    return alerts
+
+
+def calculate_rule_metrics(alerts):
+    """Calculate per-rule alert volume, FP rate, and disposition breakdown."""
+    rule_stats = defaultdict(lambda: {"total": 0, "true_positive": 0, "false_positive": 0,
+                                       "pending": 0, "sources": set(), "users": set()})
+    for alert in alerts:
+        rule = alert["rule_name"]
+        rule_stats[rule]["total"] += 1
+        disp = alert["disposition"].lower()
+        if disp in ("true_positive", "tp", "confirmed", "escalated"):
+            rule_stats[rule]["true_positive"] += 1
+        elif disp in ("false_positive", "fp", "benign", "closed_fp"):
+            rule_stats[rule]["false_positive"] += 1
+        else:
+            rule_stats[rule]["pending"] += 1
+        if alert["source"]:
+            rule_stats[rule]["sources"].add(alert["source"])
+        if alert["user"]:
+            rule_stats[rule]["users"].add(alert["user"])
+
+    metrics = []
+    for rule, stats in rule_stats.items():
+        reviewed = stats["true_positive"] + stats["false_positive"]
+        fp_rate = stats["false_positive"] / reviewed if reviewed > 0 else 0.0
+        precision = stats["true_positive"] / reviewed if reviewed > 0 else 0.0
+        metrics.append({
+            "rule_name": rule,
+            "total_alerts": stats["total"],
+            "true_positives": stats["true_positive"],
+            "false_positives": stats["false_positive"],
+            "pending": stats["pending"],
+            "fp_rate": round(fp_rate, 4),
+            "precision": round(precision, 4),
+            "unique_sources": len(stats["sources"]),
+            "unique_users": len(stats["users"]),
+            "top_sources": list(stats["sources"])[:10],
+        })
+    return sorted(metrics, key=lambda x: x["fp_rate"], reverse=True)
+
+
+def identify_whitelist_candidates(alerts, fp_threshold=0.8):
+    """Identify source/user pairs that consistently trigger FPs for a given rule."""
+    rule_source_stats = defaultdict(lambda: defaultdict(lambda: {"tp": 0, "fp": 0}))
+    for alert in alerts:
+        disp = alert["disposition"].lower()
+        key = alert["source"] or alert["user"]
+        if not key:
+            continue
+        if disp in ("false_positive", "fp", "benign", "closed_fp"):
+            rule_source_stats[alert["rule_name"]][key]["fp"] += 1
+        elif disp in ("true_positive", "tp", "confirmed", "escalated"):
+            rule_source_stats[alert["rule_name"]][key]["tp"] += 1
+
+    candidates = []
+    for rule, sources in rule_source_stats.items():
+        for source, counts in sources.items():
+            total = counts["tp"] + counts["fp"]
+            if total >= 3 and counts["fp"] / total >= fp_threshold:
+                candidates.append({
+                    "rule_name": rule,
+                    "entity": source,
+                    "fp_count": counts["fp"],
+                    "tp_count": counts["tp"],
+                    "fp_ratio": round(counts["fp"] / total, 4),
+                    "recommendation": "Add to whitelist" if counts["tp"] == 0 else "Review before whitelisting",
+                })
+    return sorted(candidates, key=lambda x: x["fp_count"], reverse=True)
+
+
+def compute_threshold_recommendation(alerts, rule_name, field="total"):
+    """Compute statistical threshold for a rule based on hourly alert distribution."""
+    hourly_counts = defaultdict(int)
+    for alert in alerts:
+        if alert["rule_name"] != rule_name:
+            continue
+        try:
+            dt = datetime.fromisoformat(alert["timestamp"].replace("Z", "+00:00"))
+            hourly_counts[dt.strftime("%Y-%m-%d %H")] += 1
+        except (ValueError, AttributeError):
+            continue
+    if not hourly_counts:
+        return None
+    values = list(hourly_counts.values())
+    mean = sum(values) / len(values)
+    variance = sum((x - mean) ** 2 for x in values) / len(values)
+    stdev = math.sqrt(variance)
+    return {
+        "rule_name": rule_name,
+        "hourly_mean": round(mean, 2),
+        "hourly_stdev": round(stdev, 2),
+        "suggested_threshold_2sd": round(mean + 2 * stdev, 0),
+        "suggested_threshold_3sd": round(mean + 3 * stdev, 0),
+        "sample_hours": len(hourly_counts),
+    }
+
+
+def generate_tuning_report(metrics, whitelist, thresholds):
+    """Generate comprehensive tuning report with recommendations."""
+    high_fp_rules = [m for m in metrics if m["fp_rate"] > 0.7]
+    medium_fp_rules = [m for m in metrics if 0.3 < m["fp_rate"] <= 0.7]
+    total_alerts = sum(m["total_alerts"] for m in metrics)
+    total_fp = sum(m["false_positives"] for m in metrics)
+    projected_reduction = sum(w["fp_count"] for w in whitelist)
+
+    return {
+        "analysis_time": datetime.utcnow().isoformat() + "Z",
+        "summary": {
+            "total_rules_analyzed": len(metrics),
+            "total_alerts": total_alerts,
+            "total_false_positives": total_fp,
+            "overall_fp_rate": round(total_fp / total_alerts, 4) if total_alerts else 0,
+            "high_fp_rules": len(high_fp_rules),
+            "whitelist_candidates": len(whitelist),
+            "projected_alert_reduction": projected_reduction,
+        },
+        "high_fp_rules": high_fp_rules,
+        "medium_fp_rules": medium_fp_rules,
+        "whitelist_recommendations": whitelist[:20],
+        "threshold_recommendations": thresholds,
+        "actions": [
+            {"priority": "high", "action": f"Disable or rewrite {len(high_fp_rules)} rules with FP rate > 70%"},
+            {"priority": "medium", "action": f"Add {len(whitelist)} whitelist entries to reduce {projected_reduction} FP alerts"},
+            {"priority": "low", "action": f"Review {len(medium_fp_rules)} rules with FP rate 30-70%"},
+        ],
+    }
+
+
+def main():
+    parser = argparse.ArgumentParser(description="SIEM Use Case Tuning Agent")
+    parser.add_argument("--alert-csv", required=True, help="CSV export of SIEM alerts with disposition data")
+    parser.add_argument("--fp-threshold", type=float, default=0.8, help="FP ratio threshold for whitelist candidates")
+    parser.add_argument("--top-rules", type=int, default=5, help="Number of top rules to compute thresholds for")
+    parser.add_argument("--output", default="tuning_report.json", help="Output report path")
+    args = parser.parse_args()
+
+    alerts = load_alert_data(args.alert_csv)
+    print(f"[+] Loaded {len(alerts)} alerts from {args.alert_csv}")
+
+    metrics = calculate_rule_metrics(alerts)
+    print(f"[+] Analyzed {len(metrics)} unique detection rules")
+
+    whitelist = identify_whitelist_candidates(alerts, args.fp_threshold)
+    print(f"[+] Found {len(whitelist)} whitelist candidates (FP ratio >= {args.fp_threshold})")
+
+    thresholds = []
+    for m in metrics[:args.top_rules]:
+        t = compute_threshold_recommendation(alerts, m["rule_name"])
+        if t:
+            thresholds.append(t)
+
+    report = generate_tuning_report(metrics, whitelist, thresholds)
+    with open(args.output, "w") as f:
+        json.dump(report, f, indent=2)
+    print(f"[+] Tuning report saved to {args.output}")
+    print(f"[+] Overall FP rate: {report['summary']['overall_fp_rate']:.1%}")
+    print(f"[+] Projected alert reduction from whitelisting: {report['summary']['projected_alert_reduction']}")
+
+
+if __name__ == "__main__":
+    main()