#!/usr/bin/env python3 """Agent for detecting C2 beaconing patterns in Zeek conn.log data.""" import json import argparse from datetime import datetime import numpy as np import pandas as pd from zat.log_to_dataframe import LogToDataFrame def load_conn_log(log_path): """Load Zeek conn.log into a Pandas DataFrame using ZAT.""" log_to_df = LogToDataFrame() df = log_to_df.create_dataframe(log_path) return df def calculate_beacon_score(intervals): """Calculate a beacon score based on interval regularity.""" if len(intervals) < 5: return 0.0 std_dev = np.std(intervals) mean_val = np.mean(intervals) if mean_val == 0: return 0.0 cv = std_dev / mean_val score = max(0, 1.0 - cv) * 100 return round(score, 2) def detect_beaconing(conn_df, min_connections=10, max_cv=0.3): """Detect beaconing by analyzing connection interval regularity.""" conn_df = conn_df.sort_values("ts") beacons = [] grouped = conn_df.groupby(["id.orig_h", "id.resp_h", "id.resp_p"]) for (src, dst, port), group in grouped: if len(group) < min_connections: continue times = group["ts"].sort_values() intervals = times.diff().dt.total_seconds().dropna().values if len(intervals) < 5: continue std_dev = float(np.std(intervals)) mean_interval = float(np.mean(intervals)) if mean_interval == 0: continue cv = std_dev / mean_interval beacon_score = calculate_beacon_score(intervals) if cv <= max_cv: beacons.append({ "src_ip": src, "dst_ip": dst, "dst_port": int(port) if not pd.isna(port) else 0, "connection_count": len(group), "mean_interval_sec": round(mean_interval, 2), "std_dev_sec": round(std_dev, 2), "coefficient_of_variation": round(cv, 4), "beacon_score": beacon_score, "first_seen": str(times.iloc[0]), "last_seen": str(times.iloc[-1]), }) return sorted(beacons, key=lambda x: x["beacon_score"], reverse=True) def detect_jitter_beaconing(conn_df, base_interval=60, jitter_pct=0.2, min_conns=10): """Detect beaconing with expected interval and jitter tolerance.""" conn_df = conn_df.sort_values("ts") matches = [] grouped = conn_df.groupby(["id.orig_h", "id.resp_h"]) for (src, dst), group in grouped: if len(group) < min_conns: continue times = group["ts"].sort_values() intervals = times.diff().dt.total_seconds().dropna().values lower = base_interval * (1 - jitter_pct) upper = base_interval * (1 + jitter_pct) matching = np.sum((intervals >= lower) & (intervals <= upper)) match_pct = matching / len(intervals) if match_pct > 0.7: matches.append({ "src_ip": src, "dst_ip": dst, "connections": len(group), "matching_intervals": int(matching), "match_percentage": round(match_pct * 100, 1), "expected_interval": base_interval, }) return matches def analyze_dns_beaconing(dns_log_path, min_queries=20, max_cv=0.25): """Analyze Zeek dns.log for DNS-based beaconing patterns.""" log_to_df = LogToDataFrame() dns_df = log_to_df.create_dataframe(dns_log_path) dns_df = dns_df.sort_values("ts") beacons = [] grouped = dns_df.groupby(["id.orig_h", "query"]) for (src, query), group in grouped: if len(group) < min_queries: continue times = group["ts"].sort_values() intervals = times.diff().dt.total_seconds().dropna().values if len(intervals) < 5: continue std_dev = float(np.std(intervals)) mean_val = float(np.mean(intervals)) if mean_val == 0: continue cv = std_dev / mean_val if cv <= max_cv: beacons.append({ "src_ip": src, "query": query, "query_count": len(group), "mean_interval_sec": round(mean_val, 2), "std_dev_sec": round(std_dev, 2), "cv": round(cv, 4), "beacon_score": calculate_beacon_score(intervals), }) return sorted(beacons, key=lambda x: x["beacon_score"], reverse=True) def filter_whitelisted(beacons, whitelist_domains=None): """Remove known-good destinations from beacon results.""" if not whitelist_domains: whitelist_domains = ["microsoft.com", "google.com", "amazonaws.com", "cloudflare.com", "akamai.net"] filtered = [] for b in beacons: dst = b.get("dst_ip", "") or b.get("query", "") if not any(w in dst for w in whitelist_domains): filtered.append(b) return filtered def main(): parser = argparse.ArgumentParser(description="Zeek Beaconing Detection Agent") parser.add_argument("--conn-log", help="Path to Zeek conn.log") parser.add_argument("--dns-log", help="Path to Zeek dns.log") parser.add_argument("--min-connections", type=int, default=10) parser.add_argument("--max-cv", type=float, default=0.3) parser.add_argument("--output", default="beacon_report.json") parser.add_argument("--action", choices=[ "conn_beacon", "dns_beacon", "full_hunt" ], default="full_hunt") args = parser.parse_args() report = {"generated_at": datetime.utcnow().isoformat(), "findings": {}} if args.action in ("conn_beacon", "full_hunt") and args.conn_log: conn_df = load_conn_log(args.conn_log) beacons = detect_beaconing(conn_df, args.min_connections, args.max_cv) beacons = filter_whitelisted(beacons) report["findings"]["conn_beacons"] = beacons print(f"[+] Connection beacons detected: {len(beacons)}") if args.action in ("dns_beacon", "full_hunt") and args.dns_log: dns_beacons = analyze_dns_beaconing(args.dns_log, args.min_connections) dns_beacons = filter_whitelisted(dns_beacons) report["findings"]["dns_beacons"] = dns_beacons print(f"[+] DNS beacons detected: {len(dns_beacons)}") with open(args.output, "w") as f: json.dump(report, f, indent=2, default=str) print(f"[+] Report saved to {args.output}") if __name__ == "__main__": main()