#!/usr/bin/env python3 """ C2 Beaconing Detection Script Analyzes network connection logs for periodic beaconing patterns using statistical frequency analysis and jitter detection. """ import json import csv import argparse import datetime import math import re from collections import defaultdict from pathlib import Path # Known legitimate beaconing services to exclude KNOWN_GOOD_DOMAINS = { "microsoft.com", "windowsupdate.com", "google.com", "googleapis.com", "gstatic.com", "amazonaws.com", "cloudflare.com", "akamai.net", "apple.com", "icloud.com", "adobe.com", "symantec.com", "norton.com", "mcafee.com", "crowdstrike.com", "sentinelone.com", "office365.com", "office.com", "live.com", "outlook.com", "github.com", "slack.com", "teams.microsoft.com", } # Known C2 framework default ports C2_SUSPICIOUS_PORTS = {443, 8443, 8080, 4444, 5555, 6666, 8888, 9090, 50050, 31337} # Beaconing detection thresholds BEACON_THRESHOLDS = { "min_connections": 20, # Minimum connections for analysis "max_cv": 0.25, # Max coefficient of variation for periodicity "min_interval": 10, # Minimum average interval (seconds) "max_interval": 86400, # Maximum average interval (1 day) "max_data_cv": 0.30, # Max CV for data size consistency } def parse_logs(input_path: str) -> list[dict]: """Parse connection logs (Zeek, CSV, JSON format).""" path = Path(input_path) events = [] if path.suffix == ".json": with open(path, "r", encoding="utf-8") as f: data = json.load(f) events = data if isinstance(data, list) else data.get("events", []) elif path.suffix == ".csv": with open(path, "r", encoding="utf-8-sig") as f: events = [dict(row) for row in csv.DictReader(f)] elif path.suffix == ".log": # Zeek tab-separated format with open(path, "r", encoding="utf-8") as f: headers = None for line in f: if line.startswith("#fields"): headers = line.strip().split("\t")[1:] elif line.startswith("#"): continue elif headers: values = line.strip().split("\t") if len(values) == len(headers): events.append(dict(zip(headers, values))) return events def normalize_connection(event: dict) -> dict: """Normalize connection event fields.""" field_map = { "timestamp": ["ts", "timestamp", "_time", "@timestamp", "Timestamp"], "src_ip": ["id.orig_h", "src_ip", "source_ip", "LocalIP", "DeviceName"], "src_port": ["id.orig_p", "src_port", "source_port", "LocalPort"], "dst_ip": ["id.resp_h", "dst_ip", "dest_ip", "RemoteIP", "DestinationIp"], "dst_port": ["id.resp_p", "dst_port", "dest_port", "RemotePort", "DestinationPort"], "domain": ["query", "domain", "host", "RemoteUrl", "server_name", "dest"], "bytes_sent": ["orig_bytes", "bytes_out", "SentBytes", "bytes_sent"], "bytes_recv": ["resp_bytes", "bytes_in", "ReceivedBytes", "bytes_recv"], "duration": ["duration", "conn_duration", "session_duration"], "proto": ["proto", "protocol", "Protocol"], "user_agent": ["user_agent", "UserAgent", "http_user_agent"], } normalized = {} for target, sources in field_map.items(): for src in sources: if src in event and event[src] and event[src] != "-": normalized[target] = str(event[src]) break if target not in normalized: normalized[target] = "" return normalized def is_known_good(domain: str) -> bool: """Check if domain is in known-good list.""" domain_lower = domain.lower() for good in KNOWN_GOOD_DOMAINS: if domain_lower.endswith(good): return True return False def calculate_entropy(text: str) -> float: """Calculate Shannon entropy of a string.""" if not text: return 0.0 freq = defaultdict(int) for char in text: freq[char] += 1 length = len(text) entropy = 0.0 for count in freq.values(): p = count / length if p > 0: entropy -= p * math.log2(p) return entropy def detect_beaconing(connections: list[dict]) -> list[dict]: """Analyze connection patterns for beaconing behavior.""" # Group connections by source-destination pair pairs = defaultdict(list) for conn in connections: src = conn.get("src_ip", "") dst = conn.get("domain", "") or conn.get("dst_ip", "") if src and dst and not is_known_good(dst): try: ts = float(conn.get("timestamp", 0)) except (ValueError, TypeError): # Try parsing ISO timestamp try: dt = datetime.datetime.fromisoformat(conn["timestamp"].replace("Z", "+00:00")) ts = dt.timestamp() except (ValueError, KeyError): continue pairs[(src, dst)].append({ "timestamp": ts, "bytes_sent": int(conn.get("bytes_sent", 0) or 0), "bytes_recv": int(conn.get("bytes_recv", 0) or 0), "dst_port": conn.get("dst_port", ""), "user_agent": conn.get("user_agent", ""), }) findings = [] for (src, dst), conns in pairs.items(): if len(conns) < BEACON_THRESHOLDS["min_connections"]: continue # Sort by timestamp conns.sort(key=lambda x: x["timestamp"]) # Calculate intervals intervals = [] for i in range(1, len(conns)): interval = conns[i]["timestamp"] - conns[i - 1]["timestamp"] if interval > 0: intervals.append(interval) if len(intervals) < 10: continue # Statistical analysis avg_interval = sum(intervals) / len(intervals) if avg_interval < BEACON_THRESHOLDS["min_interval"] or avg_interval > BEACON_THRESHOLDS["max_interval"]: continue variance = sum((x - avg_interval) ** 2 for x in intervals) / len(intervals) stdev = math.sqrt(variance) cv = stdev / avg_interval if avg_interval > 0 else float("inf") # Check if beaconing threshold met if cv > BEACON_THRESHOLDS["max_cv"]: continue # Calculate data size consistency bytes_sent_list = [c["bytes_sent"] for c in conns if c["bytes_sent"] > 0] data_cv = 0.0 if bytes_sent_list: avg_bytes = sum(bytes_sent_list) / len(bytes_sent_list) if avg_bytes > 0: data_var = sum((x - avg_bytes) ** 2 for x in bytes_sent_list) / len(bytes_sent_list) data_cv = math.sqrt(data_var) / avg_bytes # Calculate risk score risk = 0 indicators = [] # Low CV = high periodicity if cv < 0.05: risk += 40 indicators.append(f"Very regular interval (CV={cv:.4f})") elif cv < 0.15: risk += 30 indicators.append(f"Regular interval (CV={cv:.4f})") else: risk += 20 indicators.append(f"Moderately regular interval (CV={cv:.4f})") # Consistent data sizes if data_cv < 0.10 and bytes_sent_list: risk += 15 indicators.append(f"Very consistent payload size (CV={data_cv:.4f})") # Suspicious port dst_ports = set(c["dst_port"] for c in conns) for port in dst_ports: try: if int(port) in C2_SUSPICIOUS_PORTS: risk += 10 indicators.append(f"Suspicious port: {port}") except ValueError: pass # High connection count if len(conns) > 500: risk += 10 indicators.append(f"High connection count: {len(conns)}") # Domain entropy (DGA indicator) domain_parts = dst.split(".") if domain_parts: entropy = calculate_entropy(domain_parts[0]) if entropy > 3.5: risk += 15 indicators.append(f"High domain entropy: {entropy:.2f} (possible DGA)") risk_level = ( "CRITICAL" if risk >= 70 else "HIGH" if risk >= 50 else "MEDIUM" if risk >= 30 else "LOW" ) # Estimate jitter percentage jitter_pct = (stdev / avg_interval * 100) if avg_interval > 0 else 0 findings.append({ "src_ip": src, "destination": dst, "connection_count": len(conns), "avg_interval_sec": round(avg_interval, 2), "stdev_interval": round(stdev, 2), "coefficient_of_variation": round(cv, 4), "estimated_jitter_pct": round(jitter_pct, 1), "avg_bytes_sent": round(sum(bytes_sent_list) / len(bytes_sent_list)) if bytes_sent_list else 0, "data_size_cv": round(data_cv, 4), "first_seen": datetime.datetime.fromtimestamp(conns[0]["timestamp"]).isoformat(), "last_seen": datetime.datetime.fromtimestamp(conns[-1]["timestamp"]).isoformat(), "dst_ports": list(dst_ports), "risk_score": risk, "risk_level": risk_level, "indicators": indicators, }) return sorted(findings, key=lambda x: x["risk_score"], reverse=True) def detect_dns_tunneling(connections: list[dict]) -> list[dict]: """Detect DNS tunneling indicators.""" domain_stats = defaultdict(lambda: {"queries": 0, "unique_subdomains": set(), "total_length": 0, "txt_queries": 0}) for conn in connections: domain = conn.get("domain", "") if not domain: continue parts = domain.split(".") if len(parts) < 3: continue base_domain = ".".join(parts[-2:]) subdomain = ".".join(parts[:-2]) stats = domain_stats[base_domain] stats["queries"] += 1 stats["unique_subdomains"].add(subdomain) stats["total_length"] += len(domain) findings = [] for base_domain, stats in domain_stats.items(): if stats["queries"] < 50: continue avg_len = stats["total_length"] / stats["queries"] unique_subs = len(stats["unique_subdomains"]) risk = 0 indicators = [] if unique_subs > 100: risk += 30 indicators.append(f"High unique subdomain count: {unique_subs}") if avg_len > 40: risk += 25 indicators.append(f"Long average query length: {avg_len:.1f}") if stats["queries"] > 500: risk += 15 indicators.append(f"High query volume: {stats['queries']}") # Check subdomain entropy for sub in list(stats["unique_subdomains"])[:10]: ent = calculate_entropy(sub) if ent > 3.5: risk += 20 indicators.append(f"High subdomain entropy: {ent:.2f}") break if risk >= 30: risk_level = "CRITICAL" if risk >= 70 else "HIGH" if risk >= 50 else "MEDIUM" findings.append({ "detection_type": "DNS_TUNNELING", "domain": base_domain, "query_count": stats["queries"], "unique_subdomains": unique_subs, "avg_query_length": round(avg_len, 1), "risk_score": risk, "risk_level": risk_level, "indicators": indicators, }) return sorted(findings, key=lambda x: x["risk_score"], reverse=True) def run_hunt(input_path: str, output_dir: str) -> None: """Execute C2 beaconing hunt.""" print(f"[*] C2 Beaconing Hunt - {datetime.datetime.now().isoformat()}") connections = parse_logs(input_path) normalized = [normalize_connection(c) for c in connections] print(f"[*] Loaded {len(normalized)} connections") beacon_findings = detect_beaconing(normalized) dns_findings = detect_dns_tunneling(normalized) all_findings = beacon_findings + dns_findings print(f"[*] Beacon detections: {len(beacon_findings)}") print(f"[*] DNS tunnel detections: {len(dns_findings)}") output_path = Path(output_dir) output_path.mkdir(parents=True, exist_ok=True) with open(output_path / "c2_beacon_findings.json", "w", encoding="utf-8") as f: json.dump({ "hunt_id": f"TH-C2-{datetime.date.today().isoformat()}", "total_connections": len(normalized), "beacon_findings": len(beacon_findings), "dns_tunnel_findings": len(dns_findings), "findings": all_findings, }, f, indent=2) with open(output_path / "hunt_report.md", "w", encoding="utf-8") as f: f.write(f"# C2 Beaconing Hunt Report\n\n") f.write(f"**Date**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") f.write(f"**Connections Analyzed**: {len(normalized)}\n\n") f.write("## Beaconing Detections\n\n") for bf in beacon_findings[:20]: f.write(f"### [{bf['risk_level']}] {bf['src_ip']} -> {bf['destination']}\n") f.write(f"- Interval: {bf['avg_interval_sec']}s (CV: {bf['coefficient_of_variation']})\n") f.write(f"- Jitter: ~{bf['estimated_jitter_pct']}%\n") f.write(f"- Connections: {bf['connection_count']}\n\n") f.write("## DNS Tunneling Detections\n\n") for df in dns_findings[:10]: f.write(f"### [{df['risk_level']}] {df['domain']}\n") f.write(f"- Queries: {df['query_count']}, Unique Subdomains: {df['unique_subdomains']}\n\n") print(f"[+] Results written to {output_dir}") def main(): parser = argparse.ArgumentParser(description="C2 Beaconing Detection") subparsers = parser.add_subparsers(dest="command") hunt_p = subparsers.add_parser("hunt") hunt_p.add_argument("--input", "-i", required=True) hunt_p.add_argument("--output", "-o", default="./c2_hunt_output") subparsers.add_parser("queries", help="Print hunting queries") args = parser.parse_args() if args.command == "hunt": run_hunt(args.input, args.output) elif args.command == "queries": print("=== Splunk Beaconing Queries ===\n") print("--- HTTP/S Beacon Frequency ---") print("""index=proxy | bin _time span=1s | stats count by src_ip dest _time | streamstats current=f last(_time) as prev_time by src_ip dest | eval interval=_time-prev_time | stats count avg(interval) as avg stdev(interval) as sd by src_ip dest | eval cv=sd/avg | where count>50 AND cv<0.20 AND avg>30 | sort cv""") else: parser.print_help() if __name__ == "__main__": main()