#!/usr/bin/env python3
"""Agent for hunting data exfiltration indicators in network traffic.

Three heuristics are applied to tab-separated logs:

* DNS tunnelling: many queries to one registered domain whose subdomains are
  long and high-entropy.
* Large outbound transfers: total originator bytes per destination.
* Off-hours transfers: originator bytes per destination sent late at night.

Input may be a plain TSV file with a header row, or a native Zeek TSV log
(beginning with ``#separator`` and carrying its column names in ``#fields``).
"""

import argparse
import csv
import json
import math
import sys
from collections import Counter, defaultdict
from datetime import datetime, timezone

DNS_EXFIL_ENTROPY_THRESHOLD = 3.5
DNS_LABEL_LENGTH_THRESHOLD = 40
LARGE_UPLOAD_THRESHOLD_MB = 50

SUSPICIOUS_PORTS = {
    20: "FTP Data",
    21: "FTP",
    22: "SSH/SCP",
    53: "DNS",
    443: "HTTPS",
    993: "IMAPS",
    995: "POP3S",
    8443: "Alt HTTPS",
    6667: "IRC",
}

# A domain needs at least this many queries before it is scored.
DNS_MIN_QUERY_COUNT = 5
# Average subdomain length (characters) above which a domain is suspicious.
DNS_AVG_SUBDOMAIN_LENGTH_THRESHOLD = 20
# Off-hours window is [OFF_HOURS_START_HOUR, 24) plus [0, OFF_HOURS_END_HOUR),
# evaluated in the local timezone of the machine running the analysis.
OFF_HOURS_START_HOUR = 23
OFF_HOURS_END_HOUR = 6
OFF_HOURS_UPLOAD_THRESHOLD_MB = 10

BYTES_PER_MB = 1024 * 1024
_ZEEK_FIELDS_PREFIX = "#fields\t"


def _warn(message):
    """Print a warning to stderr so stdout stays reserved for the report."""
    print(f"[!] {message}", file=sys.stderr)


def _zeek_aware_lines(lines):
    """Yield lines suitable for ``csv.DictReader`` from a plain or Zeek TSV log.

    A file whose first line starts with ``#separator`` is treated as a Zeek
    log: the ``#fields`` line becomes the header row and every other ``#``
    metadata line (``#types``, ``#close``, ...) is dropped. Any other file is
    passed through untouched.
    """
    iterator = iter(lines)
    first = next(iterator, None)
    if first is None:
        return
    if not first.startswith("#separator"):
        yield first
        yield from iterator
        return
    for line in iterator:
        if line.startswith(_ZEEK_FIELDS_PREFIX):
            yield line[len(_ZEEK_FIELDS_PREFIX):]
        elif not line.startswith("#"):
            yield line


def _iter_rows(filepath):
    """Yield each record of the TSV log at *filepath* as a dict.

    Raises:
        OSError: if the file cannot be opened or read.
        csv.Error: if the csv module cannot parse the file.
    """
    # errors="replace" keeps a stray non-UTF-8 byte from aborting the run;
    # newline="" is what the csv module expects from its input file.
    with open(
        filepath, "r", encoding="utf-8", errors="replace", newline=""
    ) as handle:
        yield from csv.DictReader(_zeek_aware_lines(handle), delimiter="\t")


def _destination(row):
    """Return the responder address of a flow row ('' when absent)."""
    return row.get("id.resp_h", row.get("dst", "")) or ""


def _parse_byte_count(value):
    """Return *value* as an int; unset ('-', '', None) or malformed gives 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        return 0


def _local_hour(value):
    """Return the local-time hour of epoch timestamp *value*, or None.

    None means the timestamp is missing, unset ('-'), malformed, or outside
    the range the platform can convert.
    """
    try:
        return datetime.fromtimestamp(float(value)).hour
    except (TypeError, ValueError, OverflowError, OSError):
        return None


def _entropy_from_counts(counts, total):
    """Return the Shannon entropy (bits/symbol) for symbol *counts*."""
    if total <= 0:
        return 0.0
    return -sum((n / total) * math.log2(n / total) for n in counts)


def shannon_entropy(data):
    """Calculate Shannon entropy of a string.

    Returns 0.0 for an empty string, otherwise the entropy in bits per
    character.
    """
    if not data:
        return 0.0
    return _entropy_from_counts(Counter(data).values(), len(data))


def analyze_dns_queries(filepath):
    """Analyze DNS query log for exfiltration indicators.

    Queries are grouped by their last two labels. A group is reported when it
    has at least ``DNS_MIN_QUERY_COUNT`` queries, the entropy of all of its
    subdomain characters exceeds ``DNS_EXFIL_ENTROPY_THRESHOLD``, and the
    average subdomain length exceeds ``DNS_AVG_SUBDOMAIN_LENGTH_THRESHOLD``.

    Args:
        filepath: Path to a TSV log with a ``query`` column.

    Returns:
        A list of ``dns_exfiltration`` finding dicts; empty when nothing is
        found or the file cannot be read (a warning is printed to stderr).
    """
    findings = []
    # Per-domain character counts give the same entropy as concatenating every
    # subdomain, without keeping every query string in memory.
    stats_by_domain = defaultdict(
        lambda: {"count": 0, "total_length": 0, "chars": Counter()}
    )
    try:
        for row in _iter_rows(filepath):
            # Drop the optional root dot so "evil.com." groups as "evil.com".
            query = (row.get("query") or "").strip().rstrip(".")
            labels = query.split(".")
            if len(labels) < 2:
                continue
            # DNS names compare case-insensitively; only the grouping key is
            # lowercased so the subdomain payload keeps its original entropy.
            domain = ".".join(labels[-2:]).lower()
            subdomain = ".".join(labels[:-2])
            stats = stats_by_domain[domain]
            stats["count"] += 1
            stats["total_length"] += len(subdomain)
            stats["chars"].update(subdomain)
    except (OSError, csv.Error) as exc:
        _warn(f"Could not analyze DNS log {filepath}: {exc}")
        return findings

    for domain, stats in stats_by_domain.items():
        if stats["count"] < DNS_MIN_QUERY_COUNT:
            continue
        avg_subdomain_len = stats["total_length"] / stats["count"]
        entropy = _entropy_from_counts(
            stats["chars"].values(), stats["total_length"]
        )
        if (
            entropy > DNS_EXFIL_ENTROPY_THRESHOLD
            and avg_subdomain_len > DNS_AVG_SUBDOMAIN_LENGTH_THRESHOLD
        ):
            findings.append({
                "type": "dns_exfiltration",
                "domain": domain,
                "query_count": stats["count"],
                "avg_subdomain_length": round(avg_subdomain_len, 1),
                "entropy": round(entropy, 3),
                "severity": "CRITICAL",
            })
    return findings


def analyze_network_flows(filepath):
    """Analyze network flow data for large outbound transfers.

    Sums ``orig_bytes`` per destination (``id.resp_h``, falling back to
    ``dst``) and reports destinations that received at least
    ``LARGE_UPLOAD_THRESHOLD_MB``. Unset or malformed byte counts count as 0
    instead of discarding the whole log.

    Args:
        filepath: Path to a TSV flow log.

    Returns:
        A list of ``large_outbound_transfer`` finding dicts; empty when
        nothing is found or the file cannot be read (a warning is printed to
        stderr).
    """
    findings = []
    bytes_by_destination = defaultdict(int)
    try:
        for row in _iter_rows(filepath):
            sent = _parse_byte_count(row.get("orig_bytes"))
            bytes_by_destination[_destination(row)] += sent
    except (OSError, csv.Error) as exc:
        _warn(f"Could not analyze flow log {filepath}: {exc}")
        return findings

    for dst, total in bytes_by_destination.items():
        mb = total / BYTES_PER_MB
        if mb >= LARGE_UPLOAD_THRESHOLD_MB:
            findings.append({
                "type": "large_outbound_transfer",
                "destination": dst,
                "total_bytes": total,
                "total_mb": round(mb, 2),
                "severity": "HIGH",
            })
    return findings


def analyze_off_hours_traffic(filepath):
    """Check for significant data transfers during off-hours.

    A flow is off-hours when the local-time hour of its ``ts`` epoch timestamp
    is before ``OFF_HOURS_END_HOUR`` or at/after ``OFF_HOURS_START_HOUR``.
    Rows without a usable timestamp are skipped, because they cannot be placed
    in or out of the window.

    Args:
        filepath: Path to a TSV flow log.

    Returns:
        A list of ``off_hours_transfer`` finding dicts for destinations that
        received at least ``OFF_HOURS_UPLOAD_THRESHOLD_MB`` off-hours; empty
        when nothing is found or the file cannot be read (a warning is printed
        to stderr).
    """
    findings = []
    off_hours_bytes = defaultdict(int)
    try:
        for row in _iter_rows(filepath):
            hour = _local_hour(row.get("ts"))
            if hour is None:
                continue
            if hour < OFF_HOURS_END_HOUR or hour >= OFF_HOURS_START_HOUR:
                sent = _parse_byte_count(row.get("orig_bytes"))
                off_hours_bytes[_destination(row)] += sent
    except (OSError, csv.Error) as exc:
        _warn(f"Could not analyze flow log {filepath}: {exc}")
        return findings

    for dst, total in off_hours_bytes.items():
        mb = total / BYTES_PER_MB
        if mb >= OFF_HOURS_UPLOAD_THRESHOLD_MB:
            findings.append({
                "type": "off_hours_transfer",
                "destination": dst,
                "total_mb": round(mb, 2),
                "severity": "MEDIUM",
            })
    return findings


def _overall_risk(findings):
    """Return the report-level risk: the worst severity among *findings*."""
    severities = {finding["severity"] for finding in findings}
    if "CRITICAL" in severities:
        return "CRITICAL"
    if "HIGH" in severities:
        return "HIGH"
    # Any remaining finding, whatever its severity label, rates MEDIUM.
    return "MEDIUM" if findings else "LOW"


def main():
    """Parse command-line arguments, run the analyses and emit a JSON report."""
    parser = argparse.ArgumentParser(
        description="Data exfiltration indicator hunter"
    )
    parser.add_argument("--conn-log", help="Zeek conn.log or network flow CSV")
    parser.add_argument("--dns-log", help="Zeek dns.log or DNS query CSV")
    parser.add_argument("--output", "-o", help="Output JSON report")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    if not args.conn_log and not args.dns_log:
        parser.error("At least one of --conn-log or --dns-log is required")

    print("[*] Data Exfiltration Indicator Hunter")
    report = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "findings": [],
    }

    if args.dns_log:
        report["findings"].extend(analyze_dns_queries(args.dns_log))
    if args.conn_log:
        report["findings"].extend(analyze_network_flows(args.conn_log))
        report["findings"].extend(analyze_off_hours_traffic(args.conn_log))

    report["risk_level"] = _overall_risk(report["findings"])
    report["total_findings"] = len(report["findings"])
    print(f"[*] {report['total_findings']} exfiltration indicators found")

    if args.verbose:
        # Per-finding summary goes to stderr so stdout stays unchanged.
        for finding in report["findings"]:
            target = (
                finding.get("domain")
                or finding.get("destination")
                or "<unknown>"
            )
            print(
                f"[{finding['severity']}] {finding['type']}: {target}",
                file=sys.stderr,
            )

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
        print(f"[*] Report saved to {args.output}")
    else:
        print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()