#!/usr/bin/env python3 """DNS exfiltration detection agent using entropy analysis and query pattern anomalies. Analyzes DNS query logs for tunneling indicators: high entropy subdomains, excessive query length, abnormal TXT record usage, and volume spikes. """ import argparse import json import math import re import sys from collections import Counter, defaultdict from datetime import datetime KNOWN_TUNNEL_DOMAINS = { "dnscat2", "iodine", "dns2tcp", "heyoka", "ozyman", "tuns", "dnscapy", "dns-tunnel", } TXT_THRESHOLD = 0.3 ENTROPY_THRESHOLD = 3.5 SUBDOMAIN_LENGTH_THRESHOLD = 40 QUERY_RATE_THRESHOLD = 100 def calculate_entropy(text): if not text: return 0.0 freq = Counter(text) length = len(text) return -sum((count / length) * math.log2(count / length) for count in freq.values()) def parse_dns_log(filepath, log_format="zeek"): queries = [] with open(filepath, "r") as f: if log_format == "zeek": headers = None for line in f: if line.startswith("#fields"): headers = line.strip().split("\t")[1:] continue if line.startswith("#"): continue if not headers: continue fields = line.strip().split("\t") if len(fields) >= len(headers): record = dict(zip(headers, fields)) queries.append({ "timestamp": record.get("ts", ""), "source": record.get("id.orig_h", ""), "query": record.get("query", ""), "qtype": record.get("qtype_name", record.get("qtype", "")), "rcode": record.get("rcode_name", ""), "answers": record.get("answers", ""), }) else: for line in f: parts = line.strip().split() if len(parts) >= 3: queries.append({ "timestamp": parts[0], "source": parts[1] if len(parts) > 3 else "", "query": parts[-2] if len(parts) > 2 else parts[1], "qtype": parts[-1] if len(parts) > 2 else "", }) return queries def analyze_queries(queries): findings = [] domain_stats = defaultdict(lambda: {"count": 0, "sources": set(), "entropies": [], "lengths": [], "txt_count": 0, "total": 0}) for q in queries: query = q.get("query", "") if not query or query == "-": continue parts = query.rstrip(".").split(".") if len(parts) < 2: continue base_domain = ".".join(parts[-2:]) subdomain = ".".join(parts[:-2]) stats = domain_stats[base_domain] stats["count"] += 1 stats["total"] += 1 stats["sources"].add(q.get("source", "")) if subdomain: entropy = calculate_entropy(subdomain.replace(".", "")) stats["entropies"].append(entropy) stats["lengths"].append(len(subdomain)) if entropy > ENTROPY_THRESHOLD and len(subdomain) > SUBDOMAIN_LENGTH_THRESHOLD: findings.append({ "type": "high_entropy_long_subdomain", "query": query, "subdomain": subdomain, "entropy": round(entropy, 3), "length": len(subdomain), "source": q.get("source", ""), "severity": "HIGH", }) if q.get("qtype", "").upper() in ("TXT", "NULL", "CNAME"): stats["txt_count"] += 1 for domain, stats in domain_stats.items(): if stats["total"] > QUERY_RATE_THRESHOLD: avg_entropy = (sum(stats["entropies"]) / len(stats["entropies"]) if stats["entropies"] else 0) avg_length = (sum(stats["lengths"]) / len(stats["lengths"]) if stats["lengths"] else 0) txt_ratio = stats["txt_count"] / stats["total"] score = 0 if avg_entropy > ENTROPY_THRESHOLD: score += 30 if avg_length > 30: score += 20 if txt_ratio > TXT_THRESHOLD: score += 25 if stats["total"] > 500: score += 25 if score >= 50: findings.append({ "type": "suspected_dns_tunnel", "domain": domain, "total_queries": stats["total"], "avg_entropy": round(avg_entropy, 3), "avg_subdomain_length": round(avg_length, 1), "txt_ratio": round(txt_ratio, 3), "tunnel_score": score, "unique_sources": len(stats["sources"]), "severity": "CRITICAL" if score >= 75 else "HIGH", }) return findings def main(): parser = argparse.ArgumentParser(description="DNS Exfiltration Detector") parser.add_argument("--dns-log", required=True, help="DNS log file (Zeek or text)") parser.add_argument("--format", choices=["zeek", "text"], default="zeek") parser.add_argument("--entropy-threshold", type=float, default=ENTROPY_THRESHOLD) parser.add_argument("--length-threshold", type=int, default=SUBDOMAIN_LENGTH_THRESHOLD) args = parser.parse_args() global ENTROPY_THRESHOLD, SUBDOMAIN_LENGTH_THRESHOLD ENTROPY_THRESHOLD = args.entropy_threshold SUBDOMAIN_LENGTH_THRESHOLD = args.length_threshold queries = parse_dns_log(args.dns_log, args.format) findings = analyze_queries(queries) results = { "timestamp": datetime.utcnow().isoformat() + "Z", "total_queries_analyzed": len(queries), "findings": findings, "total_findings": len(findings), } print(json.dumps(results, indent=2)) if __name__ == "__main__": main()