Files
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

171 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""DNS exfiltration detection agent using entropy analysis and query pattern anomalies.
Analyzes DNS query logs for tunneling indicators: high entropy subdomains,
excessive query length, abnormal TXT record usage, and volume spikes.
"""
import argparse
import json
import math
import re
import sys
from collections import Counter, defaultdict
from datetime import datetime
# Names of well-known DNS tunneling tools. Not referenced by the functions
# visible in this file; kept for callers that import it.
KNOWN_TUNNEL_DOMAINS = {
    "dnscat2",
    "iodine",
    "dns2tcp",
    "heyoka",
    "ozyman",
    "tuns",
    "dnscapy",
    "dns-tunnel",
}

# Per-query indicators: a subdomain is flagged when its entropy AND its
# length both exceed these values (main() may override them from the CLI).
ENTROPY_THRESHOLD = 3.5
SUBDOMAIN_LENGTH_THRESHOLD = 40

# Per-domain indicators: a base domain is only scored once it has received
# more than QUERY_RATE_THRESHOLD queries; TXT_THRESHOLD is the share of
# TXT/NULL/CNAME queries above which the tunnel score is raised.
QUERY_RATE_THRESHOLD = 100
TXT_THRESHOLD = 0.3
def calculate_entropy(text: str) -> float:
    """Return the Shannon entropy of *text* in bits per character.

    Args:
        text: String whose character distribution is measured.

    Returns:
        ``0.0`` for an empty string or a string made of one repeated
        character; otherwise ``-sum(p * log2(p))`` over the relative
        frequency ``p`` of every distinct character.
    """
    if not text:
        return 0.0
    length = len(text)
    weighted_logs = sum(
        (count / length) * math.log2(count / length)
        for count in Counter(text).values()
    )
    # Subtract from 0.0 instead of negating: for single-symbol input the sum
    # is 0.0 and unary minus would return -0.0, which prints and serializes
    # as "-0.0". For every non-zero sum the result is identical to -sum.
    return 0.0 - weighted_logs
def parse_dns_log(filepath, log_format="zeek"):
    """Parse a DNS log file into a list of query records.

    Args:
        filepath: Path of the log file to read.
        log_format: ``"zeek"`` for a tab-separated Zeek log whose columns are
            named by a ``#fields`` header line; any other value selects
            whitespace-separated text lines.

    Returns:
        A list of dicts, one per accepted line, each with the string keys
        ``timestamp``, ``source``, ``query``, ``qtype``, ``rcode`` and
        ``answers``. Values that the line does not provide are ``""``.

    Raises:
        OSError: If the file cannot be opened.
    """
    queries = []
    # Decode explicitly and replace undecodable bytes: the log carries
    # attacker-influenced query names, and a single bad byte must not abort
    # the whole analysis or depend on the host locale.
    with open(filepath, "r", encoding="utf-8", errors="replace") as f:
        if log_format == "zeek":
            headers = None
            for line in f:
                if line.startswith("#fields"):
                    # First token is the "#fields" marker itself.
                    headers = line.strip().split("\t")[1:]
                    continue
                # Skip other metadata lines and any row seen before the
                # column names are known.
                if line.startswith("#") or not headers:
                    continue
                fields = line.strip().split("\t")
                if len(fields) < len(headers):
                    continue
                record = dict(zip(headers, fields))
                queries.append({
                    "timestamp": record.get("ts", ""),
                    "source": record.get("id.orig_h", ""),
                    "query": record.get("query", ""),
                    "qtype": record.get("qtype_name", record.get("qtype", "")),
                    "rcode": record.get("rcode_name", ""),
                    "answers": record.get("answers", ""),
                })
        else:
            for line in f:
                parts = line.split()
                if len(parts) < 3:
                    continue
                # Layout: <timestamp> [<source> ...] <query> <qtype>.
                # With exactly three tokens there is no source column.
                queries.append({
                    "timestamp": parts[0],
                    "source": parts[1] if len(parts) > 3 else "",
                    "query": parts[-2],
                    "qtype": parts[-1],
                    # Not available in the text format; present so both
                    # formats yield records with the same keys.
                    "rcode": "",
                    "answers": "",
                })
    return queries
def analyze_queries(queries):
    """Scan parsed DNS queries for exfiltration and tunneling indicators.

    Two kinds of findings are produced, in this order:

    * ``high_entropy_long_subdomain`` -- one per query whose subdomain part
      is longer than ``SUBDOMAIN_LENGTH_THRESHOLD`` and whose entropy (dots
      removed) is above ``ENTROPY_THRESHOLD``.
    * ``suspected_dns_tunnel`` -- one per base domain that received more
      than ``QUERY_RATE_THRESHOLD`` queries and reaches a heuristic score of
      at least 50 (``CRITICAL`` from 75 upwards, otherwise ``HIGH``).

    Args:
        queries: List of dicts; the keys ``query``, ``source`` and ``qtype``
            are read, and a missing key is treated as an empty string.

    Returns:
        A list of finding dicts.
    """
    findings = []
    domain_stats = defaultdict(lambda: {
        "sources": set(),
        "entropies": [],
        "lengths": [],
        "txt_count": 0,
        "total": 0,
    })

    for q in queries:
        query = q.get("query", "")
        if not query or query == "-":
            continue
        parts = query.rstrip(".").split(".")
        if len(parts) < 2:
            continue

        # DNS names are case-insensitive, so fold the grouping key; otherwise
        # mixed-case queries for one domain are split across several buckets
        # and can stay below the volume threshold. The subdomain keeps its
        # case because the entropy measurement depends on it.
        # NOTE: the base domain is simply the last two labels.
        base_domain = ".".join(parts[-2:]).lower()
        subdomain = ".".join(parts[:-2])
        source = q.get("source", "")

        stats = domain_stats[base_domain]
        stats["total"] += 1
        if source:
            # An unknown (empty) source is not a distinct client.
            stats["sources"].add(source)

        if subdomain:
            entropy = calculate_entropy(subdomain.replace(".", ""))
            stats["entropies"].append(entropy)
            stats["lengths"].append(len(subdomain))
            if entropy > ENTROPY_THRESHOLD and len(subdomain) > SUBDOMAIN_LENGTH_THRESHOLD:
                findings.append({
                    "type": "high_entropy_long_subdomain",
                    "query": query,
                    "subdomain": subdomain,
                    "entropy": round(entropy, 3),
                    "length": len(subdomain),
                    "source": source,
                    "severity": "HIGH",
                })

        # Despite the key name, NULL and CNAME queries are counted as well.
        if q.get("qtype", "").upper() in ("TXT", "NULL", "CNAME"):
            stats["txt_count"] += 1

    for domain, stats in domain_stats.items():
        total = stats["total"]
        if total <= QUERY_RATE_THRESHOLD:
            continue

        entropies = stats["entropies"]
        lengths = stats["lengths"]
        avg_entropy = sum(entropies) / len(entropies) if entropies else 0
        avg_length = sum(lengths) / len(lengths) if lengths else 0
        txt_ratio = stats["txt_count"] / total

        # Additive heuristic; the four indicators sum to at most 100.
        score = 0
        if avg_entropy > ENTROPY_THRESHOLD:
            score += 30
        if avg_length > 30:
            score += 20
        if txt_ratio > TXT_THRESHOLD:
            score += 25
        if total > 500:
            score += 25
        if score < 50:
            continue

        findings.append({
            "type": "suspected_dns_tunnel",
            "domain": domain,
            "total_queries": total,
            "avg_entropy": round(avg_entropy, 3),
            "avg_subdomain_length": round(avg_length, 1),
            "txt_ratio": round(txt_ratio, 3),
            "tunnel_score": score,
            "unique_sources": len(stats["sources"]),
            "severity": "CRITICAL" if score >= 75 else "HIGH",
        })
    return findings
def main():
    """Run the detector from the command line and print a JSON report.

    Reads ``--dns-log`` in the chosen ``--format``, applies the optional
    ``--entropy-threshold`` / ``--length-threshold`` overrides to the
    module-level thresholds used by ``analyze_queries``, and writes the
    findings to stdout as indented JSON. Exits through ``parser.error`` when
    the log file cannot be read.
    """
    # The declaration has to come before any use of these names inside the
    # function: reading them (as argparse defaults) ahead of ``global`` is a
    # SyntaxError in Python 3.
    global ENTROPY_THRESHOLD, SUBDOMAIN_LENGTH_THRESHOLD
    from datetime import timezone

    parser = argparse.ArgumentParser(description="DNS Exfiltration Detector")
    parser.add_argument("--dns-log", required=True, help="DNS log file (Zeek or text)")
    parser.add_argument("--format", choices=["zeek", "text"], default="zeek")
    parser.add_argument("--entropy-threshold", type=float, default=ENTROPY_THRESHOLD)
    parser.add_argument("--length-threshold", type=int, default=SUBDOMAIN_LENGTH_THRESHOLD)
    args = parser.parse_args()

    ENTROPY_THRESHOLD = args.entropy_threshold
    SUBDOMAIN_LENGTH_THRESHOLD = args.length_threshold

    try:
        queries = parse_dns_log(args.dns_log, args.format)
    except OSError as exc:
        # Report a missing or unreadable file as a usage error, not a traceback.
        parser.error("cannot read DNS log {!r}: {}".format(args.dns_log, exc))
    findings = analyze_queries(queries)

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the emitted "<iso>Z" string keeps its previous format.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    results = {
        "timestamp": now_utc.isoformat() + "Z",
        "total_queries_analyzed": len(queries),
        "findings": findings,
        "total_findings": len(findings),
    }
    print(json.dumps(results, indent=2))


if __name__ == "__main__":
    main()