Files
Anthropic-Cybersecurity-Skills/skills/analyzing-dns-logs-for-exfiltration/scripts/agent.py
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

230 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""DNS exfiltration detection agent using entropy analysis and query pattern detection."""
import math
from collections import Counter, defaultdict
def shannon_entropy(text):
    """Calculate the Shannon entropy of a string, in bits per character.

    The text is lowercased before its characters are counted, so the score
    is case-insensitive.

    Args:
        text: String to score. An empty (falsy) value yields 0.0.

    Returns:
        The entropy rounded to four decimal places. A string made of a
        single repeated character scores 0.0.
    """
    if not text:
        return 0.0
    normalized = text.lower()
    # Use the length of the lowercased text as the denominator: str.lower()
    # can change the number of characters (for example "\u0130" becomes two
    # code points), and dividing by the original length would make the
    # per-character probabilities not sum to 1.
    length = len(normalized)
    entropy = -sum(
        (count / length) * math.log2(count / length)
        for count in Counter(normalized).values()
    )
    # Entropy is never negative; abs() only turns the -0.0 produced for
    # single-symbol strings into a plain 0.0.
    return abs(round(entropy, 4))
def extract_subdomain(fqdn):
    """Return the labels of *fqdn* that sit to the left of its last two labels.

    Trailing dots are removed first. The last two labels are treated as the
    registered domain, so a name with two labels or fewer has no subdomain
    and yields an empty string.
    """
    labels = fqdn.rstrip(".").split(".")
    # Slicing off the final two labels leaves an empty list for short names,
    # which joins to "".
    leading_labels = labels[:-2]
    return ".".join(leading_labels)
def extract_registered_domain(fqdn):
    """Extract the registered domain (SLD + TLD) from an FQDN.

    The name is normalized before splitting: trailing dots are removed and
    the text is lowercased, because DNS names compare case-insensitively and
    callers use the result as a grouping key.

    Args:
        fqdn: Domain name, with or without a trailing dot.

    Returns:
        The last two labels joined by a dot. A name with fewer than two
        labels is returned in its normalized form.
    """
    name = fqdn.rstrip(".").lower()
    labels = name.split(".")
    if len(labels) >= 2:
        return ".".join(labels[-2:])
    # Single-label (or empty) names have no SLD/TLD pair to extract.
    return name
def detect_tunneling(dns_records, subdomain_len_threshold=50, min_queries=20):
    """Detect DNS tunneling based on subdomain length anomalies.

    Queries whose subdomain is longer than ``subdomain_len_threshold``
    characters are grouped by registered domain. Every domain that collects
    at least ``min_queries`` such queries produces one alert.

    Args:
        dns_records: Iterable of dicts read with the keys ``"query"`` and
            ``"src_ip"``. A missing or ``None`` query is treated as empty.
        subdomain_len_threshold: A subdomain must be strictly longer than
            this to be counted.
        min_queries: Minimum number of counted queries for a domain to be
            reported.

    Returns:
        A list of alert dicts sorted by ``avg_subdomain_length``, highest
        first. ``sources`` is sorted so the output is deterministic.
    """
    domain_stats = defaultdict(lambda: {"queries": 0, "unique_queries": set(),
                                        "subdomain_lengths": [], "sources": set()})
    for record in dns_records:
        # The key may be present with a None value; fall back to "" so the
        # string helpers below do not raise AttributeError.
        query = record.get("query") or ""
        src = record.get("src_ip", "unknown")
        subdomain = extract_subdomain(query)
        if len(subdomain) <= subdomain_len_threshold:
            continue
        # DNS names are case-insensitive; fold case so mixed-case variants of
        # one domain share a bucket instead of diluting the per-domain count.
        reg_domain = extract_registered_domain(query).lower()
        stats = domain_stats[reg_domain]
        stats["queries"] += 1
        stats["unique_queries"].add(query)
        stats["subdomain_lengths"].append(len(subdomain))
        stats["sources"].add(src)

    alerts = []
    for domain, stats in domain_stats.items():
        if stats["queries"] < min_queries:
            continue
        lengths = stats["subdomain_lengths"]
        alerts.append({
            "domain": domain,
            "queries": stats["queries"],
            "unique_queries": len(stats["unique_queries"]),
            "avg_subdomain_length": round(sum(lengths) / len(lengths), 1),
            "max_subdomain_length": max(lengths),
            # key=str keeps the sort safe if source values are not all strings.
            "sources": sorted(stats["sources"], key=str),
            "verdict": "CRITICAL - Likely DNS tunneling",
        })
    return sorted(alerts, key=lambda x: x["avg_subdomain_length"], reverse=True)
def detect_dga(dns_records, entropy_threshold=3.5, min_sld_length=12):
    """Detect Domain Generation Algorithm queries using entropy scoring.

    For every query, the second-to-last label (the SLD) is scored with
    ``shannon_entropy``. Queries whose SLD has at least ``min_sld_length``
    characters and an entropy above ``entropy_threshold`` are collected, one
    alert per distinct query name.

    Args:
        dns_records: Iterable of dicts read with the keys ``"query"`` and
            ``"src_ip"``. A missing or ``None`` query is skipped.
        entropy_threshold: The SLD entropy must be strictly greater than
            this to be flagged.
        min_sld_length: SLDs shorter than this are ignored.

    Returns:
        A list of alert dicts sorted by ``avg_entropy``, highest first.
        ``sources`` is sorted so the output is deterministic.
    """
    suspicious = defaultdict(lambda: {"count": 0, "sources": set(), "entropies": []})
    for record in dns_records:
        # The key may be present with a None value; treat it as empty. The
        # name is lowercased so mixed-case variants of one query are grouped
        # together (the entropy score already ignores case).
        query = (record.get("query") or "").rstrip(".").lower()
        src = record.get("src_ip", "unknown")
        labels = query.split(".")
        if len(labels) < 2:
            continue
        sld = labels[-2]
        if len(sld) < min_sld_length:
            continue
        ent = shannon_entropy(sld)
        if ent > entropy_threshold:
            bucket = suspicious[query]
            bucket["count"] += 1
            bucket["sources"].add(src)
            bucket["entropies"].append(ent)

    alerts = []
    for domain, data in suspicious.items():
        avg_entropy = sum(data["entropies"]) / len(data["entropies"])
        alerts.append({
            "domain": domain,
            "queries": data["count"],
            "avg_entropy": round(avg_entropy, 4),
            # key=str keeps the sort safe if source values are not all strings.
            "sources": sorted(data["sources"], key=str),
            "verdict": "HIGH - Possible DGA domain",
        })
    return sorted(alerts, key=lambda x: x["avg_entropy"], reverse=True)
def detect_volume_anomaly(dns_records, z_score_threshold=3.0):
    """Flag source hosts whose DNS query count is a statistical outlier.

    Queries are tallied per ``src_ip`` (records without one are counted under
    ``"unknown"``). Each host's count is converted to a z-score using the
    mean and the sample standard deviation (n - 1 denominator) of all host
    counts; hosts scoring strictly above ``z_score_threshold`` are returned,
    highest score first. Fewer than two hosts, or identical counts for every
    host, produce an empty list.
    """
    per_host = Counter(entry.get("src_ip", "unknown") for entry in dns_records)
    totals = list(per_host.values())
    # A spread cannot be computed from zero or one host.
    if len(totals) < 2:
        return []

    average = sum(totals) / len(totals)
    squared_deviation = sum((total - average) ** 2 for total in totals)
    spread = (squared_deviation / (len(totals) - 1)) ** 0.5
    # Every host issued the same number of queries: nothing stands out.
    if spread == 0:
        return []

    flagged = []
    for host, total in per_host.items():
        score = (total - average) / spread
        if score > z_score_threshold:
            flagged.append({
                "src_ip": host,
                "queries": total,
                "z_score": round(score, 2),
                "mean": round(average, 1),
                "verdict": "HIGH - Anomalous query volume",
            })
    flagged.sort(key=lambda item: item["z_score"], reverse=True)
    return flagged
def detect_txt_abuse(dns_records, threshold=100):
    """Report source hosts that issue an excessive number of TXT queries.

    A record counts as a TXT query when its ``query_type``, converted to an
    uppercase string, is ``"TXT"`` or ``"16"``. Hosts with strictly more than
    ``threshold`` TXT queries are returned, busiest first, with a severity of
    CRITICAL (more than 1000), HIGH (more than 500) or MEDIUM.
    """
    txt_totals = Counter()
    queried_names = defaultdict(set)
    for entry in dns_records:
        if str(entry.get("query_type", "")).upper() not in ("TXT", "16"):
            continue
        host = entry.get("src_ip", "unknown")
        txt_totals[host] += 1
        queried_names[host].add(entry.get("query", ""))

    findings = []
    for host, total in txt_totals.items():
        if not total > threshold:
            continue
        if total > 1000:
            severity = "CRITICAL"
        elif total > 500:
            severity = "HIGH"
        else:
            severity = "MEDIUM"
        findings.append({
            "src_ip": host,
            "txt_queries": total,
            "unique_domains": len(queried_names[host]),
            "verdict": f"{severity} - Possible DNS tunneling via TXT records",
        })
    findings.sort(key=lambda item: item["txt_queries"], reverse=True)
    return findings
def estimate_exfil_volume(dns_records, target_domain, decode_factor=0.75):
    """Estimate data volume encoded in DNS queries to a specific domain.

    A query is attributed to ``target_domain`` only when it equals the
    domain or ends with ``"." + target_domain``; the comparison ignores case
    and trailing dots. A plain substring test would also count unrelated
    names such as ``notevil.com`` or ``evil.com.attacker.net`` for the
    target ``evil.com``. The encoded payload of a matching query is
    everything to the left of the target domain, label-separating dots
    included.

    Args:
        dns_records: Iterable of dicts read with the key ``"query"``. A
            missing or ``None`` query is ignored.
        target_domain: Domain whose subdomains carry the payload. An empty
            value matches nothing.
        decode_factor: Ratio of decoded to encoded bytes. The default 0.75
            is the Base64 ratio; pass another value for a different encoding.

    Returns:
        A dict with the target domain as given, the number of matching
        queries, the encoded byte count, and the estimated decoded size in
        bytes, KiB (one decimal) and MiB (three decimals).
    """
    target = (target_domain or "").rstrip(".").lower()
    suffix = "." + target
    total_encoded_bytes = 0
    query_count = 0
    if target:
        for record in dns_records:
            name = (record.get("query") or "").rstrip(".").lower()
            if name == target:
                # A query for the bare domain matches but carries no payload.
                query_count += 1
            elif name.endswith(suffix):
                query_count += 1
                total_encoded_bytes += len(name) - len(suffix)
    decoded_bytes = int(total_encoded_bytes * decode_factor)
    return {
        "target_domain": target_domain,
        "total_queries": query_count,
        "encoded_bytes": total_encoded_bytes,
        "estimated_decoded_bytes": decoded_bytes,
        "estimated_kb": round(decoded_bytes / 1024, 1),
        "estimated_mb": round(decoded_bytes / (1024 * 1024), 3),
    }
def parse_zeek_dns_log(log_path):
    """Parse a Zeek dns.log file into structured records.

    The file is read as tab-separated text. When a ``#fields`` header line
    names all required columns (``ts``, ``id.orig_h``, ``id.orig_p``,
    ``id.resp_h``, ``query``), the column positions are taken from it, and
    ``qtype_name`` is read from wherever the header places it. Otherwise
    fixed positions are used: 0, 2, 3, 4 and 9 for the required columns and
    13 for the query type. Other lines starting with ``#`` are skipped, as
    are rows too short to contain every required column.

    Args:
        log_path: Path of the log file to read.

    Returns:
        A list of dicts with the keys ``timestamp``, ``src_ip``,
        ``src_port``, ``dst_ip``, ``query`` and ``query_type``. All values
        are strings; ``query_type`` is ``""`` when the row has no such column.
    """
    required = ("ts", "id.orig_h", "id.orig_p", "id.resp_h", "query")
    # Fixed positions used until (and unless) a usable "#fields" header is seen.
    columns = {"ts": 0, "id.orig_h": 2, "id.orig_p": 3, "id.resp_h": 4,
               "query": 9, "qtype_name": 13}
    min_width = max(columns[name] for name in required) + 1
    records = []
    # Explicit encoding keeps parsing independent of the platform default;
    # errors="replace" avoids aborting the whole file on a stray byte.
    with open(log_path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            # Strip only the line terminator so trailing empty columns are kept.
            line = line.rstrip("\r\n")
            if line.startswith("#"):
                if line.startswith("#fields"):
                    header = {name: pos
                              for pos, name in enumerate(line.split("\t")[1:])}
                    # Adopt the header only when it can supply every required
                    # column; otherwise keep the positions already in use.
                    if all(name in header for name in required):
                        columns = header
                        min_width = max(columns[name] for name in required) + 1
                continue
            parts = line.split("\t")
            if len(parts) < min_width:
                continue
            qtype_pos = columns.get("qtype_name")
            has_qtype = qtype_pos is not None and len(parts) > qtype_pos
            records.append({
                "timestamp": parts[columns["ts"]],
                "src_ip": parts[columns["id.orig_h"]],
                "src_port": parts[columns["id.orig_p"]],
                "dst_ip": parts[columns["id.resp_h"]],
                "query": parts[columns["query"]],
                "query_type": parts[qtype_pos] if has_qtype else "",
            })
    return records
if __name__ == "__main__":
    # Self-contained demonstration: runs the detectors on synthetic records
    # and prints what each one reports.
    rule = "=" * 60
    print(rule)
    print("DNS Exfiltration Detection Agent")
    print("Tunneling, DGA, volume anomaly, and TXT abuse detection")
    print(rule)

    # Demo with synthetic DNS records: 50 long-subdomain TXT queries,
    # 5 queries for a random-looking name, and 10 ordinary lookups.
    tunnel_traffic = [
        {"query": f"{'a' * 60}.evil-tunnel.com", "src_ip": "192.168.1.105",
         "query_type": "TXT"}
        for _ in range(50)
    ]
    random_name_traffic = [
        {"query": "x8kj2m9p4qw7nz3.xyz", "src_ip": "192.168.1.110",
         "query_type": "A"}
        for _ in range(5)
    ]
    ordinary_traffic = [
        {"query": "google.com", "src_ip": "192.168.1.50", "query_type": "A"}
        for _ in range(10)
    ]
    demo_records = tunnel_traffic + random_name_traffic + ordinary_traffic

    print("\n--- DNS Tunneling Detection ---")
    for alert in detect_tunneling(demo_records, subdomain_len_threshold=30,
                                  min_queries=10):
        print(f"[!] {alert['domain']}: {alert['queries']} queries, "
              f"avg subdomain len={alert['avg_subdomain_length']}")

    print("\n--- DGA Detection ---")
    dga_alerts = detect_dga(demo_records, entropy_threshold=3.0, min_sld_length=10)
    # Only the five highest-entropy findings are shown.
    for alert in dga_alerts[:5]:
        print(f"[!] {alert['domain']}: entropy={alert['avg_entropy']}")

    print("\n--- TXT Record Abuse ---")
    for alert in detect_txt_abuse(demo_records, threshold=10):
        print(f"[!] {alert['src_ip']}: {alert['txt_queries']} TXT queries")

    print("\n--- Entropy Examples ---")
    for sample in ("google", "x8kj2m9p4qw7n", "aGVsbG8gd29ybGQ"):
        print(f" '{sample}' -> entropy={shannon_entropy(sample)}")