Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

169 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""DNS exfiltration detection agent using entropy analysis and query pattern anomalies.
Analyzes DNS query logs for tunneling indicators: high entropy subdomains,
excessive query length, abnormal TXT record usage, and volume spikes.
"""
import argparse
import json
import math
from collections import Counter, defaultdict
from datetime import datetime
# --- Detection thresholds read by analyze_queries() -------------------------

# Per-character Shannon entropy (bits) above which a subdomain counts as random-looking.
ENTROPY_THRESHOLD = 3.5
# Subdomain length (characters, dots included) above which a name counts as long.
SUBDOMAIN_LENGTH_THRESHOLD = 40
# Number of queries a base domain must exceed before it is scored as a tunnel candidate.
QUERY_RATE_THRESHOLD = 100
# Share of a domain's queries with type TXT/NULL/CNAME above which the score is raised.
TXT_THRESHOLD = 0.3

# Names that look like DNS tunnelling tools.
# NOTE(review): nothing in the visible code reads this set - confirm whether it
# is meant to be matched against query names.
KNOWN_TUNNEL_DOMAINS = {
    "dns-tunnel",
    "dns2tcp",
    "dnscapy",
    "dnscat2",
    "heyoka",
    "iodine",
    "ozyman",
    "tuns",
}
def calculate_entropy(text):
    """Return the Shannon entropy of *text* in bits per symbol.

    Args:
        text: Sequence of hashable symbols (normally a string); may be empty.

    Returns:
        A non-negative float. ``0.0`` for empty input and for input made of a
        single repeated symbol.
    """
    if not text:
        return 0.0
    length = len(text)
    entropy = -sum(
        (count / length) * math.log2(count / length)
        for count in Counter(text).values()
    )
    # When every symbol is identical the sum is 0.0 and the negation above
    # yields IEEE negative zero; abs() normalises that to a plain 0.0 so it
    # never prints as "-0.0". Entropy is never negative, so abs() is otherwise
    # a no-op.
    return abs(entropy)
def parse_dns_log(filepath, log_format="zeek"):
    """Parse a DNS query log into a list of uniform query records.

    Args:
        filepath: Path of the log file to read.
        log_format: ``"zeek"`` selects the tab-separated layout whose column
            names come from a ``#fields`` header line. Any other value selects
            the whitespace-separated text layout
            ``timestamp [source ...] query qtype``.

    Returns:
        A list of dicts, one per accepted line, each with the keys
        ``timestamp``, ``source``, ``query``, ``qtype``, ``rcode`` and
        ``answers``. Values missing from the input are empty strings (the text
        layout never carries ``rcode``/``answers``).

    Raises:
        OSError: If the file cannot be opened.
    """
    queries = []
    # Query names may carry arbitrary bytes; decode leniently so a single
    # undecodable byte does not abort the whole analysis.
    with open(filepath, "r", encoding="utf-8", errors="replace") as f:
        if log_format == "zeek":
            headers = None
            for raw_line in f:
                # Strip only the line terminator: a plain strip() would also
                # eat trailing tabs and thereby drop rows whose last column is
                # empty.
                line = raw_line.rstrip("\r\n")
                if line.startswith("#fields"):
                    headers = line.split("\t")[1:]
                    continue
                # Skip other metadata lines and anything before the header.
                if line.startswith("#") or not headers:
                    continue
                fields = line.split("\t")
                if len(fields) < len(headers):
                    continue
                record = dict(zip(headers, fields))
                queries.append({
                    "timestamp": record.get("ts", ""),
                    "source": record.get("id.orig_h", ""),
                    "query": record.get("query", ""),
                    "qtype": record.get("qtype_name", record.get("qtype", "")),
                    "rcode": record.get("rcode_name", ""),
                    "answers": record.get("answers", ""),
                })
        else:
            for raw_line in f:
                parts = raw_line.split()
                if len(parts) < 3:
                    continue
                queries.append({
                    "timestamp": parts[0],
                    # With exactly three columns there is no source column.
                    "source": parts[1] if len(parts) > 3 else "",
                    "query": parts[-2],
                    "qtype": parts[-1],
                    # Same schema as the Zeek branch so consumers see one shape.
                    "rcode": "",
                    "answers": "",
                })
    return queries
def analyze_queries(queries):
    """Scan parsed DNS queries for tunnelling / exfiltration indicators.

    Two kinds of finding are produced:

    * ``high_entropy_long_subdomain`` (severity HIGH): one per query whose
      subdomain exceeds both ENTROPY_THRESHOLD and SUBDOMAIN_LENGTH_THRESHOLD.
    * ``suspected_dns_tunnel``: one per base domain that received more than
      QUERY_RATE_THRESHOLD queries and reaches a tunnel score of at least 50
      (severity CRITICAL from 75 upwards, otherwise HIGH).

    The base domain is the last two labels of the query name, lower-cased;
    everything in front of it is the subdomain.

    Args:
        queries: Iterable of dicts such as those built by parse_dns_log().
            Only the ``query``, ``source`` and ``qtype`` keys are read.

    Returns:
        A list of finding dicts: per-query findings in input order, followed
        by per-domain findings.
    """
    findings = []
    domain_stats = defaultdict(lambda: {
        "sources": set(),
        "entropies": [],
        "lengths": [],
        "txt_count": 0,
        "total": 0,
    })

    for q in queries:
        query = q.get("query", "")
        # Skip records without a usable name ("-" is treated as a placeholder).
        if not query or query == "-":
            continue
        labels = query.rstrip(".").split(".")
        if len(labels) < 2:
            continue

        # DNS names compare case-insensitively, so fold the aggregation key;
        # otherwise mixed-case queries for one domain are spread over several
        # buckets and each may stay below the volume threshold. The subdomain
        # keeps its case because case carries information for the entropy test.
        base_domain = ".".join(labels[-2:]).lower()
        subdomain = ".".join(labels[:-2])
        source = q.get("source", "")

        stats = domain_stats[base_domain]
        stats["total"] += 1
        stats["sources"].add(source)

        if subdomain:
            # Entropy is measured over the label characters only (dots removed),
            # while the length includes the separating dots.
            entropy = calculate_entropy(subdomain.replace(".", ""))
            stats["entropies"].append(entropy)
            stats["lengths"].append(len(subdomain))
            if entropy > ENTROPY_THRESHOLD and len(subdomain) > SUBDOMAIN_LENGTH_THRESHOLD:
                findings.append({
                    "type": "high_entropy_long_subdomain",
                    "query": query,
                    "subdomain": subdomain,
                    "entropy": round(entropy, 3),
                    "length": len(subdomain),
                    "source": source,
                    "severity": "HIGH",
                })

        # "txt_count" also covers NULL and CNAME queries.
        if q.get("qtype", "").upper() in ("TXT", "NULL", "CNAME"):
            stats["txt_count"] += 1

    for domain, stats in domain_stats.items():
        total = stats["total"]
        if total <= QUERY_RATE_THRESHOLD:
            continue

        entropies = stats["entropies"]
        lengths = stats["lengths"]
        avg_entropy = sum(entropies) / len(entropies) if entropies else 0
        avg_length = sum(lengths) / len(lengths) if lengths else 0
        txt_ratio = stats["txt_count"] / total

        # Additive score: each indicator contributes a fixed weight.
        score = 0
        if avg_entropy > ENTROPY_THRESHOLD:
            score += 30
        if avg_length > 30:
            score += 20
        if txt_ratio > TXT_THRESHOLD:
            score += 25
        if total > 500:
            score += 25
        if score < 50:
            continue

        findings.append({
            "type": "suspected_dns_tunnel",
            "domain": domain,
            "total_queries": total,
            "avg_entropy": round(avg_entropy, 3),
            "avg_subdomain_length": round(avg_length, 1),
            "txt_ratio": round(txt_ratio, 3),
            "tunnel_score": score,
            "unique_sources": len(stats["sources"]),
            "severity": "CRITICAL" if score >= 75 else "HIGH",
        })
    return findings
def main():
    """Command-line entry point: analyze a DNS log and print a JSON report.

    Parses the command line, applies ``--entropy-threshold`` and
    ``--length-threshold`` to the module-level thresholds, parses the log with
    parse_dns_log(), analyzes it with analyze_queries() and prints a JSON
    document with the keys ``timestamp``, ``total_queries_analyzed``,
    ``findings`` and ``total_findings`` to stdout.
    """
    global ENTROPY_THRESHOLD, SUBDOMAIN_LENGTH_THRESHOLD
    # Function-scope import: the module itself only imports `datetime`.
    from datetime import timezone

    parser = argparse.ArgumentParser(description="DNS Exfiltration Detector")
    parser.add_argument("--dns-log", required=True, help="DNS log file (Zeek or text)")
    parser.add_argument("--format", choices=["zeek", "text"], default="zeek")
    parser.add_argument("--entropy-threshold", type=float, default=ENTROPY_THRESHOLD)
    parser.add_argument("--length-threshold", type=int, default=SUBDOMAIN_LENGTH_THRESHOLD)
    args = parser.parse_args()

    # analyze_queries() reads these module globals, so the CLI overrides are
    # applied by rebinding them before the analysis runs.
    ENTROPY_THRESHOLD = args.entropy_threshold
    SUBDOMAIN_LENGTH_THRESHOLD = args.length_threshold

    queries = parse_dns_log(args.dns_log, args.format)
    findings = analyze_queries(queries)

    # datetime.utcnow() is deprecated since Python 3.12; take an aware UTC time
    # and render its "+00:00" offset as the "Z" suffix the report has always used.
    generated_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    results = {
        "timestamp": generated_at,
        "total_queries_analyzed": len(queries),
        "findings": findings,
        "total_findings": len(findings),
    }
    print(json.dumps(results, indent=2))


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()