mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-12 14:14:56 +03:00
c47eed6a64
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
230 lines
8.5 KiB
Python
230 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""DNS exfiltration detection agent using entropy analysis and query pattern detection."""
|
|
|
|
import math
|
|
from collections import Counter, defaultdict
|
|
|
|
|
|
def shannon_entropy(text):
    """Calculate the Shannon entropy of a string, in bits per character.

    Characters are compared case-insensitively: the string is lowercased
    before symbol frequencies are counted.

    Args:
        text: String to measure. Empty (or otherwise falsy) input yields 0.0.

    Returns:
        The entropy as a non-negative float rounded to four decimal places.
    """
    if not text:
        return 0.0
    lowered = text.lower()
    # Lowercasing can change the number of characters (e.g. "\u0130" becomes
    # two code points), so the probabilities must be taken over the lowered
    # string rather than over the original length.
    length = len(lowered)
    entropy = -sum(
        (count / length) * math.log2(count / length)
        for count in Counter(lowered).values()
    )
    # A single repeated symbol sums to 0.0, which negates to -0.0; abs()
    # normalises that to a plain 0.0 without affecting any other result.
    return abs(round(entropy, 4))
|
|
|
|
|
|
def extract_subdomain(fqdn):
    """Return everything to the left of the last two labels of a domain name.

    Trailing dots are removed first. The final two labels are always taken
    to be the registered domain; multi-label suffixes such as "co.uk" get no
    special treatment.

    Args:
        fqdn: Domain name string, with or without a trailing dot.

    Returns:
        The leading labels joined by dots, or "" when the name has two
        labels or fewer.
    """
    pieces = fqdn.rstrip(".").rsplit(".", 2)
    # Splitting at most twice from the right yields three pieces only when
    # at least one label sits in front of the final two.
    return pieces[0] if len(pieces) == 3 else ""
|
|
|
|
|
|
def extract_registered_domain(fqdn):
    """Extract the registered domain (SLD + TLD) from an FQDN.

    The name is stripped of trailing dots and lowercased before splitting.
    DNS names compare case-insensitively, so lowercasing lets differently
    cased spellings of one domain share a single key. The last two labels
    are always used; multi-label suffixes such as "co.uk" get no special
    treatment.

    Args:
        fqdn: Domain name string, with or without a trailing dot.

    Returns:
        The last two labels joined by a dot, or the normalised name itself
        when it has fewer than two labels.
    """
    name = fqdn.rstrip(".").lower()
    parts = name.split(".")
    if len(parts) >= 2:
        return ".".join(parts[-2:])
    # Return the normalised name (not the raw input) so a single-label name
    # with a trailing dot is handled like the multi-label case.
    return name
|
|
|
|
|
|
def detect_tunneling(dns_records, subdomain_len_threshold=50, min_queries=20):
    """Flag registered domains that receive many long-subdomain queries.

    Only queries whose subdomain portion is longer than
    ``subdomain_len_threshold`` characters are counted. A domain is reported
    once it has collected at least ``min_queries`` such queries.

    Args:
        dns_records: Iterable of dicts; the "query" and "src_ip" keys are
            read, defaulting to "" and "unknown" when absent.
        subdomain_len_threshold: Subdomain length a query must exceed.
        min_queries: Minimum number of long queries needed for an alert.

    Returns:
        A list of alert dicts ordered by descending average subdomain length.
    """
    def _empty_bucket():
        return {"queries": 0, "unique_queries": set(),
                "subdomain_lengths": [], "sources": set()}

    per_domain = defaultdict(_empty_bucket)
    for entry in dns_records:
        name = entry.get("query", "")
        origin = entry.get("src_ip", "unknown")
        sub_len = len(extract_subdomain(name))
        if sub_len > subdomain_len_threshold:
            bucket = per_domain[extract_registered_domain(name)]
            bucket["queries"] += 1
            bucket["unique_queries"].add(name)
            bucket["subdomain_lengths"].append(sub_len)
            bucket["sources"].add(origin)

    findings = []
    for domain, bucket in per_domain.items():
        if bucket["queries"] >= min_queries:
            lengths = bucket["subdomain_lengths"]
            findings.append({
                "domain": domain,
                "queries": bucket["queries"],
                "unique_queries": len(bucket["unique_queries"]),
                "avg_subdomain_length": round(sum(lengths) / len(lengths), 1),
                "max_subdomain_length": max(lengths),
                "sources": list(bucket["sources"]),
                "verdict": "CRITICAL - Likely DNS tunneling",
            })

    findings.sort(key=lambda item: item["avg_subdomain_length"], reverse=True)
    return findings
|
|
|
|
|
|
def detect_dga(dns_records, entropy_threshold=3.5, min_sld_length=12):
    """Flag queries whose second-level label looks randomly generated.

    The second-to-last label of each query is scored with
    ``shannon_entropy``. Labels shorter than ``min_sld_length`` are skipped,
    and a query is kept when its score exceeds ``entropy_threshold``.
    Results are grouped by the full query name (trailing dots removed).

    Args:
        dns_records: Iterable of dicts; the "query" and "src_ip" keys are
            read, defaulting to "" and "unknown" when absent.
        entropy_threshold: Entropy a label must exceed to be flagged.
        min_sld_length: Minimum label length considered for scoring.

    Returns:
        A list of alert dicts ordered by descending average entropy.
    """
    flagged = defaultdict(lambda: {"count": 0, "sources": set(), "entropies": []})
    for entry in dns_records:
        name = entry.get("query", "").rstrip(".")
        origin = entry.get("src_ip", "unknown")
        labels = name.split(".")
        # A name needs at least two labels to have a second-level label.
        if len(labels) < 2 or len(labels[-2]) < min_sld_length:
            continue
        score = shannon_entropy(labels[-2])
        if score > entropy_threshold:
            hit = flagged[name]
            hit["count"] += 1
            hit["sources"].add(origin)
            hit["entropies"].append(score)

    findings = [
        {
            "domain": name,
            "queries": hit["count"],
            "avg_entropy": round(sum(hit["entropies"]) / len(hit["entropies"]), 4),
            "sources": list(hit["sources"]),
            "verdict": "HIGH - Possible DGA domain",
        }
        for name, hit in flagged.items()
    ]
    findings.sort(key=lambda item: item["avg_entropy"], reverse=True)
    return findings
|
|
|
|
|
|
def detect_volume_anomaly(dns_records, z_score_threshold=3.0):
    """Flag source hosts whose query count is a high outlier.

    Queries are counted per "src_ip" (defaulting to "unknown"), and each
    host's count is turned into a z-score using the mean and the sample
    standard deviation (n - 1 denominator) across all hosts.

    With a sample standard deviation, one host among n can score at most
    (n - 1) / sqrt(n), so the default threshold of 3.0 can only trigger when
    at least 11 distinct hosts are present.

    Args:
        dns_records: Iterable of dicts; only the "src_ip" key is read.
        z_score_threshold: Z-score a host must exceed to be reported.

    Returns:
        A list of anomaly dicts ordered by descending z-score. The list is
        empty when fewer than two hosts are seen or all counts are equal.
    """
    per_host = Counter(entry.get("src_ip", "unknown") for entry in dns_records)
    host_total = len(per_host)
    if host_total < 2:
        return []

    mean_q = sum(per_host.values()) / host_total
    squared_dev = sum((hits - mean_q) ** 2 for hits in per_host.values())
    spread = (squared_dev / (host_total - 1)) ** 0.5
    if spread == 0:
        return []

    scored = (
        (host, hits, (hits - mean_q) / spread)
        for host, hits in per_host.items()
    )
    outliers = [
        {
            "src_ip": host,
            "queries": hits,
            "z_score": round(z, 2),
            "mean": round(mean_q, 1),
            "verdict": "HIGH - Anomalous query volume",
        }
        for host, hits, z in scored
        if z > z_score_threshold
    ]
    outliers.sort(key=lambda item: item["z_score"], reverse=True)
    return outliers
|
|
|
|
|
|
def detect_txt_abuse(dns_records, threshold=100):
    """Flag source hosts that issue an excessive number of TXT queries.

    A record counts as a TXT query when its "query_type", converted to an
    upper-cased string, is "TXT" or "16". Hosts with more than ``threshold``
    such queries are reported with a severity of MEDIUM, HIGH (over 500) or
    CRITICAL (over 1000).

    Args:
        dns_records: Iterable of dicts; the "query_type", "src_ip" and
            "query" keys are read.
        threshold: TXT query count a host must exceed to be reported.

    Returns:
        A list of alert dicts ordered by descending TXT query count.
    """
    per_source = defaultdict(lambda: {"count": 0, "unique_domains": set()})
    for entry in dns_records:
        if str(entry.get("query_type", "")).upper() not in ("TXT", "16"):
            continue
        tally = per_source[entry.get("src_ip", "unknown")]
        tally["count"] += 1
        tally["unique_domains"].add(entry.get("query", ""))

    findings = []
    for source, tally in per_source.items():
        total = tally["count"]
        if total > threshold:
            if total > 1000:
                level = "CRITICAL"
            elif total > 500:
                level = "HIGH"
            else:
                level = "MEDIUM"
            findings.append({
                "src_ip": source,
                "txt_queries": total,
                "unique_domains": len(tally["unique_domains"]),
                "verdict": f"{level} - Possible DNS tunneling via TXT records",
            })

    findings.sort(key=lambda item: item["txt_queries"], reverse=True)
    return findings
|
|
|
|
|
|
def estimate_exfil_volume(dns_records, target_domain):
    """Estimate data volume encoded in DNS queries to a specific domain.

    A query belongs to ``target_domain`` when it equals the domain or ends
    with "." followed by the domain. Matching ignores case and trailing
    dots. A plain substring test would also count unrelated names such as
    "notevil.com" or "evil.com.attacker.net" for the target "evil.com".

    The encoded size of a matching query is the length of everything in
    front of the target domain, label-separating dots included. The decoded
    size assumes the payload is Base64-like (3 bytes per 4 characters).

    Args:
        dns_records: Iterable of dicts; only the "query" key is read,
            defaulting to "" when absent.
        target_domain: Domain whose queries should be measured. An empty
            value matches no query.

    Returns:
        A dict with the target domain as given, the number of matching
        queries, and the encoded/decoded size estimates.
    """
    target = target_domain.rstrip(".").lower()
    suffix = "." + target
    total_encoded_bytes = 0
    query_count = 0
    if target:
        for record in dns_records:
            name = record.get("query", "").rstrip(".").lower()
            if name == target:
                # Query for the bare domain: counts, but carries no payload.
                query_count += 1
            elif name.endswith(suffix):
                # Measure only the labels in front of the target domain, so
                # targets with more than two labels are sized correctly too.
                total_encoded_bytes += len(name) - len(suffix)
                query_count += 1
    decoded_bytes = int(total_encoded_bytes * 0.75)  # Base64 decode factor
    return {
        "target_domain": target_domain,
        "total_queries": query_count,
        "encoded_bytes": total_encoded_bytes,
        "estimated_decoded_bytes": decoded_bytes,
        "estimated_kb": round(decoded_bytes / 1024, 1),
        "estimated_mb": round(decoded_bytes / (1024 * 1024), 3),
    }
|
|
|
|
|
|
def parse_zeek_dns_log(log_path):
    """Parse a tab-separated Zeek dns.log file into structured records.

    Column positions are taken from the file's "#fields" header line when
    one is present, so logs written with a customised column set are read
    correctly. Without such a header, the fixed positions 0, 2, 3, 4, 9 and
    13 are used. All other lines starting with "#" are skipped.

    NOTE(review): the "#fields" line is assumed to be tab-separated like the
    data rows; confirm for logs written with a non-default separator.

    Args:
        log_path: Path of the log file to read.

    Returns:
        A list of dicts with the keys "timestamp", "src_ip", "src_port",
        "dst_ip", "query" and "query_type". Rows that do not reach the query
        column are dropped; any other missing column is returned as "".

    Raises:
        OSError: If the file cannot be opened.
    """
    wanted = ("ts", "id.orig_h", "id.orig_p", "id.resp_h", "query", "qtype_name")
    # Fallback positions used until (or unless) a "#fields" header is seen.
    layout = {"ts": 0, "id.orig_h": 2, "id.orig_p": 3, "id.resp_h": 4,
              "query": 9, "qtype_name": 13}

    def column(parts, name):
        """Return the named column of a row, or "" when it is unavailable."""
        index = layout.get(name)
        if index is None or index >= len(parts):
            return ""
        return parts[index]

    records = []
    # Explicit encoding keeps parsing independent of the platform default;
    # undecodable bytes are replaced instead of aborting the whole parse.
    with open(log_path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            # Strip only the line terminator so empty leading or trailing
            # columns are not lost.
            line = line.rstrip("\r\n")
            if line.startswith("#"):
                if line.startswith("#fields\t"):
                    names = line.split("\t")[1:]
                    layout = {
                        name: names.index(name)
                        for name in wanted
                        if name in names
                    }
                continue
            parts = line.split("\t")
            query_index = layout.get("query")
            if query_index is None or query_index >= len(parts):
                continue
            records.append({
                "timestamp": column(parts, "ts"),
                "src_ip": column(parts, "id.orig_h"),
                "src_port": column(parts, "id.orig_p"),
                "dst_ip": column(parts, "id.resp_h"),
                "query": parts[query_index],
                "query_type": column(parts, "qtype_name"),
            })
    return records
|
|
|
|
|
|
if __name__ == "__main__":
    # Self-contained demo: runs three of the detectors on synthetic records
    # and prints a few entropy samples.
    rule = "=" * 60
    print(rule)
    print("DNS Exfiltration Detection Agent")
    print("Tunneling, DGA, volume anomaly, and TXT abuse detection")
    print(rule)

    # Demo with synthetic DNS records: 50 long-subdomain TXT queries,
    # 5 queries for a random-looking name, and 10 ordinary lookups.
    tunnel_query = f"{'a' * 60}.evil-tunnel.com"
    demo_records = []
    demo_records.extend(
        {"query": tunnel_query, "src_ip": "192.168.1.105", "query_type": "TXT"}
        for _ in range(50)
    )
    demo_records.extend(
        {"query": "x8kj2m9p4qw7nz3.xyz", "src_ip": "192.168.1.110", "query_type": "A"}
        for _ in range(5)
    )
    demo_records.extend(
        {"query": "google.com", "src_ip": "192.168.1.50", "query_type": "A"}
        for _ in range(10)
    )

    print("\n--- DNS Tunneling Detection ---")
    for hit in detect_tunneling(demo_records, subdomain_len_threshold=30, min_queries=10):
        print(f"[!] {hit['domain']}: {hit['queries']} queries, "
              f"avg subdomain len={hit['avg_subdomain_length']}")

    print("\n--- DGA Detection ---")
    dga_hits = detect_dga(demo_records, entropy_threshold=3.0, min_sld_length=10)
    for hit in dga_hits[:5]:
        print(f"[!] {hit['domain']}: entropy={hit['avg_entropy']}")

    print("\n--- TXT Record Abuse ---")
    for hit in detect_txt_abuse(demo_records, threshold=10):
        print(f"[!] {hit['src_ip']}: {hit['txt_queries']} TXT queries")

    print("\n--- Entropy Examples ---")
    for sample in ("google", "x8kj2m9p4qw7n", "aGVsbG8gd29ybGQ"):
        print(f" '{sample}' -> entropy={shannon_entropy(sample)}")
|