Files
T
mukul975 c21af3347e Complete folder anatomy for all 649 cybersecurity skills + update LICENSE to Mahipal
- Add scripts/agent.py and references/api-reference.md to all remaining skills
- Update all 648 LICENSE files: copyright now reads 'Mahipal'
- Add implementing-security-monitoring-with-datadog (new skill with full anatomy)
- All 649 skills now have: SKILL.md, LICENSE, scripts/agent.py, references/api-reference.md
2026-03-11 00:22:12 +01:00

171 lines
5.8 KiB
Python

#!/usr/bin/env python3
"""Agent for hunting data exfiltration indicators in network traffic."""
import argparse
import csv
import json
import math
import sys
from collections import defaultdict
from datetime import datetime, timezone
# Detection tunables.

# analyze_dns_queries() flags a domain only when the Shannon entropy of its
# concatenated subdomain text is strictly greater than this value.
DNS_EXFIL_ENTROPY_THRESHOLD = 3.5

# NOTE(review): not referenced by any function visible in this file.
DNS_LABEL_LENGTH_THRESHOLD = 40

# analyze_network_flows() flags a destination once the summed orig_bytes,
# divided by 1024 * 1024, reaches this value.
LARGE_UPLOAD_THRESHOLD_MB = 50

# Port number -> service label.
# NOTE(review): not referenced by any function visible in this file.
SUSPICIOUS_PORTS = {
    20: "FTP Data",
    21: "FTP",
    22: "SSH/SCP",
    53: "DNS",
    443: "HTTPS",
    993: "IMAPS",
    995: "POP3S",
    8443: "Alt HTTPS",
    6667: "IRC",
}
def shannon_entropy(data):
    """Calculate Shannon entropy of a string.

    Args:
        data: Sequence of hashable symbols (normally a ``str``) to measure.

    Returns:
        Entropy in bits per symbol as a non-negative ``float``. Empty input
        and input consisting of a single repeated symbol both yield ``0.0``.
    """
    if not data:
        return 0.0
    freq = defaultdict(int)
    for c in data:
        freq[c] += 1
    length = len(data)
    entropy = -sum(
        (count / length) * math.log2(count / length) for count in freq.values()
    )
    # With only one distinct symbol the sum is 0.0 and negating it gives -0.0,
    # which would be rendered as "-0.0" in reports; normalise it to +0.0.
    return entropy if entropy else 0.0
def analyze_dns_queries(filepath, entropy_threshold=None, min_queries=5,
                        min_avg_subdomain_len=20):
    """Analyze DNS query log for exfiltration indicators.

    Reads a tab-separated log with a ``query`` column, groups the queries by
    their last two labels (a simple approximation of the registered domain,
    so ``a.b.example.co.uk`` is grouped under ``co.uk``), and flags domains
    whose subdomain text is both long on average and high in entropy.

    Both a plain TSV file with a header row and a native Zeek log (whose
    column names are on the ``#fields`` metadata line) are accepted.

    Args:
        filepath: Path of the DNS log to read.
        entropy_threshold: Entropy the concatenated subdomains must exceed.
            ``None`` (the default) uses ``DNS_EXFIL_ENTROPY_THRESHOLD``.
        min_queries: Domains seen fewer times than this are ignored.
        min_avg_subdomain_len: Average subdomain length that must be
            exceeded for a domain to be flagged.

    Returns:
        A list of finding dicts with the keys ``type``, ``domain``,
        ``query_count``, ``avg_subdomain_length``, ``entropy`` and
        ``severity``. If the file cannot be read (or reading fails part-way)
        a warning is written to stderr and whatever was read is analysed.
    """
    if entropy_threshold is None:
        entropy_threshold = DNS_EXFIL_ENTROPY_THRESHOLD
    findings = []
    domain_stats = defaultdict(lambda: {"count": 0, "total_length": 0, "queries": []})
    fields_tag = "#fields\t"
    try:
        # newline="" is what the csv module expects from its input file;
        # errors="replace" stops one undecodable byte from crashing the run.
        with open(filepath, "r", newline="", errors="replace") as f:
            # Drop Zeek's "#"-prefixed metadata lines and turn the "#fields"
            # line into an ordinary header row so DictReader sees the real
            # column names. Plain TSV input passes through unchanged.
            lines = (
                line[len(fields_tag):] if line.startswith(fields_tag) else line
                for line in f
                if line.startswith(fields_tag) or not line.startswith("#")
            )
            reader = csv.DictReader(lines, delimiter="\t")
            for row in reader:
                # A fully-qualified name ends in "."; strip it so that
                # "x.example.com." is grouped under "example.com", not "com.".
                query = (row.get("query") or "").strip().rstrip(".")
                if not query:
                    continue
                parts = query.split(".")
                if len(parts) < 2:
                    continue
                domain = ".".join(parts[-2:])
                subdomain = ".".join(parts[:-2])
                stats = domain_stats[domain]
                stats["count"] += 1
                stats["total_length"] += len(subdomain)
                stats["queries"].append(subdomain)
    except (OSError, csv.Error) as exc:
        # Best effort: report the problem and analyse the rows already read
        # rather than silently discarding everything.
        print(f"[!] Could not fully read DNS log {filepath}: {exc}", file=sys.stderr)
    for domain, stats in domain_stats.items():
        if stats["count"] < min_queries:
            continue
        avg_subdomain_len = stats["total_length"] / stats["count"]
        # Entropy is measured over all subdomain text seen for the domain.
        entropy = shannon_entropy("".join(stats["queries"]))
        if entropy > entropy_threshold and avg_subdomain_len > min_avg_subdomain_len:
            findings.append({
                "type": "dns_exfiltration",
                "domain": domain,
                "query_count": stats["count"],
                "avg_subdomain_length": round(avg_subdomain_len, 1),
                "entropy": round(entropy, 3),
                "severity": "CRITICAL",
            })
    return findings
def analyze_network_flows(filepath, threshold_mb=None):
    """Analyze network flow data for large outbound transfers.

    Reads a tab-separated flow log, sums the ``orig_bytes`` column per
    destination (``id.resp_h``, falling back to ``dst``) and flags every
    destination whose total reaches the threshold.

    Both a plain TSV file with a header row and a native Zeek log (whose
    column names are on the ``#fields`` metadata line) are accepted.

    Args:
        filepath: Path of the flow log to read.
        threshold_mb: Minimum total, in units of 1024 * 1024 bytes, for a
            destination to be flagged. ``None`` (the default) uses
            ``LARGE_UPLOAD_THRESHOLD_MB``.

    Returns:
        A list of finding dicts with the keys ``type``, ``destination``,
        ``total_bytes``, ``total_mb`` and ``severity``. Rows whose byte count
        cannot be parsed are skipped (with a count reported on stderr) rather
        than aborting the analysis; an empty or "-" byte count counts as 0.
        If the file cannot be read a warning is written to stderr and
        whatever was read is analysed.
    """
    if threshold_mb is None:
        threshold_mb = LARGE_UPLOAD_THRESHOLD_MB
    findings = []
    dest_bytes = defaultdict(int)
    skipped = 0
    fields_tag = "#fields\t"
    try:
        # newline="" is what the csv module expects from its input file;
        # errors="replace" stops one undecodable byte from crashing the run.
        with open(filepath, "r", newline="", errors="replace") as f:
            # Drop Zeek's "#"-prefixed metadata lines and turn the "#fields"
            # line into an ordinary header row so DictReader sees the real
            # column names. Plain TSV input passes through unchanged.
            lines = (
                line[len(fields_tag):] if line.startswith(fields_tag) else line
                for line in f
                if line.startswith(fields_tag) or not line.startswith("#")
            )
            reader = csv.DictReader(lines, delimiter="\t")
            for row in reader:
                dst = row.get("id.resp_h") or row.get("dst") or ""
                raw = (row.get("orig_bytes") or "").strip()
                try:
                    # "-" is Zeek's marker for an unset field.
                    orig_bytes = int(raw) if raw and raw != "-" else 0
                except ValueError:
                    # One bad row must not throw away every other row.
                    skipped += 1
                    continue
                dest_bytes[dst] += orig_bytes
    except (OSError, csv.Error) as exc:
        print(f"[!] Could not fully read flow log {filepath}: {exc}", file=sys.stderr)
    if skipped:
        print(f"[!] Skipped {skipped} flow rows with an unparsable orig_bytes value",
              file=sys.stderr)
    for dst, total in dest_bytes.items():
        mb = total / (1024 * 1024)
        if mb >= threshold_mb:
            findings.append({
                "type": "large_outbound_transfer",
                "destination": dst,
                "total_bytes": total,
                "total_mb": round(mb, 2),
                "severity": "HIGH",
            })
    return findings
def analyze_off_hours_traffic(filepath, threshold_mb=10, start_hour=6, end_hour=22):
    """Check for significant data transfers during off-hours.

    Reads a tab-separated flow log, converts each row's ``ts`` (epoch
    seconds) to the local time of the machine running the analysis, and sums
    ``orig_bytes`` per destination (``id.resp_h``, falling back to ``dst``)
    for rows whose hour lies outside ``start_hour``..``end_hour``.

    Both a plain TSV file with a header row and a native Zeek log (whose
    column names are on the ``#fields`` metadata line) are accepted.

    Args:
        filepath: Path of the flow log to read.
        threshold_mb: Minimum off-hours total, in units of 1024 * 1024
            bytes, for a destination to be flagged.
        start_hour: First hour (0-23) that is NOT considered off-hours.
        end_hour: Last hour (0-23) that is NOT considered off-hours; with
            the defaults only 23:00-05:59 counts as off-hours.

    Returns:
        A list of finding dicts with the keys ``type``, ``destination``,
        ``total_mb`` and ``severity``. Rows with a missing or unparsable
        timestamp or byte count are skipped (with a count reported on
        stderr) rather than aborting the analysis; an empty or "-" byte
        count counts as 0. If the file cannot be read a warning is written
        to stderr and whatever was read is analysed.
    """
    findings = []
    off_hours_transfers = defaultdict(int)
    skipped = 0
    fields_tag = "#fields\t"
    try:
        # newline="" is what the csv module expects from its input file;
        # errors="replace" stops one undecodable byte from crashing the run.
        with open(filepath, "r", newline="", errors="replace") as f:
            # Drop Zeek's "#"-prefixed metadata lines and turn the "#fields"
            # line into an ordinary header row so DictReader sees the real
            # column names. Plain TSV input passes through unchanged.
            lines = (
                line[len(fields_tag):] if line.startswith(fields_tag) else line
                for line in f
                if line.startswith(fields_tag) or not line.startswith("#")
            )
            reader = csv.DictReader(lines, delimiter="\t")
            for row in reader:
                try:
                    # A missing/empty/"-" timestamp fails float() and the row
                    # is skipped instead of being dated 1970-01-01.
                    hour = datetime.fromtimestamp(float(row.get("ts") or "")).hour
                except (ValueError, OverflowError, OSError):
                    skipped += 1
                    continue
                if start_hour <= hour <= end_hour:
                    continue
                raw = (row.get("orig_bytes") or "").strip()
                try:
                    # "-" is Zeek's marker for an unset field.
                    orig_bytes = int(raw) if raw and raw != "-" else 0
                except ValueError:
                    skipped += 1
                    continue
                dst = row.get("id.resp_h") or row.get("dst") or ""
                off_hours_transfers[dst] += orig_bytes
    except (OSError, csv.Error) as exc:
        print(f"[!] Could not fully read flow log {filepath}: {exc}", file=sys.stderr)
    if skipped:
        print(f"[!] Skipped {skipped} flow rows with an unparsable ts or orig_bytes value",
              file=sys.stderr)
    for dst, total in off_hours_transfers.items():
        mb = total / (1024 * 1024)
        if mb >= threshold_mb:
            findings.append({
                "type": "off_hours_transfer",
                "destination": dst,
                "total_mb": round(mb, 2),
                "severity": "MEDIUM",
            })
    return findings
def main():
    """Command-line entry point.

    Parses the arguments, runs the DNS analyzer on ``--dns-log`` and both
    flow analyzers on ``--conn-log`` (whichever were supplied), derives an
    overall risk level from the collected findings, and either writes the
    JSON report to ``--output`` or prints it to stdout. ``--verbose`` is
    accepted but not consulted here.
    """
    cli = argparse.ArgumentParser(description="Data exfiltration indicator hunter")
    cli.add_argument("--conn-log", help="Zeek conn.log or network flow CSV")
    cli.add_argument("--dns-log", help="Zeek dns.log or DNS query CSV")
    cli.add_argument("--output", "-o", help="Output JSON report")
    cli.add_argument("--verbose", "-v", action="store_true")
    args = cli.parse_args()

    # parser.error() prints usage and exits, so nothing below runs without input.
    if not (args.conn_log or args.dns_log):
        cli.error("At least one of --conn-log or --dns-log is required")

    print("[*] Data Exfiltration Indicator Hunter")
    findings = []
    report = {"timestamp": datetime.now(timezone.utc).isoformat(), "findings": findings}

    if args.dns_log:
        findings.extend(analyze_dns_queries(args.dns_log))
    if args.conn_log:
        findings.extend(analyze_network_flows(args.conn_log))
        findings.extend(analyze_off_hours_traffic(args.conn_log))

    # Overall risk is the highest severity present; any remaining finding
    # counts as MEDIUM and no findings at all as LOW.
    severities = {finding["severity"] for finding in findings}
    if "CRITICAL" in severities:
        risk_level = "CRITICAL"
    elif "HIGH" in severities:
        risk_level = "HIGH"
    elif findings:
        risk_level = "MEDIUM"
    else:
        risk_level = "LOW"
    report["risk_level"] = risk_level
    report["total_findings"] = len(findings)
    print(f"[*] {report['total_findings']} exfiltration indicators found")

    if not args.output:
        print(json.dumps(report, indent=2))
        return
    with open(args.output, "w") as out:
        json.dump(report, out, indent=2)
    print(f"[*] Report saved to {args.output}")


if __name__ == "__main__":
    main()