Files
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

170 lines
5.8 KiB
Python

#!/usr/bin/env python3
"""Agent for hunting data exfiltration indicators in network traffic."""
import argparse
import csv
import json
import math
import sys
from collections import defaultdict
from datetime import datetime, timezone
DNS_EXFIL_ENTROPY_THRESHOLD = 3.5
DNS_LABEL_LENGTH_THRESHOLD = 40
LARGE_UPLOAD_THRESHOLD_MB = 50
SUSPICIOUS_PORTS = {
20: "FTP Data", 21: "FTP", 22: "SSH/SCP", 53: "DNS",
443: "HTTPS", 993: "IMAPS", 995: "POP3S",
8443: "Alt HTTPS", 6667: "IRC",
}
def shannon_entropy(data):
"""Calculate Shannon entropy of a string."""
if not data:
return 0.0
freq = defaultdict(int)
for c in data:
freq[c] += 1
length = len(data)
return -sum((count/length) * math.log2(count/length) for count in freq.values())
def analyze_dns_queries(filepath):
"""Analyze DNS query log for exfiltration indicators."""
findings = []
domain_stats = defaultdict(lambda: {"count": 0, "total_length": 0, "queries": []})
try:
with open(filepath, "r") as f:
reader = csv.DictReader(f, delimiter="\t")
for row in reader:
query = row.get("query", "")
if not query:
continue
parts = query.split(".")
if len(parts) < 2:
continue
domain = ".".join(parts[-2:])
subdomain = ".".join(parts[:-2])
domain_stats[domain]["count"] += 1
domain_stats[domain]["total_length"] += len(subdomain)
domain_stats[domain]["queries"].append(subdomain)
except (OSError, csv.Error):
return findings
for domain, stats in domain_stats.items():
if stats["count"] < 5:
continue
avg_subdomain_len = stats["total_length"] / stats["count"]
all_subdomains = "".join(stats["queries"])
entropy = shannon_entropy(all_subdomains)
if entropy > DNS_EXFIL_ENTROPY_THRESHOLD and avg_subdomain_len > 20:
findings.append({
"type": "dns_exfiltration",
"domain": domain,
"query_count": stats["count"],
"avg_subdomain_length": round(avg_subdomain_len, 1),
"entropy": round(entropy, 3),
"severity": "CRITICAL",
})
return findings
def analyze_network_flows(filepath):
"""Analyze network flow data for large outbound transfers."""
findings = []
dest_bytes = defaultdict(int)
try:
with open(filepath, "r") as f:
reader = csv.DictReader(f, delimiter="\t")
for row in reader:
dst = row.get("id.resp_h", row.get("dst", ""))
orig_bytes = int(row.get("orig_bytes", 0) or 0)
dest_bytes[dst] += orig_bytes
except (OSError, csv.Error, ValueError):
return findings
for dst, total in dest_bytes.items():
mb = total / (1024 * 1024)
if mb >= LARGE_UPLOAD_THRESHOLD_MB:
findings.append({
"type": "large_outbound_transfer",
"destination": dst,
"total_bytes": total,
"total_mb": round(mb, 2),
"severity": "HIGH",
})
return findings
def analyze_off_hours_traffic(filepath):
"""Check for significant data transfers during off-hours."""
findings = []
off_hours_transfers = defaultdict(int)
try:
with open(filepath, "r") as f:
reader = csv.DictReader(f, delimiter="\t")
for row in reader:
ts = float(row.get("ts", 0))
hour = datetime.fromtimestamp(ts).hour
if hour < 6 or hour > 22:
dst = row.get("id.resp_h", row.get("dst", ""))
orig_bytes = int(row.get("orig_bytes", 0) or 0)
off_hours_transfers[dst] += orig_bytes
except (OSError, csv.Error, ValueError):
return findings
for dst, total in off_hours_transfers.items():
mb = total / (1024 * 1024)
if mb >= 10:
findings.append({
"type": "off_hours_transfer",
"destination": dst,
"total_mb": round(mb, 2),
"severity": "MEDIUM",
})
return findings
def main():
parser = argparse.ArgumentParser(
description="Data exfiltration indicator hunter"
)
parser.add_argument("--conn-log", help="Zeek conn.log or network flow CSV")
parser.add_argument("--dns-log", help="Zeek dns.log or DNS query CSV")
parser.add_argument("--output", "-o", help="Output JSON report")
parser.add_argument("--verbose", "-v", action="store_true")
args = parser.parse_args()
if not args.conn_log and not args.dns_log:
parser.error("At least one of --conn-log or --dns-log is required")
print("[*] Data Exfiltration Indicator Hunter")
report = {"timestamp": datetime.now(timezone.utc).isoformat(), "findings": []}
if args.dns_log:
report["findings"].extend(analyze_dns_queries(args.dns_log))
if args.conn_log:
report["findings"].extend(analyze_network_flows(args.conn_log))
report["findings"].extend(analyze_off_hours_traffic(args.conn_log))
report["risk_level"] = (
"CRITICAL" if any(f["severity"] == "CRITICAL" for f in report["findings"])
else "HIGH" if any(f["severity"] == "HIGH" for f in report["findings"])
else "MEDIUM" if report["findings"] else "LOW"
)
report["total_findings"] = len(report["findings"])
print(f"[*] {report['total_findings']} exfiltration indicators found")
if args.output:
with open(args.output, "w") as f:
json.dump(report, f, indent=2)
print(f"[*] Report saved to {args.output}")
else:
print(json.dumps(report, indent=2))
if __name__ == "__main__":
main()