mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-12 14:14:56 +03:00
c47eed6a64
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
170 lines
5.8 KiB
Python
170 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Agent for hunting data exfiltration indicators in network traffic."""
|
|
|
|
import argparse
|
|
import csv
|
|
import json
|
|
import math
|
|
from collections import defaultdict
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
# Entropy cut-off (bits per character) applied by analyze_dns_queries to the
# concatenated subdomain text of a domain; values above it count as suspicious.
DNS_EXFIL_ENTROPY_THRESHOLD = 3.5

# Length cut-off for a DNS label.
# NOTE(review): no function in this file reads this constant - confirm whether
# a per-label length check was intended.
DNS_LABEL_LENGTH_THRESHOLD = 40

# Per-destination outbound volume, in MiB (bytes / 1024 / 1024), at or above
# which analyze_network_flows reports a large transfer.
LARGE_UPLOAD_THRESHOLD_MB = 50

# Port number -> human-readable service label.
# NOTE(review): no function in this file reads this mapping - confirm whether
# a port-based check was intended.
SUSPICIOUS_PORTS = {
    20: "FTP Data",
    21: "FTP",
    22: "SSH/SCP",
    53: "DNS",
    443: "HTTPS",
    993: "IMAPS",
    995: "POP3S",
    8443: "Alt HTTPS",
    6667: "IRC",
}
|
|
|
|
|
|
def shannon_entropy(data):
    """Return the Shannon entropy of ``data`` in bits per symbol.

    Every distinct character is weighted by its relative frequency in the
    input. Falsy input (for example the empty string) yields ``0.0``.
    """
    if not data:
        return 0.0

    # Tally how often each symbol occurs.
    occurrences = {}
    for symbol in data:
        occurrences[symbol] = occurrences.get(symbol, 0) + 1

    total = len(data)
    probabilities = [seen / total for seen in occurrences.values()]
    return -sum(p * math.log2(p) for p in probabilities)
|
|
|
|
|
|
def analyze_dns_queries(filepath):
    """Analyze a DNS query log for exfiltration indicators.

    The file is read as tab-separated text with a ``query`` column. Two
    layouts are accepted: a plain TSV whose first line is the header row, and
    a native Zeek ``dns.log`` whose column names sit on a ``#fields`` metadata
    line (all other ``#`` lines are ignored).

    Queries are grouped by their last two labels (lower-cased). A group is
    reported when it has at least 5 queries, its mean subdomain length is
    above 20 characters, and the Shannon entropy of all its subdomains
    concatenated is above ``DNS_EXFIL_ENTROPY_THRESHOLD``.

    NOTE: "last two labels" is only an approximation of the registered
    domain; names under multi-label suffixes such as ``co.uk`` are all
    grouped under that suffix.

    Args:
        filepath: Path to the DNS log.

    Returns:
        A list of finding dicts with the keys ``type``, ``domain``,
        ``query_count``, ``avg_subdomain_length``, ``entropy`` and
        ``severity``. The list is empty when nothing is flagged or when the
        file cannot be opened or parsed.
    """
    findings = []
    domain_stats = defaultdict(lambda: {"count": 0, "total_length": 0, "queries": []})
    try:
        # errors="replace" keeps a stray non-UTF-8 byte from aborting the run
        # with an uncaught UnicodeDecodeError; newline="" is what the csv
        # module asks for.
        with open(
            filepath, "r", encoding="utf-8", errors="replace", newline=""
        ) as f:
            # Zeek logs carry "#..." metadata lines and keep the column names
            # on the "#fields" one. Turn that line into the header row and
            # drop the others so DictReader sees an ordinary TSV.
            lines = (
                line[len("#fields\t"):] if line.startswith("#fields\t") else line
                for line in f
                if not line.startswith("#") or line.startswith("#fields\t")
            )
            reader = csv.DictReader(lines, delimiter="\t")
            for row in reader:
                # Strip the root dot so the FQDN form "a.example.com." is
                # grouped with "a.example.com" instead of under "com.".
                query = (row.get("query") or "").strip().rstrip(".")
                parts = query.split(".")
                if len(parts) < 2:
                    # Empty, unset ("-") or single-label names have no
                    # subdomain to inspect.
                    continue
                # DNS names compare case-insensitively, so group on the
                # lower-cased domain. The subdomain keeps its case because
                # the character mix is what the entropy check measures.
                domain = ".".join(parts[-2:]).lower()
                subdomain = ".".join(parts[:-2])
                stats = domain_stats[domain]
                stats["count"] += 1
                stats["total_length"] += len(subdomain)
                stats["queries"].append(subdomain)
    except (OSError, csv.Error):
        return findings

    for domain, stats in domain_stats.items():
        # Too few samples to judge the domain.
        if stats["count"] < 5:
            continue
        avg_subdomain_len = stats["total_length"] / stats["count"]
        all_subdomains = "".join(stats["queries"])
        entropy = shannon_entropy(all_subdomains)
        if entropy > DNS_EXFIL_ENTROPY_THRESHOLD and avg_subdomain_len > 20:
            findings.append({
                "type": "dns_exfiltration",
                "domain": domain,
                "query_count": stats["count"],
                "avg_subdomain_length": round(avg_subdomain_len, 1),
                "entropy": round(entropy, 3),
                "severity": "CRITICAL",
            })
    return findings
|
|
|
|
|
|
def analyze_network_flows(filepath, threshold_mb=None):
    """Analyze network flow data for large outbound transfers.

    The file is read as tab-separated text. Two layouts are accepted: a plain
    TSV whose first line is the header row, and a native Zeek ``conn.log``
    whose column names sit on a ``#fields`` metadata line (all other ``#``
    lines are ignored). The ``orig_bytes`` column is summed per destination,
    taken from ``id.resp_h`` or, when that column is absent, ``dst``.

    Rows whose ``orig_bytes`` is not an integer (Zeek writes ``-`` for unset
    fields) are skipped, so one bad row no longer discards every total.

    Args:
        filepath: Path to the flow log.
        threshold_mb: Volume in MiB (bytes / 1024 / 1024) at or above which a
            destination is reported. ``None`` (the default) uses
            ``LARGE_UPLOAD_THRESHOLD_MB``.

    Returns:
        A list of finding dicts with the keys ``type``, ``destination``,
        ``total_bytes``, ``total_mb`` and ``severity``. The list is empty when
        nothing is flagged or when the file cannot be opened or parsed.
    """
    if threshold_mb is None:
        threshold_mb = LARGE_UPLOAD_THRESHOLD_MB

    findings = []
    dest_bytes = defaultdict(int)
    try:
        # errors="replace" keeps a stray non-UTF-8 byte from aborting the
        # whole analysis; newline="" is what the csv module asks for.
        with open(
            filepath, "r", encoding="utf-8", errors="replace", newline=""
        ) as f:
            # Zeek logs carry "#..." metadata lines and keep the column names
            # on the "#fields" one. Turn that line into the header row and
            # drop the others so DictReader sees an ordinary TSV.
            lines = (
                line[len("#fields\t"):] if line.startswith("#fields\t") else line
                for line in f
                if not line.startswith("#") or line.startswith("#fields\t")
            )
            reader = csv.DictReader(lines, delimiter="\t")
            for row in reader:
                dst = row.get("id.resp_h", row.get("dst", ""))
                try:
                    sent = int(row.get("orig_bytes") or 0)
                except ValueError:
                    # Unset ("-") or malformed byte count: ignore this flow
                    # only, keep everything accumulated so far.
                    continue
                dest_bytes[dst] += sent
    except (OSError, csv.Error):
        return findings

    for dst, total in dest_bytes.items():
        mb = total / (1024 * 1024)
        if mb >= threshold_mb:
            findings.append({
                "type": "large_outbound_transfer",
                "destination": dst,
                "total_bytes": total,
                "total_mb": round(mb, 2),
                "severity": "HIGH",
            })
    return findings
|
|
|
|
|
|
def analyze_off_hours_traffic(filepath, threshold_mb=10, tz=None):
    """Check for significant data transfers during off-hours.

    The file is read as tab-separated text. Two layouts are accepted: a plain
    TSV whose first line is the header row, and a native Zeek ``conn.log``
    whose column names sit on a ``#fields`` metadata line (all other ``#``
    lines are ignored). For every row whose ``ts`` (epoch seconds) falls in
    the off-hours window - hour below 6 or above 22, i.e. 23:00 through
    05:59 - the ``orig_bytes`` value is added to the total of the row's
    destination (``id.resp_h`` or, when that column is absent, ``dst``).

    Rows with a missing, unparsable or out-of-range ``ts``, or with a
    non-integer ``orig_bytes`` (Zeek writes ``-`` for unset fields), are
    skipped individually instead of aborting or discarding the analysis.

    Args:
        filepath: Path to the flow log.
        threshold_mb: Off-hours volume in MiB (bytes / 1024 / 1024) at or
            above which a destination is reported.
        tz: Optional ``tzinfo`` used to convert ``ts`` to an hour of day.
            ``None`` (the default) uses the local timezone of the machine
            running the analysis.

    Returns:
        A list of finding dicts with the keys ``type``, ``destination``,
        ``total_mb`` and ``severity``. The list is empty when nothing is
        flagged or when the file cannot be opened or parsed.
    """
    findings = []
    off_hours_transfers = defaultdict(int)
    try:
        # errors="replace" keeps a stray non-UTF-8 byte from aborting the
        # whole analysis; newline="" is what the csv module asks for.
        with open(
            filepath, "r", encoding="utf-8", errors="replace", newline=""
        ) as f:
            # Zeek logs carry "#..." metadata lines and keep the column names
            # on the "#fields" one. Turn that line into the header row and
            # drop the others so DictReader sees an ordinary TSV.
            lines = (
                line[len("#fields\t"):] if line.startswith("#fields\t") else line
                for line in f
                if not line.startswith("#") or line.startswith("#fields\t")
            )
            reader = csv.DictReader(lines, delimiter="\t")
            for row in reader:
                try:
                    # A row without a usable timestamp cannot be placed in or
                    # out of the window, so it is skipped rather than being
                    # treated as the epoch.
                    ts = float(row.get("ts") or "")
                    hour = datetime.fromtimestamp(ts, tz).hour
                except (ValueError, OverflowError, OSError):
                    continue
                if 6 <= hour <= 22:
                    continue
                try:
                    sent = int(row.get("orig_bytes") or 0)
                except ValueError:
                    # Unset ("-") or malformed byte count: ignore this flow
                    # only, keep everything accumulated so far.
                    continue
                dst = row.get("id.resp_h", row.get("dst", ""))
                off_hours_transfers[dst] += sent
    except (OSError, csv.Error):
        return findings

    for dst, total in off_hours_transfers.items():
        mb = total / (1024 * 1024)
        if mb >= threshold_mb:
            findings.append({
                "type": "off_hours_transfer",
                "destination": dst,
                "total_mb": round(mb, 2),
                "severity": "MEDIUM",
            })
    return findings
|
|
|
|
|
|
def main():
    """Command-line entry point for the exfiltration indicator hunter.

    Parses ``--conn-log``, ``--dns-log``, ``--output``/``-o`` and
    ``--verbose``/``-v``; at least one of the two log options must be given.
    The DNS log is passed to ``analyze_dns_queries`` and the connection log to
    ``analyze_network_flows`` and ``analyze_off_hours_traffic``. The combined
    findings, an overall risk level and a UTC timestamp form the JSON report,
    which is written to ``--output`` when given and printed otherwise.

    ``--verbose`` is accepted but not consulted anywhere in this function.
    """
    cli = argparse.ArgumentParser(description="Data exfiltration indicator hunter")
    cli.add_argument("--conn-log", help="Zeek conn.log or network flow CSV")
    cli.add_argument("--dns-log", help="Zeek dns.log or DNS query CSV")
    cli.add_argument("--output", "-o", help="Output JSON report")
    cli.add_argument("--verbose", "-v", action="store_true")
    options = cli.parse_args()

    if not (options.conn_log or options.dns_log):
        cli.error("At least one of --conn-log or --dns-log is required")

    print("[*] Data Exfiltration Indicator Hunter")
    findings = []
    report = {"timestamp": datetime.now(timezone.utc).isoformat(), "findings": findings}

    if options.dns_log:
        findings += analyze_dns_queries(options.dns_log)
    if options.conn_log:
        findings += analyze_network_flows(options.conn_log)
        findings += analyze_off_hours_traffic(options.conn_log)

    # Overall risk is the highest severity present; any finding at all is at
    # least MEDIUM, and an empty report is LOW.
    severities = {item["severity"] for item in findings}
    if "CRITICAL" in severities:
        risk = "CRITICAL"
    elif "HIGH" in severities:
        risk = "HIGH"
    elif findings:
        risk = "MEDIUM"
    else:
        risk = "LOW"
    report["risk_level"] = risk
    report["total_findings"] = len(findings)

    print(f"[*] {report['total_findings']} exfiltration indicators found")
    if not options.output:
        print(json.dumps(report, indent=2))
        return

    with open(options.output, "w") as f:
        json.dump(report, f, indent=2)
    print(f"[*] Report saved to {options.output}")


if __name__ == "__main__":
    main()
|