mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-13 22:54:53 +03:00
229 lines
8.2 KiB
Python
229 lines
8.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Network traffic analysis agent using tshark and pyshark for PCAP analysis."""
|
|
|
|
import argparse
import ipaddress
import json
import math
import re
import subprocess
from collections import defaultdict, Counter
from datetime import datetime, timezone
|
|
|
|
# pyshark is treated as optional: a failed import only clears HAS_PYSHARK
# instead of preventing the module from loading.
try:
    import pyshark  # noqa: F401  (imported only to probe availability)
except ImportError:
    HAS_PYSHARK = False
else:
    HAS_PYSHARK = True
|
|
|
|
|
|
def get_protocol_stats(pcap_path):
    """Extract protocol hierarchy statistics using tshark.

    Runs ``tshark -q -z io,phs`` on the capture and parses every
    ``<protocol> frames:<n> bytes:<n>`` row of the printed hierarchy.

    Args:
        pcap_path: Path of the capture file passed to ``tshark -r``.

    Returns:
        Dict mapping protocol name to ``{"frames": int, "bytes": int}``.
        If a protocol name is printed on several rows, the last row wins.

    Raises:
        FileNotFoundError: If the ``tshark`` executable cannot be started.
        subprocess.TimeoutExpired: If tshark runs longer than 120 seconds.
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-q", "-z", "io,phs"],
        capture_output=True, text=True, timeout=120,
    )
    # Leading whitespace must be optional: the root of the hierarchy is
    # printed without indentation. \S+ (rather than [\w.]+) also accepts
    # protocol names that contain "-" or ":".
    row_pattern = re.compile(r"\s*(\S+)\s+frames:(\d+)\s+bytes:(\d+)")
    protocols = {}
    for line in result.stdout.splitlines():
        match = row_pattern.match(line)
        if match:
            name, frames, size = match.groups()
            protocols[name] = {"frames": int(frames), "bytes": int(size)}
    return protocols
|
|
|
|
|
|
def get_conversations(pcap_path):
    """Extract IP conversations using tshark.

    Runs ``tshark -q -z conv,ip`` and parses each ``A <-> B ...`` row.

    Args:
        pcap_path: Path of the capture file passed to ``tshark -r``.

    Returns:
        List of dicts with keys ``"src"``, ``"dst"`` and ``"frames_total"``.
        ``frames_total`` is kept as the string printed by tshark.

    Raises:
        FileNotFoundError: If the ``tshark`` executable cannot be started.
        subprocess.TimeoutExpired: If tshark runs longer than 120 seconds.
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-q", "-z", "conv,ip"],
        capture_output=True, text=True, timeout=120,
    )
    conversations = []
    for line in result.stdout.splitlines():
        parts = line.split()
        # Require "<->" as its own token so parts.index() cannot raise.
        if len(parts) < 10 or "<->" not in parts:
            continue
        idx = parts.index("<->")
        if idx == 0 or idx + 1 >= len(parts):
            # No address on one side of the arrow; not a conversation row.
            continue

        # After the two addresses tshark prints three counter groups
        # ("<-", "->", "Total") followed by relative start and duration.
        # The first counter is therefore the "<-" frame count, not the
        # total. Splitting the counters into three equal groups finds the
        # Total column whether or not a byte-unit token is printed.
        counters = parts[idx + 2:-2]
        group_size, leftover = divmod(len(counters), 3)
        if group_size and not leftover:
            frames_total = counters[2 * group_size]
        else:
            # Unrecognised layout: fall back to the first counter column.
            frames_total = parts[idx + 2] if len(parts) > idx + 2 else "0"

        conversations.append({
            "src": parts[idx - 1],
            "dst": parts[idx + 1],
            "frames_total": frames_total,
        })
    return conversations
|
|
|
|
|
|
def get_top_talkers(pcap_path, top_n=20):
    """Identify top source and destination IPs by packet count.

    Args:
        pcap_path: Path of the capture file passed to ``tshark -r``.
        top_n: Maximum number of entries returned per direction.

    Returns:
        Dict with ``"top_sources"`` and ``"top_destinations"``, each a list
        of ``(ip, packet_count)`` tuples ordered from most to least frequent.

    Raises:
        FileNotFoundError: If the ``tshark`` executable cannot be started.
        subprocess.TimeoutExpired: If tshark runs longer than 120 seconds.
    """
    result = subprocess.run(
        # occurrence=f prints only the first ip.src/ip.dst of a frame, so a
        # frame carrying several IP headers is counted once instead of
        # producing a comma-joined pseudo-address.
        ["tshark", "-r", pcap_path, "-T", "fields", "-E", "occurrence=f",
         "-e", "ip.src", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120,
    )
    src_counts = Counter()
    dst_counts = Counter()
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if len(parts) < 2:
            continue
        src, dst = parts[0].strip(), parts[1].strip()
        # Frames without an IPv4 layer yield empty fields; counting them
        # would report "" as a talker.
        if src:
            src_counts[src] += 1
        if dst:
            dst_counts[dst] += 1
    return {
        "top_sources": src_counts.most_common(top_n),
        "top_destinations": dst_counts.most_common(top_n),
    }
|
|
|
|
|
|
def extract_dns_queries(pcap_path):
    """Extract DNS queries from the capture.

    Only query packets are selected. Responses repeat the question section,
    so including them would count each lookup twice and record the client
    address as the resolver.

    Args:
        pcap_path: Path of the capture file passed to ``tshark -r``.

    Returns:
        List of dicts with keys ``"query"`` (dns.qry.name), ``"type"``
        (dns.qry.type as printed by tshark) and ``"resolver"`` (ip.dst of
        the query packet, ``""`` when tshark prints no value for it).

    Raises:
        FileNotFoundError: If the ``tshark`` executable cannot be started.
        subprocess.TimeoutExpired: If tshark runs longer than 120 seconds.
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_path,
         "-Y", "dns.qry.name && dns.flags.response == 0",
         "-T", "fields",
         "-e", "dns.qry.name", "-e", "dns.qry.type", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120,
    )
    queries = []
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if not parts[0]:
            # Row without a query name.
            continue
        queries.append({
            "query": parts[0],
            "type": parts[1] if len(parts) > 1 else "",
            "resolver": parts[2] if len(parts) > 2 else "",
        })
    return queries
|
|
|
|
|
|
def detect_dns_tunneling(dns_queries, entropy_threshold=3.5, length_threshold=40):
    """Detect DNS tunneling via high-entropy or long subdomain queries.

    Only the left-most label of each query name is inspected; labels shorter
    than 5 characters are ignored.

    Args:
        dns_queries: Iterable of dicts that each carry a ``"query"`` name.
        entropy_threshold: A label whose Shannon entropy exceeds this value
            is flagged.
        length_threshold: A label longer than this many characters is
            flagged.

    Returns:
        List of alert dicts with keys ``"query"``, ``"subdomain_length"``,
        ``"entropy"`` (rounded to 3 decimals), ``"severity"`` (``"high"``
        when the entropy exceeds 4.0, otherwise ``"medium"``) and
        ``"indicator"``.
    """
    suspicious = []
    for q in dns_queries:
        domain = q["query"]
        # split() returns the whole name when it contains no dot, so no
        # separate dotless branch is needed.
        subdomain = domain.split(".")[0]
        if len(subdomain) < 5:
            continue
        entropy = _calculate_entropy(subdomain)
        if entropy <= entropy_threshold and len(subdomain) <= length_threshold:
            continue
        suspicious.append({
            "query": domain,
            "subdomain_length": len(subdomain),
            "entropy": round(entropy, 3),
            "severity": "high" if entropy > 4.0 else "medium",
            "indicator": "Possible DNS tunneling",
        })
    return suspicious
|
|
|
|
|
|
def _calculate_entropy(text):
    """Calculate Shannon entropy of a string.

    Args:
        text: String whose character distribution is measured.

    Returns:
        Entropy in bits per character; ``0.0`` for an empty string.
    """
    if not text:
        return 0.0
    length = len(text)
    # Summing p * log2(1/p) keeps every term non-negative, so a string made
    # of one repeated character yields 0.0 rather than -0.0.
    return sum(
        (count / length) * math.log2(length / count)
        for count in Counter(text).values()
    )
|
|
|
|
|
|
def extract_http_urls(pcap_path):
    """Extract HTTP request URIs from the capture.

    Requests for which tshark prints no ``http.host`` value are skipped.

    Args:
        pcap_path: Path of the capture file passed to ``tshark -r``.

    Returns:
        List of dicts with keys ``"host"``, ``"uri"``, ``"dst_ip"`` and
        ``"full_url"`` (``http://<host><uri>``).

    Raises:
        FileNotFoundError: If the ``tshark`` executable cannot be started.
        subprocess.TimeoutExpired: If tshark runs longer than 120 seconds.
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-Y", "http.request", "-T", "fields",
         "-e", "http.host", "-e", "http.request.uri", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120,
    )
    urls = []
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if len(parts) < 2 or not parts[0]:
            continue
        host = parts[0]
        # A missing URI shows up as an empty column, not a missing one, so
        # the "/" default has to be applied to the empty string.
        uri = parts[1] or "/"
        urls.append({
            "host": host,
            "uri": uri,
            "dst_ip": parts[2] if len(parts) > 2 else "",
            "full_url": f"http://{host}{uri}",
        })
    return urls
|
|
|
|
|
|
def detect_port_scan(pcap_path, threshold=20):
    """Detect port scanning patterns (single source hitting many ports).

    Counts, per (source, destination) pair, the distinct destination ports
    seen in TCP segments that have SYN set and ACK clear.

    Args:
        pcap_path: Path of the capture file passed to ``tshark -r``.
        threshold: Minimum number of distinct ports for a pair to be
            reported.

    Returns:
        List of dicts with keys ``"source"``, ``"target"``,
        ``"unique_ports"``, ``"severity"`` and ``"indicator"``.

    Raises:
        FileNotFoundError: If the ``tshark`` executable cannot be started.
        subprocess.TimeoutExpired: If tshark runs longer than 120 seconds.
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-Y", "tcp.flags.syn==1 && tcp.flags.ack==0",
         "-T", "fields", "-e", "ip.src", "-e", "ip.dst", "-e", "tcp.dstport"],
        capture_output=True, text=True, timeout=120,
    )
    ports_by_pair = defaultdict(set)
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if len(parts) < 3:
            continue
        src, dst, port = (field.strip() for field in parts[:3])
        # SYNs without an IPv4 layer have empty address columns; grouping
        # them together would fabricate one scan with a blank source/target.
        if not (src and dst and port):
            continue
        ports_by_pair[(src, dst)].add(port)

    return [
        {
            "source": src,
            "target": dst,
            "unique_ports": len(ports),
            "severity": "high",
            "indicator": f"Port scan: {len(ports)} unique ports probed",
        }
        for (src, dst), ports in ports_by_pair.items()
        if len(ports) >= threshold
    ]
|
|
|
|
|
|
def extract_unique_ips(pcap_path):
    """Extract all unique external IPs from the capture.

    An address counts as external unless ``ipaddress`` classifies it as
    private, loopback, link-local, multicast or unspecified. This covers
    the whole 172.16.0.0/12 block rather than only 172.16.x.x.

    Args:
        pcap_path: Path of the capture file passed to ``tshark -r``.

    Returns:
        Sorted list (string order) of the distinct external addresses seen
        in ``ip.src`` or ``ip.dst``.

    Raises:
        FileNotFoundError: If the ``tshark`` executable cannot be started.
        subprocess.TimeoutExpired: If tshark runs longer than 120 seconds.
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-T", "fields", "-e", "ip.src", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120,
    )
    ips = set()
    for line in result.stdout.splitlines():
        for field in line.split("\t"):
            # A field that occurs several times in one frame is printed as
            # a comma-joined list; treat every element as its own address.
            for candidate in field.split(","):
                candidate = candidate.strip()
                if not candidate:
                    continue
                try:
                    address = ipaddress.ip_address(candidate)
                except ValueError:
                    # Not an IP literal; nothing to report.
                    continue
                if (address.is_private or address.is_loopback
                        or address.is_link_local or address.is_multicast
                        or address.is_unspecified):
                    continue
                ips.add(candidate)
    return sorted(ips)
|
|
|
|
|
|
def generate_report(pcap_path, protocols, top_talkers, dns_queries, dns_tunneling,
                    urls, port_scans, external_ips):
    """Generate network traffic analysis report.

    Args:
        pcap_path: Path of the analysed capture, echoed into the report.
        protocols: Protocol statistics, stored unchanged.
        top_talkers: Top-talker statistics, stored unchanged.
        dns_queries: List of dicts that each carry a ``"query"`` name.
        dns_tunneling: DNS tunneling alerts, stored unchanged.
        urls: List of extracted HTTP URL records; the first 20 are included
            as a sample.
        port_scans: Port-scan detections, stored unchanged.
        external_ips: List of external IP addresses, stored unchanged.

    Returns:
        Dict holding the inputs plus totals and an ``"ioc_summary"`` of
        distinct external IPs, domains and URLs. ``"report_time"`` is an
        ISO-8601 UTC timestamp with an explicit ``+00:00`` offset.
    """
    # Count distinct URLs, matching the "unique_" label; records without a
    # "full_url" key are compared by their string form.
    distinct_urls = {
        u["full_url"] if isinstance(u, dict) and "full_url" in u else str(u)
        for u in urls
    }
    distinct_domains = {q["query"] for q in dns_queries}

    return {
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        "report_time": datetime.now(timezone.utc).isoformat(),
        "pcap_file": pcap_path,
        "protocol_statistics": protocols,
        "top_talkers": top_talkers,
        "dns_queries_total": len(dns_queries),
        "dns_tunneling_alerts": dns_tunneling,
        "http_urls_extracted": len(urls),
        "http_urls_sample": urls[:20],
        "port_scan_detections": port_scans,
        "external_ips": external_ips,
        "ioc_summary": {
            "unique_external_ips": len(external_ips),
            "unique_domains": len(distinct_domains),
            "unique_urls": len(distinct_urls),
        },
    }
|
|
|
|
|
|
def main():
    """Parse CLI arguments, run every analysis and write the JSON report.

    Exits through ``parser.error`` (status 2) when the capture file cannot
    be opened or the ``tshark`` executable cannot be started.
    """
    parser = argparse.ArgumentParser(description="Network Traffic Analysis Agent (tshark/pyshark)")
    parser.add_argument("--pcap", required=True, help="PCAP or PCAPNG file to analyze")
    parser.add_argument("--output", default="traffic_analysis_report.json",
                        help="Path of the JSON report to write")
    parser.add_argument("--top-n", type=int, default=20, help="Top N talkers to report")
    parser.add_argument("--scan-threshold", type=int, default=20, help="Port scan detection threshold")
    args = parser.parse_args()

    # None of the tshark helpers inspect the exit status, so an unreadable
    # capture would otherwise produce an empty report that looks successful.
    try:
        with open(args.pcap, "rb"):
            pass
    except OSError as exc:
        parser.error(f"cannot read capture file {args.pcap!r}: {exc}")

    print(f"[*] Analyzing: {args.pcap}")
    try:
        protocols = get_protocol_stats(args.pcap)
        top_talkers = get_top_talkers(args.pcap, args.top_n)
        dns_queries = extract_dns_queries(args.pcap)
        dns_tunneling = detect_dns_tunneling(dns_queries)
        urls = extract_http_urls(args.pcap)
        port_scans = detect_port_scan(args.pcap, args.scan_threshold)
        external_ips = extract_unique_ips(args.pcap)
    except FileNotFoundError:
        # The capture was opened above, so this is subprocess failing to
        # start the tshark executable.
        parser.error("tshark executable not found on PATH")

    report = generate_report(args.pcap, protocols, top_talkers, dns_queries,
                             dns_tunneling, urls, port_scans, external_ips)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, default=str)

    print(f"[+] Protocols: {len(protocols)} | DNS queries: {len(dns_queries)} | URLs: {len(urls)}")
    print(f"[+] Port scans: {len(port_scans)} | DNS tunneling alerts: {len(dns_tunneling)}")
    print(f"[+] Report saved to {args.output}")
|
|
|
|
|
|
# Run the CLI only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|