Anthropic-Cybersecurity-Skills/skills/performing-network-traffic-analysis-with-tshark/scripts/agent.py

#!/usr/bin/env python3
"""Network traffic analysis agent using tshark and pyshark for PCAP analysis."""

import json
import math
import subprocess
import argparse
import re
from datetime import datetime
from collections import defaultdict, Counter

try:
    import pyshark
    HAS_PYSHARK = True
except ImportError:
    HAS_PYSHARK = False


def get_protocol_stats(pcap_path):
    """Extract protocol hierarchy statistics using tshark."""
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-q", "-z", "io,phs"],
        capture_output=True, text=True, timeout=120
    )
    protocols = {}
    for line in result.stdout.splitlines():
        match = re.match(r"\s+([\w.]+)\s+frames:(\d+)\s+bytes:(\d+)", line)
        if match:
            protocols[match.group(1)] = {
                "frames": int(match.group(2)), "bytes": int(match.group(3))
            }
    return protocols


def get_conversations(pcap_path):
    """Extract IP conversations using tshark."""
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-q", "-z", "conv,ip"],
        capture_output=True, text=True, timeout=120
    )
    conversations = []
    for line in result.stdout.splitlines():
        parts = line.split()
        if len(parts) >= 10 and "<->" in line:
            idx = parts.index("<->")
            conversations.append({
                "src": parts[idx - 1], "dst": parts[idx + 1],
                "frames_total": parts[idx + 2] if len(parts) > idx + 2 else "0",
            })
    return conversations


def get_top_talkers(pcap_path, top_n=20):
    """Identify top source and destination IPs by packet count."""
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-T", "fields", "-e", "ip.src", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120
    )
    src_counts = Counter()
    dst_counts = Counter()
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if len(parts) >= 2:
            src_counts[parts[0]] += 1
            dst_counts[parts[1]] += 1
    return {
        "top_sources": src_counts.most_common(top_n),
        "top_destinations": dst_counts.most_common(top_n),
    }


def extract_dns_queries(pcap_path):
    """Extract DNS queries from the capture."""
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-Y", "dns.qry.name", "-T", "fields",
         "-e", "dns.qry.name", "-e", "dns.qry.type", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120
    )
    queries = []
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if parts and parts[0]:
            queries.append({
                "query": parts[0],
                "type": parts[1] if len(parts) > 1 else "",
                "resolver": parts[2] if len(parts) > 2 else "",
            })
    return queries


def detect_dns_tunneling(dns_queries, entropy_threshold=3.5, length_threshold=40):
    """Detect DNS tunneling via high-entropy or long subdomain queries."""
    suspicious = []
    for q in dns_queries:
        domain = q["query"]
        subdomain = domain.split(".")[0] if "." in domain else domain
        if len(subdomain) < 5:
            continue
        entropy = _calculate_entropy(subdomain)
        if entropy > entropy_threshold or len(subdomain) > length_threshold:
            suspicious.append({
                "query": domain, "subdomain_length": len(subdomain),
                "entropy": round(entropy, 3),
                "severity": "high" if entropy > 4.0 else "medium",
                "indicator": "Possible DNS tunneling",
            })
    return suspicious


def _calculate_entropy(text):
    """Calculate Shannon entropy of a string."""
    if not text:
        return 0.0
    freq = Counter(text)
    length = len(text)
    return -sum((c / length) * math.log2(c / length) for c in freq.values())


def extract_http_urls(pcap_path):
    """Extract HTTP request URIs from the capture."""
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-Y", "http.request", "-T", "fields",
         "-e", "http.host", "-e", "http.request.uri", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120
    )
    urls = []
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if len(parts) >= 2 and parts[0]:
            urls.append({
                "host": parts[0],
                "uri": parts[1] if len(parts) > 1 else "/",
                "dst_ip": parts[2] if len(parts) > 2 else "",
                "full_url": f"http://{parts[0]}{parts[1] if len(parts) > 1 else '/'}",
            })
    return urls


def detect_port_scan(pcap_path, threshold=20):
    """Detect port scanning patterns (single source hitting many ports)."""
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-Y", "tcp.flags.syn==1 && tcp.flags.ack==0",
         "-T", "fields", "-e", "ip.src", "-e", "ip.dst", "-e", "tcp.dstport"],
        capture_output=True, text=True, timeout=120
    )
    src_dst_ports = defaultdict(set)
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if len(parts) >= 3:
            key = f"{parts[0]}->{parts[1]}"
            src_dst_ports[key].add(parts[2])
    scans = []
    for pair, ports in src_dst_ports.items():
        if len(ports) >= threshold:
            src, dst = pair.split("->")
            scans.append({
                "source": src, "target": dst,
                "unique_ports": len(ports), "severity": "high",
                "indicator": f"Port scan: {len(ports)} unique ports probed",
            })
    return scans


def extract_unique_ips(pcap_path):
    """Extract all unique external IPs from the capture."""
    result = subprocess.run(
        ["tshark", "-r", pcap_path, "-T", "fields", "-e", "ip.src", "-e", "ip.dst"],
        capture_output=True, text=True, timeout=120
    )
    ips = set()
    for line in result.stdout.splitlines():
        for ip in line.split("\t"):
            ip = ip.strip()
            if ip and not ip.startswith(("10.", "192.168.", "172.16.", "127.")):
                ips.add(ip)
    return sorted(ips)


def generate_report(pcap_path, protocols, top_talkers, dns_queries, dns_tunneling,
                    urls, port_scans, external_ips):
    """Generate network traffic analysis report."""
    return {
        "report_time": datetime.utcnow().isoformat(),
        "pcap_file": pcap_path,
        "protocol_statistics": protocols,
        "top_talkers": top_talkers,
        "dns_queries_total": len(dns_queries),
        "dns_tunneling_alerts": dns_tunneling,
        "http_urls_extracted": len(urls),
        "http_urls_sample": urls[:20],
        "port_scan_detections": port_scans,
        "external_ips": external_ips,
        "ioc_summary": {
            "unique_external_ips": len(external_ips),
            "unique_domains": len({q["query"] for q in dns_queries}),
            "unique_urls": len(urls),
        },
    }


def main():
    parser = argparse.ArgumentParser(description="Network Traffic Analysis Agent (tshark/pyshark)")
    parser.add_argument("--pcap", required=True, help="PCAP or PCAPNG file to analyze")
    parser.add_argument("--output", default="traffic_analysis_report.json")
    parser.add_argument("--top-n", type=int, default=20, help="Top N talkers to report")
    parser.add_argument("--scan-threshold", type=int, default=20, help="Port scan detection threshold")
    args = parser.parse_args()

    print(f"[*] Analyzing: {args.pcap}")
    protocols = get_protocol_stats(args.pcap)
    top_talkers = get_top_talkers(args.pcap, args.top_n)
    dns_queries = extract_dns_queries(args.pcap)
    dns_tunneling = detect_dns_tunneling(dns_queries)
    urls = extract_http_urls(args.pcap)
    port_scans = detect_port_scan(args.pcap, args.scan_threshold)
    external_ips = extract_unique_ips(args.pcap)

    report = generate_report(args.pcap, protocols, top_talkers, dns_queries,
                            dns_tunneling, urls, port_scans, external_ips)
    with open(args.output, "w") as f:
        json.dump(report, f, indent=2, default=str)
    print(f"[+] Protocols: {len(protocols)} | DNS queries: {len(dns_queries)} | URLs: {len(urls)}")
    print(f"[+] Port scans: {len(port_scans)} | DNS tunneling alerts: {len(dns_tunneling)}")
    print(f"[+] Report saved to {args.output}")


if __name__ == "__main__":
    main()