Anthropic-Cybersecurity-Skills/skills/analyzing-malware-behavior-with-cuckoo-sandbox/scripts/agent.py

#!/usr/bin/env python3
"""Cuckoo Sandbox behavioral analysis agent for automated malware detonation and reporting."""

import json
import os
import sys
import hashlib

try:
    import requests
    HAS_REQUESTS = True
except ImportError:
    HAS_REQUESTS = False


# Base URL used to build every REST endpoint below; override it with the
# CUCKOO_API environment variable.
CUCKOO_API = os.getenv("CUCKOO_API", "http://localhost:8090")
# Root directory searched for "<task_id>/reports/report.json"; override it
# with the CUCKOO_STORAGE environment variable.
CUCKOO_STORAGE = os.getenv("CUCKOO_STORAGE", "/opt/cuckoo/storage/analyses")


def submit_file(filepath, timeout=300, machine=None, package=None):
    """Submit a malware sample to Cuckoo via REST API.

    Args:
        filepath: Path of the sample to upload; only its base name is sent
            as the upload file name.
        timeout: Value forwarded as the ``timeout`` form field of the task.
        machine: Optional ``machine`` form field; sent only when truthy.
        package: Optional ``package`` form field; sent only when truthy.

    Returns:
        The ``task_id`` entry of the JSON response when the API answers with
        HTTP 200, otherwise ``None``. ``None`` is also returned when the
        ``requests`` package is not installed.

    Raises:
        OSError: If the sample cannot be opened for reading.
    """
    if not HAS_REQUESTS:
        return None
    url = f"{CUCKOO_API}/tasks/create/file"
    data = {"timeout": timeout}
    if machine:
        data["machine"] = machine
    if package:
        data["package"] = package
    # Keep the sample open only while it is being uploaded so the handle is
    # released even when the request raises.
    with open(filepath, "rb") as sample:
        files = {"file": (os.path.basename(filepath), sample)}
        resp = requests.post(url, files=files, data=data, timeout=30)
    if resp.status_code == 200:
        return resp.json().get("task_id")
    return None


def submit_url(url_to_analyze, timeout=300):
    """Create a Cuckoo URL-analysis task.

    Posts ``url_to_analyze`` and ``timeout`` as form fields to the
    ``/tasks/create/url`` endpoint.

    Returns:
        The ``task_id`` entry of the JSON response on HTTP 200, otherwise
        ``None`` (also ``None`` when ``requests`` is not installed).
    """
    if not HAS_REQUESTS:
        return None
    response = requests.post(
        f"{CUCKOO_API}/tasks/create/url",
        data={"url": url_to_analyze, "timeout": timeout},
        timeout=30,
    )
    if response.status_code != 200:
        return None
    return response.json().get("task_id")


def get_task_status(task_id):
    """Query the ``/tasks/view/<task_id>`` endpoint for a task's status.

    Returns:
        The ``status`` value nested under ``task`` in the JSON response on
        HTTP 200, otherwise ``None`` (also ``None`` when ``requests`` is not
        installed or the response carries no status).
    """
    if not HAS_REQUESTS:
        return None
    response = requests.get(f"{CUCKOO_API}/tasks/view/{task_id}", timeout=30)
    if response.status_code != 200:
        return None
    task_info = response.json().get("task", {})
    return task_info.get("status")


def load_report(task_id, report_dir=None):
    """Read ``<report_dir>/<task_id>/reports/report.json`` from disk.

    Args:
        task_id: Task identifier; converted with ``str`` to form the
            directory name.
        report_dir: Directory containing the per-task folders. Falls back to
            ``CUCKOO_STORAGE`` when ``None``.

    Returns:
        The parsed JSON document, or ``None`` when the report file does not
        exist.
    """
    base_dir = CUCKOO_STORAGE if report_dir is None else report_dir
    report_path = os.path.join(base_dir, str(task_id), "reports", "report.json")
    if not os.path.exists(report_path):
        return None
    with open(report_path, "r") as handle:
        return json.load(handle)


def _call_arguments(call):
    """Return the arguments of one API call record as a ``{name: value}`` dict."""
    arguments = call.get("arguments") or []
    if isinstance(arguments, dict):
        # Already a name -> value mapping; copy it so the result does not
        # alias the report data.
        return dict(arguments)
    # List form: one {"name": ..., "value": ...} record per argument.
    # Records that are not dicts or carry no name are skipped instead of
    # aborting the whole analysis.
    return {
        arg["name"]: arg.get("value")
        for arg in arguments
        if isinstance(arg, dict) and "name" in arg
    }


def analyze_processes(report):
    """Extract and analyze the process tree from the Cuckoo report.

    Walks ``report["behavior"]["processes"]`` and, for every process, collects
    the API calls whose name is in a fixed list of APIs treated as suspicious.

    Call arguments are accepted either as a list of ``{"name", "value"}``
    records or as a plain ``{name: value}`` mapping.
    NOTE(review): the mapping form is believed to be what Cuckoo 2.x reports
    contain -- confirm against a real report.

    Args:
        report: Parsed report dictionary.

    Returns:
        A list with one dict per process holding ``pid``, ``ppid``, ``name``,
        ``suspicious_api_calls`` (total number of matching calls) and
        ``top_suspicious`` (the first 10 matching calls with their arguments).
    """
    # Built once per call rather than once per process; a frozenset keeps the
    # membership test constant-time across large call traces.
    dangerous_apis = frozenset((
        "CreateRemoteThread", "VirtualAllocEx", "WriteProcessMemory",
        "NtCreateThreadEx", "RegSetValueExA", "URLDownloadToFileA",
        "ShellExecuteA", "ShellExecuteW", "WinExec", "CreateProcessA",
        "NtWriteVirtualMemory", "QueueUserAPC",
    ))
    processes = []
    for proc in report.get("behavior", {}).get("processes", []):
        suspicious_apis = []
        for call in proc.get("calls", []):
            api = call.get("api")
            if api not in dangerous_apis:
                continue
            suspicious_apis.append({"api": api, "args": _call_arguments(call)})
        processes.append({
            "pid": proc.get("pid"),
            "ppid": proc.get("ppid"),
            "name": proc.get("process_name"),
            "suspicious_api_calls": len(suspicious_apis),
            "top_suspicious": suspicious_apis[:10],
        })
    return processes


def analyze_network(report):
    """Summarise the ``network`` section of a Cuckoo report.

    Returns:
        A dict with four lists: ``dns`` (request and answers), ``http``
        (method, host, uri and the length of the body), and
        ``tcp_connections`` / ``udp_connections`` (source and destination
        address and port of each entry).
    """
    network = report.get("network", {})

    def endpoints(section):
        # TCP and UDP entries are reduced to the same four fields.
        return [
            {"src": conn.get("src"), "sport": conn.get("sport"),
             "dst": conn.get("dst"), "dport": conn.get("dport")}
            for conn in network.get(section, [])
        ]

    dns_records = []
    for query in network.get("dns", []):
        dns_records.append({
            "request": query.get("request"),
            "answers": query.get("answers", []),
        })

    http_records = []
    for request in network.get("http", []):
        body = request.get("body", "")
        http_records.append({
            "method": request.get("method"),
            "host": request.get("host"),
            "uri": request.get("uri"),
            "body_size": len(body),
        })

    return {
        "dns": dns_records,
        "http": http_records,
        "tcp_connections": endpoints("tcp"),
        "udp_connections": endpoints("udp"),
    }


def analyze_dropped_files(report):
    """List the entries of the report's ``dropped`` section.

    Returns:
        One dict per dropped entry with ``filepath``, ``sha256``, ``size``
        and ``type``; missing fields default to ``""`` (``0`` for ``size``).
    """
    return [
        {
            "filepath": entry.get("filepath", ""),
            "sha256": entry.get("sha256", ""),
            "size": entry.get("size", 0),
            "type": entry.get("type", ""),
        }
        for entry in report.get("dropped", [])
    ]


def analyze_signatures(report):
    """Extract triggered behavioral signatures.

    For every entry of ``report["signatures"]`` the name, severity and
    description are kept together with up to five marks. A mark contributes
    its ``ioc`` value when that is truthy, otherwise the ``api`` name of its
    ``call`` record when one is present.

    Args:
        report: Parsed report dictionary.

    Returns:
        The signature dicts ordered by descending severity. Signatures with
        no severity keep ``severity`` set to ``None`` in the output and are
        ordered as if their severity were 0.
    """
    signatures = []
    for sig in report.get("signatures", []):
        marks = []
        for mark in sig.get("marks", []):
            if mark.get("ioc"):
                marks.append(mark["ioc"])
            elif mark.get("call"):
                marks.append(mark["call"].get("api", ""))
        signatures.append({
            "name": sig.get("name"),
            "severity": sig.get("severity"),
            "description": sig.get("description"),
            "marks": marks[:5],
        })
    # Every dict built above has a "severity" key, so a .get() default would
    # never apply; coerce a missing (None) severity to 0 explicitly so that
    # sorting does not compare None with int.
    return sorted(
        signatures,
        key=lambda entry: entry["severity"] or 0,
        reverse=True,
    )


def analyze_registry(report):
    """Collect registry keys, files and mutexes from the behavior summary.

    Reads ``report["behavior"]["summary"]`` and truncates each list.

    Returns:
        A dict with ``keys_modified`` (first 20 of ``keys``),
        ``files_created`` (first 20 of ``files``) and ``mutexes`` (first 10
        of ``mutexes``).
    """
    behavior = report.get("behavior", {})
    summary = behavior.get("summary", {})
    registry_keys = summary.get("keys", [])
    files = summary.get("files", [])
    mutexes = summary.get("mutexes", [])
    return {
        "keys_modified": registry_keys[:20],
        "files_created": files[:20],
        "mutexes": mutexes[:10],
    }


def generate_summary(report, processes, network, dropped, signatures, registry):
    """Generate a consolidated analysis summary.

    Args:
        report: Parsed report dictionary; only its ``info`` section is read.
        processes: Per-process dicts, each with a ``suspicious_api_calls`` count.
        network: Dict with ``dns``, ``http`` and ``tcp_connections`` lists.
        dropped: List of dropped-file entries.
        signatures: Signature dicts, each with a ``severity`` entry.
        registry: Dict with ``keys_modified`` and ``files_created`` lists.

    Returns:
        A flat dict of task metadata taken from ``info`` plus the counts of
        the supplied collections. ``high_severity_sigs`` counts signatures
        whose severity is at least 3; a missing (``None``) severity is
        counted as 0.
    """
    info = report.get("info", {})
    # "machine" may be absent or None; only a dict carries a "name" entry.
    # Any other truthy value is passed through unchanged.
    machine = info.get("machine")
    if isinstance(machine, dict):
        machine_name = machine.get("name", "")
    else:
        machine_name = machine or ""
    return {
        "task_id": info.get("id"),
        "sample": info.get("category", "file"),
        "analysis_time": info.get("duration", 0),
        "machine": machine_name,
        "threat_score": info.get("score", 0),
        "process_count": len(processes),
        "suspicious_api_total": sum(p["suspicious_api_calls"] for p in processes),
        "dns_queries": len(network["dns"]),
        "http_requests": len(network["http"]),
        "tcp_connections": len(network["tcp_connections"]),
        "dropped_files": len(dropped),
        "signatures_triggered": len(signatures),
        # Guard against None so the comparison cannot raise TypeError.
        "high_severity_sigs": sum(
            1 for s in signatures if (s.get("severity") or 0) >= 3
        ),
        "registry_keys_modified": len(registry["keys_modified"]),
        "files_created": len(registry["files_created"]),
    }


if __name__ == "__main__":
    # Command-line entry point. The single argument selects the mode:
    #   * an existing path ending in ".json" -> parse that report file
    #   * an all-digit string                -> load the stored report of that task
    #   * any other existing path            -> submit the file as a sample
    print("=" * 60)
    print("Cuckoo Sandbox Behavioral Analysis Agent")
    print("Automated malware detonation and report parsing")
    print("=" * 60)

    if len(sys.argv) > 1:
        arg = sys.argv[1]

        # Check if argument is a report JSON path
        if arg.endswith(".json") and os.path.exists(arg):
            print(f"\n[*] Loading report: {arg}")
            with open(arg, "r") as f:
                report = json.load(f)
        elif arg.isdigit():
            print(f"\n[*] Loading report for task ID: {arg}")
            report = load_report(int(arg))
        elif os.path.exists(arg):
            print(f"\n[*] Submitting sample: {arg}")
            # Hash in fixed-size chunks inside a context manager so the
            # handle is closed and large samples are not read into memory
            # in one piece.
            digest = hashlib.sha256()
            with open(arg, "rb") as sample:
                for chunk in iter(lambda: sample.read(1024 * 1024), b""):
                    digest.update(chunk)
            sha256 = digest.hexdigest()
            print(f"[*] SHA-256: {sha256}")
            task_id = submit_file(arg)
            if task_id:
                print(f"[*] Task submitted: ID={task_id}")
                # NOTE(review): the monitoring URL is derived by swapping the
                # text "8090" for "8080" anywhere in CUCKOO_API -- presumably
                # the API and web UI ports; confirm for non-default setups.
                print(f"[*] Monitor at: {CUCKOO_API.replace('8090', '8080')}/analysis/{task_id}/")
            else:
                print("[ERROR] Failed to submit. Check Cuckoo API connection.")
            sys.exit(0)
        else:
            report = None

        if report:
            processes = analyze_processes(report)
            network = analyze_network(report)
            dropped = analyze_dropped_files(report)
            signatures = analyze_signatures(report)
            registry = analyze_registry(report)
            summary = generate_summary(report, processes, network, dropped, signatures, registry)

            print("\n--- Analysis Summary ---")
            print(f"  Score: {summary['threat_score']}/10")
            print(f"  Processes: {summary['process_count']}")
            print(f"  Suspicious APIs: {summary['suspicious_api_total']}")
            print(f"  Signatures: {summary['signatures_triggered']} "
                  f"({summary['high_severity_sigs']} high severity)")

            print("\n--- Network ---")
            print(f"  DNS: {summary['dns_queries']}, HTTP: {summary['http_requests']}, "
                  f"TCP: {summary['tcp_connections']}")
            for http in network["http"][:5]:
                print(f"    {http['method']} {http['host']}{http['uri']}")

            print("\n--- Dropped Files ---")
            for d in dropped[:5]:
                print(f"    {d['filepath']} ({d['size']} bytes)")

            print("\n--- Top Signatures ---")
            for s in signatures[:5]:
                print(f"  [{s['severity']}/5] {s['name']}: {s['description']}")
        else:
            # Previously this case ended silently; tell the user why nothing
            # was analysed (unknown path, missing or empty report).
            print(f"\n[ERROR] No report could be loaded for: {arg}")
    else:
        print("\n[DEMO] Usage:")
        print("  python agent.py <sample.exe>      # Submit to Cuckoo")
        print("  python agent.py <task_id>          # Parse existing report")
        print("  python agent.py <report.json>      # Parse JSON report file")