Anthropic-Cybersecurity-Skills/skills/performing-firmware-malware-analysis/scripts/agent.py

#!/usr/bin/env python3
"""Agent for performing firmware malware analysis.

Automates firmware extraction with binwalk, filesystem analysis,
string extraction, entropy analysis, and IOC identification.
"""

import subprocess
import os
import sys
import hashlib
import json
import re
from pathlib import Path


class FirmwareAnalysisAgent:
    """Automates firmware image analysis and malware detection."""

    def __init__(self, firmware_path, output_dir):
        self.firmware_path = Path(firmware_path)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def compute_hash(self):
        """Compute SHA-256 hash of the firmware image."""
        sha256 = hashlib.sha256(self.firmware_path.read_bytes()).hexdigest()
        return {"sha256": sha256, "size": self.firmware_path.stat().st_size}

    def run_binwalk_scan(self):
        """Scan firmware with binwalk to identify embedded components."""
        result = subprocess.run(
            ["binwalk", str(self.firmware_path)],
            capture_output=True, text=True
        )
        return {"output": result.stdout, "returncode": result.returncode}

    def run_binwalk_extract(self):
        """Extract firmware components with binwalk recursive extraction."""
        extract_dir = self.output_dir / "extracted"
        result = subprocess.run(
            ["binwalk", "-eM", "-C", str(extract_dir), str(self.firmware_path)],
            capture_output=True, text=True
        )
        return {"extract_dir": str(extract_dir), "returncode": result.returncode}

    def run_entropy_analysis(self):
        """Run binwalk entropy analysis to detect encrypted/compressed regions."""
        result = subprocess.run(
            ["binwalk", "-E", str(self.firmware_path)],
            capture_output=True, text=True
        )
        return {"output": result.stdout}

    def find_filesystem_root(self):
        """Locate extracted filesystem root directory."""
        extract_dir = self.output_dir / "extracted"
        for root, dirs, files in os.walk(str(extract_dir)):
            if "squashfs-root" in dirs:
                return Path(root) / "squashfs-root"
            if "etc" in dirs and "bin" in dirs:
                return Path(root)
        return None

    def search_hardcoded_credentials(self, fs_root):
        """Search filesystem for hardcoded credentials."""
        findings = []
        patterns = [
            (r"password|passwd|secret|api_key|token", "credential_keyword"),
            (r"root:\$[0-9a-zA-Z\$\.\/]+", "password_hash"),
            (r"ssh-rsa\s+[A-Za-z0-9+/=]+", "ssh_key"),
        ]

        search_dirs = ["etc", "usr/bin", "usr/sbin", "home", "root", "var"]
        for search_dir in search_dirs:
            target = fs_root / search_dir
            if not target.exists():
                continue
            for filepath in target.rglob("*"):
                if not filepath.is_file():
                    continue
                try:
                    content = filepath.read_text(errors="ignore")
                    for pattern, finding_type in patterns:
                        matches = re.findall(pattern, content, re.IGNORECASE)
                        if matches:
                            findings.append({
                                "file": str(filepath.relative_to(fs_root)),
                                "type": finding_type,
                                "match_count": len(matches),
                                "samples": matches[:3],
                            })
                except (PermissionError, OSError):
                    continue
        return findings

    def check_startup_scripts(self, fs_root):
        """Analyze startup scripts for backdoor entries."""
        scripts = []
        startup_paths = [
            "etc/init.d", "etc/rc.d", "etc/inittab", "etc/rcS",
            "etc/crontab", "etc/cron.d",
        ]
        suspicious_patterns = [
            r"nc\s+-[el]", r"netcat", r"bash\s+-i", r"wget\s+http",
            r"curl\s+http.*\|.*sh", r"telnetd", r"/dev/tcp/",
        ]

        for spath in startup_paths:
            target = fs_root / spath
            if target.is_file():
                scripts.append(self._analyze_script(target, fs_root, suspicious_patterns))
            elif target.is_dir():
                for f in target.iterdir():
                    if f.is_file():
                        scripts.append(self._analyze_script(f, fs_root, suspicious_patterns))
        return [s for s in scripts if s]

    def _analyze_script(self, filepath, fs_root, patterns):
        """Analyze a single script for suspicious content."""
        try:
            content = filepath.read_text(errors="ignore")
        except (PermissionError, OSError):
            return None
        suspicious = []
        for pattern in patterns:
            if re.search(pattern, content, re.IGNORECASE):
                suspicious.append(pattern)
        return {
            "file": str(filepath.relative_to(fs_root)),
            "size": filepath.stat().st_size,
            "suspicious_patterns": suspicious,
            "is_suspicious": len(suspicious) > 0,
        }

    def find_elf_binaries(self, fs_root):
        """Identify all ELF binaries in the extracted filesystem."""
        binaries = []
        for filepath in fs_root.rglob("*"):
            if not filepath.is_file():
                continue
            try:
                result = subprocess.run(
                    ["file", "--brief", str(filepath)],
                    capture_output=True, text=True
                )
                if "ELF" in result.stdout:
                    binaries.append({
                        "file": str(filepath.relative_to(fs_root)),
                        "type": result.stdout.strip(),
                        "size": filepath.stat().st_size,
                        "sha256": hashlib.sha256(filepath.read_bytes()).hexdigest(),
                    })
            except (PermissionError, OSError):
                continue
        return binaries

    def extract_strings_iocs(self, fs_root):
        """Extract IOCs (IPs, URLs, domains) from firmware binaries."""
        iocs = {"ips": set(), "urls": set(), "domains": set()}
        ip_pattern = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
        url_pattern = re.compile(r"https?://[^\s\"'<>]+")
        domain_pattern = re.compile(r"\b[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.[a-z]{2,}\b")

        for filepath in fs_root.rglob("*"):
            if not filepath.is_file() or filepath.stat().st_size > 10_000_000:
                continue
            try:
                content = filepath.read_bytes().decode("utf-8", errors="ignore")
                iocs["ips"].update(ip_pattern.findall(content))
                iocs["urls"].update(url_pattern.findall(content))
                iocs["domains"].update(domain_pattern.findall(content))
            except (PermissionError, OSError):
                continue

        private_ips = {"127.0.0.1", "0.0.0.0", "255.255.255.255"}
        iocs["ips"] -= private_ips
        return {k: sorted(v) for k, v in iocs.items()}

    def generate_report(self):
        """Run full firmware analysis and generate a report."""
        report = {"firmware": str(self.firmware_path), "hash": self.compute_hash()}
        report["binwalk_scan"] = self.run_binwalk_scan()
        self.run_binwalk_extract()

        fs_root = self.find_filesystem_root()
        if fs_root:
            report["credentials"] = self.search_hardcoded_credentials(fs_root)
            report["startup_scripts"] = self.check_startup_scripts(fs_root)
            report["elf_binaries"] = self.find_elf_binaries(fs_root)[:50]
            report["iocs"] = self.extract_strings_iocs(fs_root)
        else:
            report["filesystem"] = "No filesystem root found"

        report_path = self.output_dir / "firmware_analysis_report.json"
        with open(report_path, "w") as f:
            json.dump(report, f, indent=2, default=list)
        print(json.dumps(report, indent=2, default=list))
        return report


def main():
    if len(sys.argv) < 3:
        print("Usage: agent.py <firmware_image> <output_dir>")
        sys.exit(1)
    agent = FirmwareAnalysisAgent(sys.argv[1], sys.argv[2])
    agent.generate_report()


if __name__ == "__main__":
    main()