Files
Anthropic-Cybersecurity-Skills/skills/performing-firmware-malware-analysis/scripts/agent.py
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

216 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""Agent for performing firmware malware analysis.
Automates firmware extraction with binwalk, filesystem analysis,
string extraction, entropy analysis, and IOC identification.
"""
import subprocess
import os
import sys
import hashlib
import json
import re
from pathlib import Path
class FirmwareAnalysisAgent:
"""Automates firmware image analysis and malware detection."""
def __init__(self, firmware_path, output_dir):
self.firmware_path = Path(firmware_path)
self.output_dir = Path(output_dir)
self.output_dir.mkdir(parents=True, exist_ok=True)
def compute_hash(self):
"""Compute SHA-256 hash of the firmware image."""
sha256 = hashlib.sha256(self.firmware_path.read_bytes()).hexdigest()
return {"sha256": sha256, "size": self.firmware_path.stat().st_size}
def run_binwalk_scan(self):
"""Scan firmware with binwalk to identify embedded components."""
result = subprocess.run(
["binwalk", str(self.firmware_path)],
capture_output=True, text=True,
timeout=120,
)
return {"output": result.stdout, "returncode": result.returncode}
def run_binwalk_extract(self):
"""Extract firmware components with binwalk recursive extraction."""
extract_dir = self.output_dir / "extracted"
result = subprocess.run(
["binwalk", "-eM", "-C", str(extract_dir), str(self.firmware_path)],
capture_output=True, text=True,
timeout=120,
)
return {"extract_dir": str(extract_dir), "returncode": result.returncode}
def run_entropy_analysis(self):
"""Run binwalk entropy analysis to detect encrypted/compressed regions."""
result = subprocess.run(
["binwalk", "-E", str(self.firmware_path)],
capture_output=True, text=True,
timeout=120,
)
return {"output": result.stdout}
def find_filesystem_root(self):
"""Locate extracted filesystem root directory."""
extract_dir = self.output_dir / "extracted"
for root, dirs, files in os.walk(str(extract_dir)):
if "squashfs-root" in dirs:
return Path(root) / "squashfs-root"
if "etc" in dirs and "bin" in dirs:
return Path(root)
return None
def search_hardcoded_credentials(self, fs_root):
"""Search filesystem for hardcoded credentials."""
findings = []
patterns = [
(r"password|passwd|secret|api_key|token", "credential_keyword"),
(r"root:\$[0-9a-zA-Z\$\.\/]+", "password_hash"),
(r"ssh-rsa\s+[A-Za-z0-9+/=]+", "ssh_key"),
]
search_dirs = ["etc", "usr/bin", "usr/sbin", "home", "root", "var"]
for search_dir in search_dirs:
target = fs_root / search_dir
if not target.exists():
continue
for filepath in target.rglob("*"):
if not filepath.is_file():
continue
try:
content = filepath.read_text(errors="ignore")
for pattern, finding_type in patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
if matches:
findings.append({
"file": str(filepath.relative_to(fs_root)),
"type": finding_type,
"match_count": len(matches),
"samples": matches[:3],
})
except (PermissionError, OSError):
continue
return findings
def check_startup_scripts(self, fs_root):
"""Analyze startup scripts for backdoor entries."""
scripts = []
startup_paths = [
"etc/init.d", "etc/rc.d", "etc/inittab", "etc/rcS",
"etc/crontab", "etc/cron.d",
]
suspicious_patterns = [
r"nc\s+-[el]", r"netcat", r"bash\s+-i", r"wget\s+http",
r"curl\s+http.*\|.*sh", r"telnetd", r"/dev/tcp/",
]
for spath in startup_paths:
target = fs_root / spath
if target.is_file():
scripts.append(self._analyze_script(target, fs_root, suspicious_patterns))
elif target.is_dir():
for f in target.iterdir():
if f.is_file():
scripts.append(self._analyze_script(f, fs_root, suspicious_patterns))
return [s for s in scripts if s]
def _analyze_script(self, filepath, fs_root, patterns):
"""Analyze a single script for suspicious content."""
try:
content = filepath.read_text(errors="ignore")
except (PermissionError, OSError):
return None
suspicious = []
for pattern in patterns:
if re.search(pattern, content, re.IGNORECASE):
suspicious.append(pattern)
return {
"file": str(filepath.relative_to(fs_root)),
"size": filepath.stat().st_size,
"suspicious_patterns": suspicious,
"is_suspicious": len(suspicious) > 0,
}
def find_elf_binaries(self, fs_root):
"""Identify all ELF binaries in the extracted filesystem."""
binaries = []
for filepath in fs_root.rglob("*"):
if not filepath.is_file():
continue
try:
result = subprocess.run(
["file", "--brief", str(filepath)],
capture_output=True, text=True,
timeout=120,
)
if "ELF" in result.stdout:
binaries.append({
"file": str(filepath.relative_to(fs_root)),
"type": result.stdout.strip(),
"size": filepath.stat().st_size,
"sha256": hashlib.sha256(filepath.read_bytes()).hexdigest(),
})
except (PermissionError, OSError):
continue
return binaries
def extract_strings_iocs(self, fs_root):
"""Extract IOCs (IPs, URLs, domains) from firmware binaries."""
iocs = {"ips": set(), "urls": set(), "domains": set()}
ip_pattern = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
url_pattern = re.compile(r"https?://[^\s\"'<>]+")
domain_pattern = re.compile(r"\b[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.[a-z]{2,}\b")
for filepath in fs_root.rglob("*"):
if not filepath.is_file() or filepath.stat().st_size > 10_000_000:
continue
try:
content = filepath.read_bytes().decode("utf-8", errors="ignore")
iocs["ips"].update(ip_pattern.findall(content))
iocs["urls"].update(url_pattern.findall(content))
iocs["domains"].update(domain_pattern.findall(content))
except (PermissionError, OSError):
continue
private_ips = {"127.0.0.1", "0.0.0.0", "255.255.255.255"}
iocs["ips"] -= private_ips
return {k: sorted(v) for k, v in iocs.items()}
def generate_report(self):
"""Run full firmware analysis and generate a report."""
report = {"firmware": str(self.firmware_path), "hash": self.compute_hash()}
report["binwalk_scan"] = self.run_binwalk_scan()
self.run_binwalk_extract()
fs_root = self.find_filesystem_root()
if fs_root:
report["credentials"] = self.search_hardcoded_credentials(fs_root)
report["startup_scripts"] = self.check_startup_scripts(fs_root)
report["elf_binaries"] = self.find_elf_binaries(fs_root)[:50]
report["iocs"] = self.extract_strings_iocs(fs_root)
else:
report["filesystem"] = "No filesystem root found"
report_path = self.output_dir / "firmware_analysis_report.json"
with open(report_path, "w") as f:
json.dump(report, f, indent=2, default=list)
print(json.dumps(report, indent=2, default=list))
return report
def main():
if len(sys.argv) < 3:
print("Usage: agent.py <firmware_image> <output_dir>")
sys.exit(1)
agent = FirmwareAnalysisAgent(sys.argv[1], sys.argv[2])
agent.generate_report()
if __name__ == "__main__":
main()