mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-12 22:24:56 +03:00
449 lines
16 KiB
Python
449 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""Firmware extraction and analysis agent using binwalk for signature scanning,
|
|
entropy analysis, filesystem extraction, and string-based credential discovery."""
|
|
|
|
import argparse
|
|
import struct
|
|
import hashlib
|
|
import math
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import re
|
|
import json
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
|
|
DISCLAIMER = """
|
|
==========================================================================
|
|
AUTHORIZED USE ONLY -- This tool is intended for authorized security
|
|
testing, firmware research, and educational purposes. Ensure you have
|
|
explicit written permission before analyzing any firmware image you do
|
|
not own. Unauthorized access to or reverse engineering of proprietary
|
|
firmware may violate applicable laws and vendor agreements.
|
|
==========================================================================
|
|
"""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entropy Analysis
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def calculate_entropy(data):
    """Calculate the Shannon entropy of a byte sequence, in bits per symbol.

    Args:
        data: A sized iterable of hashable symbols, typically ``bytes``.

    Returns:
        The entropy rounded to 4 decimal places. The result is 0.0 for empty
        input or when every symbol is identical, and 8.0 when all 256 byte
        values occur equally often.
    """
    if not data:
        return 0.0
    length = len(data)
    entropy = -sum(
        (count / length) * math.log2(count / length)
        for count in Counter(data).values()
    )
    # A constant input sums to 0.0 and the negation yields -0.0, which would
    # be printed as "-0.0"; adding 0.0 normalizes it to positive zero.
    return round(entropy, 4) + 0.0
|
|
|
|
|
|
def entropy_map(file_path, block_size=4096):
    """Build a block-by-block entropy profile of a firmware image.

    The file is read sequentially in pieces of ``block_size`` bytes; the
    final piece may be shorter.

    Args:
        file_path: Path of the image to read.
        block_size: Number of bytes requested per read.

    Returns:
        A list with one dict per block, in file order, holding the block's
        ``offset``, ``offset_hex``, ``entropy`` and ``classification``.
    """
    profile = []
    position = 0
    with open(file_path, "rb") as image:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: image.read(block_size), b""):
            score = calculate_entropy(chunk)
            profile.append({
                "offset": position,
                "offset_hex": f"0x{position:08X}",
                "entropy": score,
                "classification": classify_entropy(score),
            })
            position += len(chunk)
    return profile
|
|
|
|
|
|
def classify_entropy(value):
    """Map an entropy value to a human-readable category label.

    Each band is identified by its exclusive upper bound; a value that is
    not below any bound is labelled "encrypted/random".
    """
    bands = (
        (1.0, "empty/padding"),
        (5.0, "plaintext/code"),
        (7.0, "compressed"),
        (7.9, "highly-compressed"),
    )
    for upper_bound, label in bands:
        if value < upper_bound:
            return label
    return "encrypted/random"
|
|
|
|
|
|
def detect_entropy_regions(entropy_data, threshold_high=7.0, threshold_low=1.0):
    """Merge consecutive blocks that share a classification into regions.

    Args:
        entropy_data: Sequence of per-block dicts providing ``offset``,
            ``offset_hex`` and ``classification``.
        threshold_high: Accepted but not consulted; grouping relies only on
            the ``classification`` label of each block.
        threshold_low: Accepted but not consulted, as above.

    Returns:
        A list of region dicts in input order. ``end_offset`` is the offset
        of the region's last block (where that block starts), so a region
        made of a single block has equal start and end offsets.
    """
    merged = []
    for block in entropy_data:
        label = block["classification"]
        if merged and merged[-1]["classification"] == label:
            # Same label as the open region: extend it by one block.
            tail = merged[-1]
            tail["end_offset"] = block["offset"]
            tail["block_count"] += 1
            continue
        merged.append({
            "start_offset": block["offset"],
            "start_hex": block["offset_hex"],
            "end_offset": block["offset"],
            "classification": label,
            "block_count": 1,
        })
    return merged
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Firmware Header Parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Magic byte sequences mapped to a description of the format they introduce.
MAGIC_SIGNATURES = {
    b"\x27\x05\x19\x56": "U-Boot image header (uImage)",
    b"\x68\x73\x71\x73": "SquashFS filesystem (little-endian)",
    b"\x73\x71\x73\x68": "SquashFS filesystem (big-endian)",
    b"\x45\x3D\xCD\x28": "CramFS filesystem",
    b"\x85\x19\x01\x20": "JFFS2 filesystem (little-endian)",
    b"\x19\x85\x20\x01": "JFFS2 filesystem (big-endian)",
    b"\x1F\x8B\x08": "gzip compressed data",
    b"\x5D\x00\x00": "LZMA compressed data",
    b"\xFD\x37\x7A\x58\x5A\x00": "XZ compressed data",
    b"\x30\x37\x30\x37\x30\x31": "CPIO archive",
    b"\x55\xAA": "x86 boot sector",
    b"\xD0\x0D\xFE\xED": "Device Tree Blob (DTB)",
    b"\x4D\x5A": "PE/COFF executable (EFI binary)",
    b"\x7F\x45\x4C\x46": "ELF executable",
    b"\x89\x50\x4E\x47": "PNG image",
    b"\xFF\xD8\xFF": "JPEG image",
}


def scan_signatures(file_path, chunk_size=65536):
    """Scan a firmware image for the magic byte sequences in MAGIC_SIGNATURES.

    The file is read in overlapping windows of ``chunk_size`` bytes. The
    overlap is one byte shorter than the longest signature, so a signature
    that straddles a window boundary is still seen whole in exactly one
    window. Each file position is attributed to a single window, which keeps
    a hit inside the overlap from being reported twice.

    Args:
        file_path: Path of the image to scan.
        chunk_size: Window size in bytes; must be at least as long as the
            longest signature.

    Returns:
        A list of dicts (``offset``, ``offset_hex``, ``magic_hex``,
        ``description``) sorted by ascending offset.

    Raises:
        ValueError: If ``chunk_size`` is shorter than the longest signature,
            in which case the scan window could never advance.
    """
    longest = max(len(magic) for magic in MAGIC_SIGNATURES)
    if chunk_size < longest:
        raise ValueError(
            f"chunk_size must be at least {longest} bytes, got {chunk_size}"
        )
    # Distance between window starts; positions at or beyond it are owned by
    # the following window.
    stride = chunk_size - longest + 1

    matches = []
    file_size = os.path.getsize(file_path)
    with open(file_path, "rb") as f:
        offset = 0
        while offset < file_size:
            f.seek(offset)
            data = f.read(chunk_size)
            if not data:
                break
            for magic, description in MAGIC_SIGNATURES.items():
                idx = data.find(magic)
                # find() returns ascending positions, so the first hit at or
                # past the stride ends this window's share for this magic.
                while idx != -1 and idx < stride:
                    absolute_offset = offset + idx
                    matches.append({
                        "offset": absolute_offset,
                        "offset_hex": f"0x{absolute_offset:08X}",
                        "magic_hex": magic.hex().upper(),
                        "description": description,
                    })
                    idx = data.find(magic, idx + 1)
            offset += stride
    matches.sort(key=lambda match: match["offset"])
    return matches
|
|
|
|
|
|
def parse_uboot_header(file_path, offset=0):
    """Parse the 64-byte legacy U-Boot image (uImage) header at ``offset``.

    Args:
        file_path: Path of the firmware image.
        offset: Byte position of the header inside the file.

    Returns:
        A dict describing the header (magic, CRCs, data size, load and entry
        addresses, OS, architecture, compression, image name), or ``None``
        when fewer than 64 bytes are available at ``offset`` or the magic
        number is not 0x27051956.
    """
    with open(file_path, "rb") as f:
        f.seek(offset)
        header = f.read(64)
    if len(header) < 64:
        return None

    # Big-endian layout: seven 32-bit words, four one-byte type codes and a
    # 32-byte NUL-padded name. The timestamp and image-type fields are
    # decoded but not reported.
    (magic, header_crc, _timestamp, data_size, load_addr, entry_point,
     data_crc, os_type, arch, _image_type, comp_type,
     raw_name) = struct.unpack(">7I4B32s", header)
    if magic != 0x27051956:
        return None
    name = raw_name.split(b"\x00")[0].decode("ascii", errors="replace")

    # Numeric codes follow the IH_OS_* / IH_ARCH_* / IH_COMP_* enumerations
    # in U-Boot's include/image.h.
    OS_TYPES = {0: "Invalid", 1: "OpenBSD", 2: "NetBSD", 3: "FreeBSD",
                4: "4_4BSD", 5: "Linux", 6: "SVR4", 7: "Esix", 8: "Solaris",
                9: "Irix", 10: "SCO", 11: "Dell", 12: "NCR", 13: "LynxOS",
                14: "VxWorks", 15: "pSOS", 16: "QNX", 17: "U-Boot",
                18: "RTEMS"}
    ARCH_TYPES = {0: "Invalid", 1: "Alpha", 2: "ARM", 3: "x86", 4: "IA64",
                  5: "MIPS", 6: "MIPS64", 7: "PowerPC", 8: "S390",
                  9: "SuperH", 10: "SPARC", 11: "SPARC64", 12: "M68K",
                  13: "Nios", 14: "MicroBlaze", 15: "Nios-II",
                  16: "Blackfin", 17: "AVR32", 18: "ST200", 19: "Sandbox",
                  20: "NDS32", 21: "OpenRISC", 22: "AArch64", 23: "ARC",
                  24: "x86_64", 25: "Xtensa", 26: "RISC-V"}
    COMP_TYPES = {0: "none", 1: "gzip", 2: "bzip2", 3: "lzma", 4: "lzo",
                  5: "lz4", 6: "zstd"}

    return {
        "magic": f"0x{magic:08X}",
        "header_crc": f"0x{header_crc:08X}",
        "data_size": data_size,
        "load_address": f"0x{load_addr:08X}",
        "entry_point": f"0x{entry_point:08X}",
        "data_crc": f"0x{data_crc:08X}",
        "os": OS_TYPES.get(os_type, f"Unknown({os_type})"),
        "architecture": ARCH_TYPES.get(arch, f"Unknown({arch})"),
        "compression": COMP_TYPES.get(comp_type, f"Unknown({comp_type})"),
        "name": name,
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# String Analysis
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# (compiled pattern, finding label) pairs; the first pattern that matches a
# string decides the label reported for it.
SENSITIVE_PATTERNS = [
    (re.compile(rb"password\s*[:=]\s*\S+", re.IGNORECASE), "Hardcoded password"),
    (re.compile(rb"passwd\s*[:=]\s*\S+", re.IGNORECASE), "Hardcoded password"),
    (re.compile(rb"api[_-]?key\s*[:=]\s*\S+", re.IGNORECASE), "API key"),
    (re.compile(rb"secret\s*[:=]\s*\S+", re.IGNORECASE), "Secret value"),
    (re.compile(rb"token\s*[:=]\s*\S+", re.IGNORECASE), "Authentication token"),
    (re.compile(rb"-----BEGIN\s+(RSA |DSA |EC )?PRIVATE KEY-----"), "Private key"),
    (re.compile(rb"-----BEGIN CERTIFICATE-----"), "X.509 certificate"),
    (re.compile(rb"https?://\S+"), "URL/endpoint"),
    (re.compile(rb"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"), "IP address"),
    (re.compile(rb"root:\$[156]\$"), "Root password hash"),
    (re.compile(rb"telnetd|dropbear|sshd|httpd|uHTTPd"), "Network service"),
]


def scan_strings(file_path, min_length=8):
    """Extract printable ASCII strings and flag those that look sensitive.

    The whole file is read into memory. Every run of at least ``min_length``
    printable ASCII bytes is tested against SENSITIVE_PATTERNS, and the first
    matching pattern labels the finding.

    Args:
        file_path: Path of the image to scan.
        min_length: Minimum length of a printable run to be considered.

    Returns:
        A list of dicts in file order with ``offset`` (hex position where the
        string starts), ``type`` (pattern label) and ``value`` (the string,
        truncated to 120 characters).
    """
    findings = []
    with open(file_path, "rb") as f:
        data = f.read()

    ascii_pattern = re.compile(rb"[\x20-\x7E]{%d,}" % min_length)
    # finditer() carries each string's own position, so repeated strings are
    # reported where they actually occur and the image is never rescanned.
    for match in ascii_pattern.finditer(data):
        text = match.group()
        for pattern, description in SENSITIVE_PATTERNS:
            if pattern.search(text):
                findings.append({
                    "offset": f"0x{match.start():08X}",
                    "type": description,
                    "value": text[:120].decode("ascii", errors="replace"),
                })
                break
    return findings
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Binwalk Subprocess Interface
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _run_binwalk(args, timeout, action):
    """Run ``binwalk`` with ``args`` and return its outcome as a dict.

    Args:
        args: Command-line arguments placed after the ``binwalk`` executable.
        timeout: Seconds to wait before giving up on the process.
        action: Short phrase naming the operation, used in the timeout
            message.

    Returns:
        A dict with ``stdout``, ``stderr`` and ``rc``. ``rc`` is the process
        exit status, -1 when the executable cannot be found, or -2 when the
        timeout expires.
    """
    try:
        result = subprocess.run(
            ["binwalk", *args],
            capture_output=True,
            text=True,
            # Tool output may echo raw bytes taken from the image; substitute
            # undecodable bytes instead of raising UnicodeDecodeError.
            errors="replace",
            timeout=timeout,
        )
    except FileNotFoundError:
        return {"stdout": "", "stderr": "binwalk not found in PATH", "rc": -1}
    except subprocess.TimeoutExpired:
        return {"stdout": "", "stderr": f"binwalk {action} timed out", "rc": -2}
    return {"stdout": result.stdout, "stderr": result.stderr, "rc": result.returncode}


def run_binwalk_scan(firmware_path):
    """Run binwalk's signature scan on ``firmware_path`` (120 s timeout).

    Returns the ``stdout``/``stderr``/``rc`` dict; ``rc`` is -1 when binwalk
    is not installed and -2 on timeout.
    """
    return _run_binwalk([firmware_path], 120, "scan")


def run_binwalk_extract(firmware_path, output_dir=None, recursive=False):
    """Run binwalk extraction (``-e``) on ``firmware_path`` (300 s timeout).

    Args:
        firmware_path: Image to extract.
        output_dir: When truthy, passed to binwalk with ``-C``.
        recursive: When true, adds ``-M`` to the command line.

    Returns the ``stdout``/``stderr``/``rc`` dict; ``rc`` is -1 when binwalk
    is not installed and -2 on timeout.
    """
    args = ["-e"]
    if recursive:
        args.append("-M")
    if output_dir:
        args.extend(["-C", output_dir])
    args.append(firmware_path)
    return _run_binwalk(args, 300, "extraction")


def run_binwalk_entropy(firmware_path):
    """Run binwalk's entropy analysis (``-E``) on ``firmware_path`` (120 s timeout).

    Returns the ``stdout``/``stderr``/``rc`` dict; ``rc`` is -1 when binwalk
    is not installed and -2 on timeout.
    """
    return _run_binwalk(["-E", firmware_path], 120, "entropy analysis")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Firmware Metadata
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def get_firmware_metadata(file_path):
    """Compute basic identifying metadata for a firmware image file.

    The file is hashed in 64 KiB pieces so large images are never held in
    memory in full.

    Args:
        file_path: Path of the image.

    Returns:
        A dict with the file's base name (``file``), resolved absolute path
        (``path``), ``size_bytes``, a formatted ``size_human`` and the
        ``sha256`` and ``md5`` hex digests.
    """
    file_size = os.path.getsize(file_path)
    sha256 = hashlib.sha256()
    md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            sha256.update(chunk)
            md5.update(chunk)

    # ">=" so that an image of exactly 1 MiB reads "1.00 MB" rather than
    # "1024.00 KB".
    if file_size >= 1048576:
        size_human = f"{file_size / (1024*1024):.2f} MB"
    else:
        size_human = f"{file_size / 1024:.2f} KB"

    return {
        "file": os.path.basename(file_path),
        "path": str(Path(file_path).resolve()),
        "size_bytes": file_size,
        "size_human": size_human,
        "sha256": sha256.hexdigest(),
        "md5": md5.hexdigest(),
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main Entry Point
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def analyze_firmware(firmware_path, block_size=8192, min_string_length=8):
    """Run the full analysis pipeline on an image and print a text report.

    The report covers file metadata, magic-signature hits, any U-Boot headers
    found at those hits, entropy regions, sensitive strings and the raw
    output of a binwalk scan.

    Args:
        firmware_path: Path of the firmware image.
        block_size: Block size in bytes used for the entropy map.
        min_string_length: Minimum printable-run length for string scanning.

    Returns:
        True when the analysis ran, False when ``firmware_path`` is not an
        existing regular file (an error line is printed in that case).
    """
    print("=" * 65)
    print(" Firmware Extraction & Analysis Agent (binwalk)")
    print("=" * 65)

    if not os.path.isfile(firmware_path):
        print(f"[ERROR] File not found: {firmware_path}")
        return False

    # Metadata
    meta = get_firmware_metadata(firmware_path)
    print(f"\n[*] File: {meta['file']}")
    print(f"[*] Size: {meta['size_human']} ({meta['size_bytes']} bytes)")
    print(f"[*] SHA-256: {meta['sha256']}")
    print(f"[*] MD5: {meta['md5']}")

    # Signature scan
    print("\n--- Signature Scan ---")
    sigs = scan_signatures(firmware_path)
    if sigs:
        for s in sigs:
            print(f" {s['offset_hex']} {s['description']} (magic: {s['magic_hex']})")
    else:
        print(" No known signatures detected.")

    # U-Boot header
    for s in sigs:
        if "U-Boot" in s["description"]:
            print(f"\n--- U-Boot Header at {s['offset_hex']} ---")
            hdr = parse_uboot_header(firmware_path, s["offset"])
            if hdr:
                for k, v in hdr.items():
                    print(f" {k}: {v}")

    # Entropy analysis
    print("\n--- Entropy Analysis ---")
    emap = entropy_map(firmware_path, block_size=block_size)
    regions = detect_entropy_regions(emap)
    for r in regions:
        # Nominal size: the final block of the file may be shorter than
        # block_size.
        size_bytes = r["block_count"] * block_size
        print(f" {r['start_hex']} - 0x{r['end_offset']:08X} "
              f"({size_bytes:>8} bytes) [{r['classification']}]")

    # String analysis for sensitive data
    print("\n--- Sensitive String Analysis ---")
    findings = scan_strings(firmware_path, min_length=min_string_length)
    if findings:
        seen = set()
        # Only the first 30 findings are considered; repeats among them are
        # printed once.
        for finding in findings[:30]:
            key = (finding["type"], finding["value"][:60])
            if key not in seen:
                seen.add(key)
                print(f" [{finding['type']}] @ {finding['offset']}: {finding['value'][:80]}")
    else:
        print(" No sensitive strings detected.")

    # Binwalk subprocess scan
    print("\n--- Binwalk Scan Output ---")
    bw = run_binwalk_scan(firmware_path)
    if bw["rc"] == 0:
        print(bw["stdout"][:2000])
    elif bw["rc"] == -1:
        print(" [WARN] binwalk binary not found; install with: pip install binwalk3")
    else:
        print(f" [ERROR] binwalk returned code {bw['rc']}: {bw['stderr'][:200]}")

    print("\n[*] Analysis complete.")
    return True


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Firmware extraction and analysis agent using binwalk for "
                    "signature scanning, entropy analysis, filesystem extraction, "
                    "and string-based credential discovery.",
        epilog="Authorized use only. Ensure you have permission to analyze the target firmware.",
    )
    parser.add_argument(
        "firmware",
        help="Path to a firmware image file (.bin, .img, .rom)",
    )
    parser.add_argument(
        "--block-size", "-b",
        type=int, default=8192,
        help="Block size in bytes for entropy analysis (default: 8192)",
    )
    parser.add_argument(
        "--min-string-length", "-s",
        type=int, default=8,
        help="Minimum string length for sensitive string scanning (default: 8)",
    )
    parser.add_argument(
        "--extract", "-e",
        action="store_true",
        help="Run binwalk extraction after analysis",
    )
    parser.add_argument(
        "--recursive", "-M",
        action="store_true",
        help="Enable recursive (matryoshka) extraction",
    )
    parser.add_argument(
        "--output-dir", "-o",
        type=str, default=None,
        help="Output directory for extraction results",
    )
    parser.add_argument(
        "--json-output", "-j",
        action="store_true",
        help="Output results in JSON format instead of text",
    )

    args = parser.parse_args()
    # A non-positive block size yields no usable entropy blocks, and a zero
    # minimum length would make every position of the image a "string".
    if args.block_size < 1:
        parser.error("--block-size must be a positive integer")
    if args.min_string_length < 1:
        parser.error("--min-string-length must be a positive integer")
    # NOTE(review): --json-output is parsed but nothing below consumes it;
    # the report is always plain text. Implement or drop the option.

    print(DISCLAIMER)
    analysis_ok = analyze_firmware(
        args.firmware,
        block_size=args.block_size,
        min_string_length=args.min_string_length,
    )
    if not analysis_ok:
        # Do not attempt extraction on a file that could not be analyzed.
        sys.exit(1)

    if args.extract:
        print("\n--- Running Binwalk Extraction ---")
        result = run_binwalk_extract(args.firmware, args.output_dir, args.recursive)
        if result["rc"] == 0:
            print(result["stdout"][:2000])
        else:
            print(f" [ERROR] Extraction failed: {result.get('stderr', '')[:200]}")
|