#!/usr/bin/env python3
"""Firmware extraction and analysis agent using binwalk for signature scanning,
entropy analysis, filesystem extraction, and string-based credential discovery."""

import argparse
import struct
import hashlib
import math
import os
import sys
import subprocess
import re
import json
import zlib
from collections import Counter
from pathlib import Path

DISCLAIMER = """
==========================================================================
AUTHORIZED USE ONLY -- This tool is intended for authorized security
testing, firmware research, and educational purposes. Ensure you have
explicit written permission before analyzing any firmware image you do
not own. Unauthorized access to or reverse engineering of proprietary
firmware may violate applicable laws and vendor agreements.
==========================================================================
"""


# ---------------------------------------------------------------------------
# Entropy Analysis
# ---------------------------------------------------------------------------

def calculate_entropy(data):
    """Return the Shannon entropy of a byte sequence, in bits per byte.

    The result lies between 0.0 (empty input, or every byte identical) and
    8.0 (all 256 byte values equally frequent) and is rounded to 4 decimals.
    """
    if not data:
        return 0.0
    length = len(data)
    entropy = -sum(
        (count / length) * math.log2(count / length)
        for count in Counter(data).values()
    )
    # Constant input yields -0.0 from the negation above; report plain 0.0.
    return round(entropy, 4) or 0.0


def entropy_map(file_path, block_size=4096):
    """Generate a block-by-block entropy map of a firmware image.

    Args:
        file_path: Path of the file to read.
        block_size: Number of bytes per block; must be positive.

    Returns:
        A list with one dict per block holding ``offset``, ``offset_hex``,
        ``entropy`` and ``classification``. The final block may be shorter
        than ``block_size``.

    Raises:
        ValueError: If ``block_size`` is not positive.
    """
    if block_size <= 0:
        raise ValueError("block_size must be a positive integer")
    results = []
    with open(file_path, "rb") as f:
        offset = 0
        for block in iter(lambda: f.read(block_size), b""):
            ent = calculate_entropy(block)
            results.append({
                "offset": offset,
                "offset_hex": f"0x{offset:08X}",
                "entropy": ent,
                "classification": classify_entropy(ent),
            })
            offset += len(block)
    return results


def classify_entropy(value):
    """Classify an entropy value into a human-readable category."""
    if value < 1.0:
        return "empty/padding"
    elif value < 5.0:
        return "plaintext/code"
    elif value < 7.0:
        return "compressed"
    elif value < 7.9:
        return "highly-compressed"
    else:
        return "encrypted/random"


def detect_entropy_regions(entropy_data, threshold_high=7.0, threshold_low=1.0):
    """Merge consecutive entropy-map entries that share a classification.

    Args:
        entropy_data: Entries as produced by ``entropy_map``.
        threshold_high: Accepted for interface compatibility; not consulted,
            because grouping uses each entry's ``classification`` label.
        threshold_low: Accepted for interface compatibility; not consulted.

    Returns:
        A list of region dicts with ``start_offset``, ``start_hex``,
        ``end_offset`` (the offset at which the region's last block starts),
        ``classification`` and ``block_count``.
    """
    regions = []
    current_region = None
    for entry in entropy_data:
        classification = entry["classification"]
        if current_region and current_region["classification"] == classification:
            current_region["end_offset"] = entry["offset"]
            current_region["block_count"] += 1
        else:
            if current_region:
                regions.append(current_region)
            current_region = {
                "start_offset": entry["offset"],
                "start_hex": entry["offset_hex"],
                "end_offset": entry["offset"],
                "classification": classification,
                "block_count": 1,
            }
    if current_region:
        regions.append(current_region)
    return regions


# ---------------------------------------------------------------------------
# Firmware Header Parsing
# ---------------------------------------------------------------------------

MAGIC_SIGNATURES = {
    b"\x27\x05\x19\x56": "U-Boot image header (uImage)",
    b"\x68\x73\x71\x73": "SquashFS filesystem (little-endian)",
    b"\x73\x71\x73\x68": "SquashFS filesystem (big-endian)",
    b"\x45\x3D\xCD\x28": "CramFS filesystem",
    b"\x85\x19\x01\x20": "JFFS2 filesystem (little-endian)",
    b"\x19\x85\x20\x01": "JFFS2 filesystem (big-endian)",
    b"\x1F\x8B\x08": "gzip compressed data",
    b"\x5D\x00\x00": "LZMA compressed data",
    b"\xFD\x37\x7A\x58\x5A\x00": "XZ compressed data",
    b"\x30\x37\x30\x37\x30\x31": "CPIO archive",
    b"\x55\xAA": "x86 boot sector",
    b"\xD0\x0D\xFE\xED": "Device Tree Blob (DTB)",
    b"\x4D\x5A": "PE/COFF executable (EFI binary)",
    b"\x7F\x45\x4C\x46": "ELF executable",
    b"\x89\x50\x4E\x47": "PNG image",
    b"\xFF\xD8\xFF": "JPEG image",
}

# Legacy uImage header: seven big-endian 32-bit words, four single-byte
# fields, and a 32-byte NUL-padded image name (64 bytes in total).
_UBOOT_HEADER = struct.Struct(">7I4B32s")
_UBOOT_MAGIC = 0x27051956

# Numeric codes follow the IH_OS_*, IH_ARCH_*, IH_TYPE_* and IH_COMP_* enums
# in U-Boot's include/image.h.
_UBOOT_OS_TYPES = {
    0: "Invalid", 1: "OpenBSD", 2: "NetBSD", 3: "FreeBSD", 4: "4_4BSD",
    5: "Linux", 6: "SVR4", 7: "Esix", 8: "Solaris", 9: "Irix", 10: "SCO",
    11: "Dell", 12: "NCR", 13: "LynxOS", 14: "VxWorks", 15: "pSOS",
    16: "QNX", 17: "U-Boot", 18: "RTEMS",
}
_UBOOT_ARCH_TYPES = {
    0: "Invalid", 1: "Alpha", 2: "ARM", 3: "x86", 4: "IA64", 5: "MIPS",
    6: "MIPS64", 7: "PowerPC", 8: "S390", 9: "SuperH", 10: "SPARC",
    11: "SPARC64", 12: "M68K", 13: "Nios", 14: "MicroBlaze", 15: "Nios-II",
    16: "Blackfin", 17: "AVR32", 18: "ST200", 19: "Sandbox", 20: "NDS32",
    21: "OpenRISC", 22: "AArch64", 23: "ARC", 24: "x86_64", 25: "Xtensa",
    26: "RISC-V",
}
_UBOOT_IMAGE_TYPES = {
    0: "Invalid", 1: "Standalone", 2: "Kernel", 3: "RAMDisk", 4: "Multi-File",
    5: "Firmware", 6: "Script", 7: "Filesystem", 8: "Flat Device Tree",
}
_UBOOT_COMP_TYPES = {
    0: "none", 1: "gzip", 2: "bzip2", 3: "lzma", 4: "lzo", 5: "lz4", 6: "zstd",
}


def scan_signatures(file_path, chunk_size=65536):
    """Scan a firmware image for known magic byte signatures.

    The file is read in chunks that overlap by one byte less than the longest
    signature, so a signature straddling a chunk boundary is still found.
    Each match is reported exactly once.

    Args:
        file_path: Path of the file to scan.
        chunk_size: Bytes read per chunk; must be at least as long as the
            longest entry in ``MAGIC_SIGNATURES``.

    Returns:
        A list of dicts (``offset``, ``offset_hex``, ``magic_hex``,
        ``description``) sorted by offset.

    Raises:
        ValueError: If ``chunk_size`` is shorter than the longest signature.
    """
    max_magic_len = max(len(magic) for magic in MAGIC_SIGNATURES)
    if chunk_size < max_magic_len:
        raise ValueError(
            f"chunk_size must be at least {max_magic_len} bytes"
        )
    # Distance between the starts of consecutive chunks.
    step = chunk_size - max_magic_len + 1

    matches = []
    with open(file_path, "rb") as f:
        offset = 0
        while True:
            f.seek(offset)
            data = f.read(chunk_size)
            if not data:
                break
            for magic, description in MAGIC_SIGNATURES.items():
                # Only accept matches that start before ``step``: anything
                # starting later lies wholly inside the next chunk and will
                # be reported there, which avoids duplicate hits.
                limit = step + len(magic) - 1
                idx = data.find(magic, 0, limit)
                while idx != -1:
                    absolute_offset = offset + idx
                    matches.append({
                        "offset": absolute_offset,
                        "offset_hex": f"0x{absolute_offset:08X}",
                        "magic_hex": magic.hex().upper(),
                        "description": description,
                    })
                    idx = data.find(magic, idx + 1, limit)
            offset += step
    matches.sort(key=lambda x: x["offset"])
    return matches


def parse_uboot_header(file_path, offset=0):
    """Parse a legacy U-Boot image (uImage) header at the given offset.

    Args:
        file_path: Path of the firmware image.
        offset: Byte offset of the header within the file.

    Returns:
        ``None`` if fewer than 64 bytes are available or the magic number
        does not match. Otherwise a dict of decoded fields. Because a bare
        4-byte magic can occur by chance, ``header_crc_valid`` tells whether
        the stored header CRC matches the CRC-32 of the header computed with
        the CRC field zeroed.
    """
    with open(file_path, "rb") as f:
        f.seek(offset)
        header = f.read(_UBOOT_HEADER.size)
    if len(header) < _UBOOT_HEADER.size:
        return None

    (magic, header_crc, timestamp, data_size, load_addr, entry_point,
     data_crc, os_type, arch, image_type, comp_type,
     raw_name) = _UBOOT_HEADER.unpack(header)
    if magic != _UBOOT_MAGIC:
        return None

    # The header CRC is calculated with its own field treated as zero.
    zeroed = header[:4] + b"\x00\x00\x00\x00" + header[8:]
    header_crc_valid = (zlib.crc32(zeroed) & 0xFFFFFFFF) == header_crc
    name = raw_name.split(b"\x00")[0].decode("ascii", errors="replace")

    return {
        "magic": f"0x{magic:08X}",
        "header_crc": f"0x{header_crc:08X}",
        "header_crc_valid": header_crc_valid,
        "timestamp": timestamp,
        "data_size": data_size,
        "load_address": f"0x{load_addr:08X}",
        "entry_point": f"0x{entry_point:08X}",
        "data_crc": f"0x{data_crc:08X}",
        "os": _UBOOT_OS_TYPES.get(os_type, f"Unknown({os_type})"),
        "architecture": _UBOOT_ARCH_TYPES.get(arch, f"Unknown({arch})"),
        "image_type": _UBOOT_IMAGE_TYPES.get(image_type, f"Unknown({image_type})"),
        "compression": _UBOOT_COMP_TYPES.get(comp_type, f"Unknown({comp_type})"),
        "name": name,
    }


# ---------------------------------------------------------------------------
# String Analysis
# ---------------------------------------------------------------------------

SENSITIVE_PATTERNS = [
    (re.compile(rb"password\s*[:=]\s*\S+", re.IGNORECASE), "Hardcoded password"),
    (re.compile(rb"passwd\s*[:=]\s*\S+", re.IGNORECASE), "Hardcoded password"),
    (re.compile(rb"api[_-]?key\s*[:=]\s*\S+", re.IGNORECASE), "API key"),
    (re.compile(rb"secret\s*[:=]\s*\S+", re.IGNORECASE), "Secret value"),
    (re.compile(rb"token\s*[:=]\s*\S+", re.IGNORECASE), "Authentication token"),
    (re.compile(rb"-----BEGIN\s+(RSA |DSA |EC )?PRIVATE KEY-----"), "Private key"),
    (re.compile(rb"-----BEGIN CERTIFICATE-----"), "X.509 certificate"),
    (re.compile(rb"https?://\S+"), "URL/endpoint"),
    (re.compile(rb"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"), "IP address"),
    (re.compile(rb"root:\$[156]\$"), "Root password hash"),
    (re.compile(rb"telnetd|dropbear|sshd|httpd|uHTTPd"), "Network service"),
]


def scan_strings(file_path, min_length=8):
    """Extract printable ASCII strings and scan them for sensitive patterns.

    The whole file is read into memory. Each extracted string yields at most
    one finding: the first entry of ``SENSITIVE_PATTERNS`` that matches it.

    Args:
        file_path: Path of the file to scan.
        min_length: Minimum run of printable bytes that counts as a string;
            must be at least 1.

    Returns:
        A list of dicts with ``offset`` (hex offset of the string itself),
        ``type`` and ``value`` (the string, truncated to 120 characters).

    Raises:
        ValueError: If ``min_length`` is less than 1.
    """
    if min_length < 1:
        raise ValueError("min_length must be at least 1")

    with open(file_path, "rb") as f:
        data = f.read()

    ascii_pattern = re.compile(rb"[\x20-\x7E]{%d,}" % min_length)
    findings = []
    # finditer gives each string's own position, so repeated strings are
    # reported at their real offsets rather than at the first occurrence.
    for match in ascii_pattern.finditer(data):
        text = match.group()
        for pattern, description in SENSITIVE_PATTERNS:
            if pattern.search(text):
                findings.append({
                    "offset": f"0x{match.start():08X}",
                    "type": description,
                    "value": text[:120].decode("ascii", errors="replace"),
                })
                break
    return findings


# ---------------------------------------------------------------------------
# Binwalk Subprocess Interface
# ---------------------------------------------------------------------------

def _run_binwalk(cmd, timeout, timeout_message):
    """Run a binwalk command line and return its stdout, stderr and exit code.

    ``rc`` is -1 when the binwalk executable cannot be found and -2 when the
    command exceeds ``timeout`` seconds.
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except FileNotFoundError:
        return {"stdout": "", "stderr": "binwalk not found in PATH", "rc": -1}
    except subprocess.TimeoutExpired:
        return {"stdout": "", "stderr": timeout_message, "rc": -2}
    return {"stdout": result.stdout, "stderr": result.stderr, "rc": result.returncode}


def run_binwalk_scan(firmware_path):
    """Run binwalk signature scan via subprocess and return its output."""
    return _run_binwalk(["binwalk", firmware_path], 120, "binwalk scan timed out")


def run_binwalk_extract(firmware_path, output_dir=None, recursive=False):
    """Run binwalk extraction via subprocess.

    Args:
        firmware_path: Path of the firmware image.
        output_dir: Optional directory passed to binwalk with ``-C``.
        recursive: When true, pass ``-M`` for recursive extraction.
    """
    cmd = ["binwalk", "-e"]
    if recursive:
        cmd.append("-M")
    if output_dir:
        cmd.extend(["-C", output_dir])
    cmd.append(firmware_path)
    return _run_binwalk(cmd, 300, "binwalk extraction timed out")


def run_binwalk_entropy(firmware_path):
    """Run binwalk entropy analysis via subprocess."""
    return _run_binwalk(
        ["binwalk", "-E", firmware_path], 120, "binwalk entropy analysis timed out"
    )


# ---------------------------------------------------------------------------
# Firmware Metadata
# ---------------------------------------------------------------------------

def get_firmware_metadata(file_path):
    """Compute basic metadata (name, path, size, SHA-256, MD5) for a file."""
    file_size = os.path.getsize(file_path)
    sha256 = hashlib.sha256()
    md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            sha256.update(chunk)
            md5.update(chunk)

    if file_size > 1048576:
        size_human = f"{file_size / (1024*1024):.2f} MB"
    else:
        size_human = f"{file_size / 1024:.2f} KB"

    return {
        "file": os.path.basename(file_path),
        "path": str(Path(file_path).resolve()),
        "size_bytes": file_size,
        "size_human": size_human,
        "sha256": sha256.hexdigest(),
        "md5": md5.hexdigest(),
    }


# ---------------------------------------------------------------------------
# Main Entry Point
# ---------------------------------------------------------------------------

def _silent(*args, **kwargs):
    """Discard output; stands in for ``print`` when text output is disabled."""


def analyze_firmware(firmware_path, block_size=8192, min_string_length=8, verbose=True):
    """Perform a complete firmware analysis pipeline.

    Args:
        firmware_path: Path of the firmware image.
        block_size: Block size in bytes used for the entropy map.
        min_string_length: Minimum string length for the sensitive-string scan.
        verbose: When true (the default) a text report is printed to stdout
            as each stage completes; when false nothing is printed.

    Returns:
        ``None`` if ``firmware_path`` is not an existing file. Otherwise a
        JSON-serialisable dict with the keys ``metadata``, ``signatures``,
        ``uboot_headers``, ``entropy_regions``, ``sensitive_strings`` and
        ``binwalk_scan``.
    """
    say = print if verbose else _silent

    say("=" * 65)
    say(" Firmware Extraction & Analysis Agent (binwalk)")
    say("=" * 65)

    if not os.path.isfile(firmware_path):
        say(f"[ERROR] File not found: {firmware_path}")
        return None

    report = {}

    # Metadata
    meta = get_firmware_metadata(firmware_path)
    report["metadata"] = meta
    say(f"\n[*] File: {meta['file']}")
    say(f"[*] Size: {meta['size_human']} ({meta['size_bytes']} bytes)")
    say(f"[*] SHA-256: {meta['sha256']}")
    say(f"[*] MD5: {meta['md5']}")

    # Signature scan
    say("\n--- Signature Scan ---")
    sigs = scan_signatures(firmware_path)
    report["signatures"] = sigs
    if sigs:
        for s in sigs:
            say(f" {s['offset_hex']} {s['description']} (magic: {s['magic_hex']})")
    else:
        say(" No known signatures detected.")

    # U-Boot header
    report["uboot_headers"] = []
    for s in sigs:
        if "U-Boot" not in s["description"]:
            continue
        say(f"\n--- U-Boot Header at {s['offset_hex']} ---")
        hdr = parse_uboot_header(firmware_path, s["offset"])
        if hdr:
            report["uboot_headers"].append(
                {"offset_hex": s["offset_hex"], "header": hdr}
            )
            for k, v in hdr.items():
                say(f" {k}: {v}")

    # Entropy analysis
    say("\n--- Entropy Analysis ---")
    emap = entropy_map(firmware_path, block_size=block_size)
    report["entropy_regions"] = []
    for r in detect_entropy_regions(emap):
        # The final block of the file may be short, so cap the size at the
        # number of bytes that actually remain after the region's start.
        size_bytes = min(
            r["block_count"] * block_size,
            meta["size_bytes"] - r["start_offset"],
        )
        last_byte = r["start_offset"] + size_bytes - 1
        report["entropy_regions"].append(dict(r, size_bytes=size_bytes))
        say(f" {r['start_hex']} - 0x{last_byte:08X} "
            f"({size_bytes:>8} bytes) [{r['classification']}]")

    # String analysis for sensitive data
    say("\n--- Sensitive String Analysis ---")
    findings = scan_strings(firmware_path, min_length=min_string_length)
    # De-duplicate before applying the display cap so that repeated findings
    # do not crowd distinct ones out of the listing.
    unique_findings = []
    seen = set()
    for finding in findings:
        key = (finding["type"], finding["value"][:60])
        if key not in seen:
            seen.add(key)
            unique_findings.append(finding)
    report["sensitive_strings"] = unique_findings
    if unique_findings:
        for finding in unique_findings[:30]:
            say(f" [{finding['type']}] @ {finding['offset']}: {finding['value'][:80]}")
    else:
        say(" No sensitive strings detected.")

    # Binwalk subprocess scan
    say("\n--- Binwalk Scan Output ---")
    bw = run_binwalk_scan(firmware_path)
    report["binwalk_scan"] = bw
    if bw["rc"] == 0:
        say(bw["stdout"][:2000])
    elif bw["rc"] == -1:
        say(" [WARN] binwalk binary not found; install with: pip install binwalk3")
    else:
        say(f" [ERROR] binwalk returned code {bw['rc']}: {bw['stderr'][:200]}")

    say("\n[*] Analysis complete.")
    return report


def main(argv=None):
    """Parse command-line arguments, run the analysis, return an exit status."""
    parser = argparse.ArgumentParser(
        description="Firmware extraction and analysis agent using binwalk for "
                    "signature scanning, entropy analysis, filesystem extraction, "
                    "and string-based credential discovery.",
        epilog="Authorized use only. Ensure you have permission to analyze the target firmware.",
    )
    parser.add_argument(
        "firmware",
        help="Path to a firmware image file (.bin, .img, .rom)",
    )
    parser.add_argument(
        "--block-size", "-b",
        type=int,
        default=8192,
        help="Block size in bytes for entropy analysis (default: 8192)",
    )
    parser.add_argument(
        "--min-string-length", "-s",
        type=int,
        default=8,
        help="Minimum string length for sensitive string scanning (default: 8)",
    )
    parser.add_argument(
        "--extract", "-e",
        action="store_true",
        help="Run binwalk extraction after analysis",
    )
    parser.add_argument(
        "--recursive", "-M",
        action="store_true",
        help="Enable recursive (matryoshka) extraction",
    )
    parser.add_argument(
        "--output-dir", "-o",
        type=str,
        default=None,
        help="Output directory for extraction results",
    )
    parser.add_argument(
        "--json-output", "-j",
        action="store_true",
        help="Output results in JSON format instead of text",
    )
    args = parser.parse_args(argv)

    if args.block_size <= 0:
        parser.error("--block-size must be a positive integer")
    if args.min_string_length <= 0:
        parser.error("--min-string-length must be a positive integer")

    if args.json_output:
        # Keep stdout a single JSON document; the notice goes to stderr.
        print(DISCLAIMER, file=sys.stderr)
        report = analyze_firmware(
            args.firmware,
            block_size=args.block_size,
            min_string_length=args.min_string_length,
            verbose=False,
        )
        if report is None:
            print(f"[ERROR] File not found: {args.firmware}", file=sys.stderr)
            return 1
        if args.extract:
            report["extraction"] = run_binwalk_extract(
                args.firmware, args.output_dir, args.recursive
            )
        print(json.dumps(report, indent=2))
        return 0

    print(DISCLAIMER)
    report = analyze_firmware(
        args.firmware,
        block_size=args.block_size,
        min_string_length=args.min_string_length,
    )
    if report is None:
        # Nothing to extract from a file that does not exist.
        return 1

    if args.extract:
        print("\n--- Running Binwalk Extraction ---")
        result = run_binwalk_extract(args.firmware, args.output_dir, args.recursive)
        if result["rc"] == 0:
            print(result["stdout"][:2000])
        else:
            print(f" [ERROR] Extraction failed: {result.get('stderr', '')[:200]}")
    return 0


if __name__ == "__main__":
    sys.exit(main())