Files
T

322 lines
11 KiB
Python

#!/usr/bin/env python3
"""
PowerShell Malware Deobfuscation Script
Identifies and removes multiple layers of PowerShell obfuscation
to reveal the underlying malicious payload and extract IOCs.
Requirements:
    Python 3.6+ (standard library only; the third-party "regex" package is not needed)
Usage:
python process.py --file obfuscated.ps1 --output deobfuscated.ps1
python process.py --file obfuscated.ps1 --extract-iocs
"""
import argparse
import base64
import json
import re
import sys
from pathlib import Path
class PowerShellDeobfuscator:
    """Multi-layer PowerShell deobfuscation engine.

    ``deobfuscate`` repeatedly applies a fixed sequence of text rewrites
    (tick removal, string concatenation, Base64 decoding, char casts,
    ``-f`` format strings, decompression) until the text stops changing,
    then scans the final text for indicators of compromise (IOCs).

    Attributes:
        layers: one dict per rewrite pass that changed the text, with the
            pass number and the text length before/after.
        iocs: mapping of IOC category name to the set of values found.
    """

    # Upper bound on rewrite passes so pathological input cannot loop forever.
    MAX_ITERATIONS = 20
    # Inflated payloads larger than this are ignored; samples are untrusted
    # and may embed decompression bombs.
    MAX_DECOMPRESSED_BYTES = 16 * 1024 * 1024

    # Backtick pairs that are kept as-is instead of being collapsed.
    _TICK_ESCAPES = frozenset(
        {'`n', '`r', '`t', '`a', '`b', '`f', '`v', '`0', '``'}
    )
    _TICK_PAIR = re.compile(r'`(.)', re.DOTALL)

    _SINGLE_CONCAT = re.compile(r"'([^']*)'\s*\+\s*'([^']*)'")
    _DOUBLE_CONCAT = re.compile(r'"([^"]*)"\s*\+\s*"([^"]*)"')

    _ENCODED_COMMAND = re.compile(
        r'-[Ee]nc(?:odedcommand)?\s+([A-Za-z0-9+/=]{20,})',
        re.IGNORECASE,
    )
    _FROM_BASE64 = re.compile(
        r"\[Convert\]::FromBase64String\(\s*['\"]([A-Za-z0-9+/=]+)['\"]\s*\)",
        re.IGNORECASE,
    )

    _CHAR_ARRAY = re.compile(
        r'\[char\[\]\]\s*\(\s*(\d+(?:\s*,\s*\d+)*)\s*\)', re.IGNORECASE
    )
    _CHAR_CAST = re.compile(r'\[char\]\s*(0x[0-9a-f]+|\d+)', re.IGNORECASE)

    # A single- or double-quoted literal without embedded quotes.
    _QUOTED = r"""(?:'[^']*'|"[^"]*")"""
    _QUOTED_RE = re.compile(_QUOTED)
    _PLACEHOLDER = re.compile(r'\{(\d+)\}')
    # "<fmt>" -f '<a>','<b>',...  optionally wrapped in grouping parentheses.
    # The opening paren is only consumed when it is not a call paren
    # (not preceded by an identifier character or closing bracket) and a
    # matching closing paren directly follows the argument list.
    _FORMAT_EXPR = re.compile(
        r"(?P<open>(?<![\w\]\)])\(\s*)?"
        r"(?P<fmt>" + _QUOTED + r")\s*-f\s*"
        r"(?P<args>" + _QUOTED + r"(?:\s*,\s*" + _QUOTED + r")*)"
        r"(?(open)\s*\))",
        re.IGNORECASE,
    )

    _BASE64_RUN = re.compile(r'[A-Za-z0-9+/=]{100,}')

    _URL = re.compile(r'https?://[^\s\'"<>)\]]+', re.I)
    _IPV4 = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
    # The lookbehind keeps the tail of "HKCU:\..." from looking like drive U:.
    _FILE_PATH = re.compile(r'(?<![A-Za-z])[A-Za-z]:\\[^\s\'"<>|]+')
    _REGISTRY_KEY = re.compile(
        r'\b(?:HKEY_LOCAL_MACHINE|HKEY_CURRENT_USER|HKEY_CLASSES_ROOT'
        r'|HKEY_USERS|HKLM|HKCU|HKCR|HKU):?(?:\\[^\s\'"<>|]+)+',
        re.I,
    )
    _SUSPICIOUS_COMMANDS = (
        'DownloadString', 'DownloadFile', 'Invoke-WebRequest',
        'Start-Process', 'New-ScheduledTask', 'Add-MpPreference',
        'Reflection.Assembly',
    )

    def __init__(self):
        """Start with no recorded layers and empty IOC sets."""
        self.layers = []
        self.iocs = {
            "urls": set(),
            "ips": set(),
            "domains": set(),
            "file_paths": set(),
            "registry_keys": set(),
            "suspicious_commands": set(),
        }

    def analyze(self, content):
        """Identify obfuscation techniques present.

        Returns a list of ``{"technique": name, "count": n}`` dicts, one for
        each check whose pattern matches ``content`` at least once
        (case-insensitively), in the order the checks are listed.
        """
        checks = [
            (r'-[Ee]nc(?:odedcommand)?\s+[A-Za-z0-9+/=]{20,}',
             "Base64 EncodedCommand"),
            (r'\[Convert\]::FromBase64String', "FromBase64String"),
            (r"'\s*\+\s*'", "String Concatenation (single-quote)"),
            (r'"\s*\+\s*"', "String Concatenation (double-quote)"),
            (r'\[char\]\s*\d+', "Character Code Casting"),
            (r'\[char\[\]\]\s*\([\d,\s]+\)', "Character Array"),
            (r'`[a-zA-Z]', "Tick-Mark Insertion"),
            (r'Invoke-Expression', "Invoke-Expression"),
            (r'\bIEX\b', "IEX Alias"),
            (r'\|\s*IEX', "Pipeline IEX"),
            (r'IO\.Compression', "Compression Stream"),
            (r'-bxor\s+\d+', "XOR Encoding"),
            (r'\.Replace\(', "Replace Chain"),
            (r'ConvertTo-SecureString', "SecureString"),
            (r'\$env:', "Environment Variable"),
            (r'-f\s+[\'"]', "Format String Operator"),
            (r'New-Object\s+IO\.MemoryStream', "MemoryStream"),
        ]
        techniques = []
        for pattern, name in checks:
            count = len(re.findall(pattern, content, re.IGNORECASE))
            if count:
                techniques.append({"technique": name, "count": count})
        return techniques

    def deobfuscate(self, content):
        """Apply all deobfuscation layers iteratively.

        Runs every layer in a fixed order, repeating until a full pass
        leaves the text unchanged or ``MAX_ITERATIONS`` passes have run.
        Each pass that changed the text is recorded in ``self.layers``.
        IOCs are extracted from the final text into ``self.iocs``.

        Returns the deobfuscated text.
        """
        passes = (
            self._remove_ticks,
            self._resolve_concat,
            self._decode_base64_command,
            self._decode_frombase64,
            self._resolve_char_arrays,
            self._resolve_format_strings,
            self._decompress_streams,
        )
        current = content
        for iteration in range(1, self.MAX_ITERATIONS + 1):
            previous = current
            for apply_layer in passes:
                current = apply_layer(current)
            if current == previous:
                break
            self.layers.append({
                "iteration": iteration,
                "length_before": len(previous),
                "length_after": len(current),
            })
        # Extract IOCs from the final result only.
        self._extract_iocs(current)
        return current

    def _remove_ticks(self, content):
        """Remove backtick obfuscation.

        A backtick followed by any character collapses to that character,
        except for the pairs in ``_TICK_ESCAPES``, which are kept verbatim.
        A backtick that is the last character of the text is left alone.
        """
        def strip_tick(match):
            pair = match.group(0)
            return pair if pair in self._TICK_ESCAPES else match.group(1)

        return self._TICK_PAIR.sub(strip_tick, content)

    def _resolve_concat(self, content):
        """Resolve string concatenation of adjacent quoted literals.

        ``'a' + 'b'`` becomes ``'ab'`` (likewise for double quotes). Each
        pattern is re-applied until it no longer matches so that chains
        such as ``'a'+'b'+'c'`` collapse completely.
        """
        for pattern, joined in (
            (self._SINGLE_CONCAT, r"'\1\2'"),
            (self._DOUBLE_CONCAT, r'"\1\2"'),
        ):
            replaced = 1
            while replaced:
                content, replaced = pattern.subn(joined, content)
        return content

    def _decode_base64_command(self, content):
        """Decode -EncodedCommand Base64 arguments.

        Every ``-enc``/``-EncodedCommand <base64>`` occurrence is replaced
        by its own payload decoded as UTF-16LE. Occurrences that do not
        decode are left untouched.
        """
        def decode(match):
            payload = match.group(1)
            # Tolerate payloads whose trailing '=' padding was stripped.
            padded = payload + "=" * (-len(payload) % 4)
            try:
                return base64.b64decode(padded).decode('utf-16-le')
            except ValueError:  # binascii.Error and UnicodeDecodeError
                return match.group(0)

        # A callable replacement inserts the decoded text literally; a
        # string replacement would be parsed as a template and fail on
        # backslashes such as those in Windows paths.
        return self._ENCODED_COMMAND.sub(decode, content)

    @staticmethod
    def _bytes_to_text(raw):
        """Return ``raw`` decoded as printable text, or None if it is not text.

        UTF-16LE is tried first when at least half of the odd-offset bytes
        are NUL; strict UTF-8 is tried otherwise or as a fallback.
        """
        encodings = []
        high_bytes = raw[1::2]
        if (len(raw) % 2 == 0 and high_bytes
                and high_bytes.count(0) * 2 >= len(high_bytes)):
            encodings.append('utf-16-le')
        encodings.append('utf-8')
        for encoding in encodings:
            try:
                text = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            if all(ch.isprintable() or ch in "\t\r\n" for ch in text):
                return text
        return None

    def _decode_frombase64(self, content):
        """Decode [Convert]::FromBase64String calls.

        A call whose argument decodes to printable text is replaced by that
        text as a single-quoted literal. Calls whose payload is invalid
        Base64 or binary data are left untouched so that the Base64 text
        is still available to ``_decompress_streams``.
        """
        def decode(match):
            try:
                raw = base64.b64decode(match.group(1))
            except ValueError:
                return match.group(0)
            text = self._bytes_to_text(raw)
            if text is None:
                return match.group(0)
            return f"'{text}'"

        return self._FROM_BASE64.sub(decode, content)

    def _resolve_char_arrays(self, content):
        """Resolve [char] and [char[]] expressions.

        ``[char[]](72,105)`` becomes ``'Hi'`` and ``[char]104`` (decimal) or
        ``[char]0x68`` (hex) becomes ``'h'``. Expressions containing a code
        point that ``chr`` rejects are left untouched.
        """
        def array_to_string(match):
            try:
                text = ''.join(
                    chr(int(code)) for code in match.group(1).split(',')
                )
            except (ValueError, OverflowError):
                return match.group(0)
            return f"'{text}'"

        def cast_to_string(match):
            token = match.group(1)
            base = 16 if token.lower().startswith('0x') else 10
            try:
                return f"'{chr(int(token, base))}'"
            except (ValueError, OverflowError):
                return match.group(0)

        # Substituting per match (rather than str.replace on the matched
        # text) keeps "[char]10" from clobbering the start of "[char]104".
        content = self._CHAR_ARRAY.sub(array_to_string, content)
        return self._CHAR_CAST.sub(cast_to_string, content)

    def _resolve_format_strings(self, content):
        """Resolve PowerShell format string operator.

        Handles ``"<format>" -f '<arg0>','<arg1>',...`` where the format
        contains at least one ``{N}`` placeholder and every argument is a
        quoted literal. Placeholders whose index has no argument are kept
        as-is. The result is emitted as a single-quoted literal; wrapping
        grouping parentheses are dropped.
        """
        def expand(match):
            template = match.group("fmt")[1:-1]
            if not self._PLACEHOLDER.search(template):
                return match.group(0)
            values = [
                token[1:-1]
                for token in self._QUOTED_RE.findall(match.group("args"))
            ]

            def fill(placeholder):
                index = int(placeholder.group(1))
                if index < len(values):
                    return values[index]
                return placeholder.group(0)

            # One pass over the template, so text inserted for one
            # placeholder is never re-scanned for further placeholders.
            return "'" + self._PLACEHOLDER.sub(fill, template) + "'"

        return self._FORMAT_EXPR.sub(expand, content)

    def _decompress_streams(self, content):
        """Attempt to decode compressed Base64 payloads.

        Every run of 100 or more Base64 characters is decoded and inflated,
        trying raw DEFLATE first and gzip second. If the first format that
        inflates completely yields more than 50 characters of text, the
        Base64 run is replaced by that text.
        """
        import zlib

        wbits_options = (-zlib.MAX_WBITS, zlib.MAX_WBITS | 16)
        for match in self._BASE64_RUN.finditer(content):
            blob = match.group(0)
            try:
                raw = base64.b64decode(blob)
            except ValueError:
                continue
            for wbits in wbits_options:
                inflater = zlib.decompressobj(wbits)
                try:
                    data = inflater.decompress(raw, self.MAX_DECOMPRESSED_BYTES)
                except zlib.error:
                    continue
                if not inflater.eof:
                    # Truncated stream or output above the size cap.
                    continue
                decoded = data.decode('utf-8', errors='replace')
                if len(decoded) > 50:
                    content = content.replace(blob, decoded)
                break
        return content

    def _extract_iocs(self, content):
        """Extract IOCs from deobfuscated content into ``self.iocs``.

        Collects http(s) URLs (minus trailing ``.,;``), the non-IP host
        names of those URLs, dotted-quad IPv4 addresses whose octets are
        all <= 255, drive-letter file paths, registry keys (with or without
        the ``:`` of provider paths such as ``HKCU:\\...``), and any of the
        known suspicious command names (case-insensitive substring match).
        """
        from urllib.parse import urlsplit

        for url in self._URL.findall(content):
            url = url.rstrip(".,;")
            self.iocs["urls"].add(url)
            try:
                host = urlsplit(url).hostname
            except ValueError:  # e.g. unbalanced IPv6 brackets
                host = None
            if host and not self._IPV4.fullmatch(host):
                self.iocs["domains"].add(host)

        for ip in self._IPV4.findall(content):
            if all(int(octet) <= 255 for octet in ip.split(".")):
                self.iocs["ips"].add(ip)

        self.iocs["file_paths"].update(self._FILE_PATH.findall(content))
        self.iocs["registry_keys"].update(self._REGISTRY_KEY.findall(content))

        lowered = content.lower()
        for cmd in self._SUSPICIOUS_COMMANDS:
            if cmd.lower() in lowered:
                self.iocs["suspicious_commands"].add(cmd)

    def get_report(self):
        """Generate analysis report.

        Returns a JSON-serializable dict with the number of recorded
        layers, the per-layer details, and each IOC set as a sorted list.
        """
        return {
            "layers_processed": len(self.layers),
            "layer_details": self.layers,
            "iocs": {k: sorted(v) for k, v in self.iocs.items()},
        }
def _sniff_encoding(path):
    """Pick a text encoding for ``path`` from its byte-order mark.

    Returns ``"utf-8-sig"`` for a UTF-8 BOM, ``"utf-16"`` for either UTF-16
    BOM, and ``"utf-8"`` when no BOM is present.
    """
    with open(path, 'rb') as f:
        head = f.read(3)
    if head.startswith(b"\xef\xbb\xbf"):
        return "utf-8-sig"
    if head.startswith((b"\xff\xfe", b"\xfe\xff")):
        return "utf-16"
    return "utf-8"


def main():
    """Command-line entry point.

    Reads the script named by ``--file``, prints the detected obfuscation
    techniques, deobfuscates it, and optionally writes the result
    (``--output``), prints the IOCs (``--extract-iocs`` or ``--report``),
    and saves the JSON report (``--report``).
    """
    parser = argparse.ArgumentParser(
        description="PowerShell Malware Deobfuscator"
    )
    parser.add_argument("--file", required=True, help="Input PS1 file")
    parser.add_argument("--output", help="Output deobfuscated file")
    parser.add_argument("--extract-iocs", action="store_true",
                        help="Extract IOCs from result")
    parser.add_argument("--report", help="Save JSON report")
    args = parser.parse_args()

    # Decode by BOM rather than the locale default: a UTF-16 script read
    # with the wrong codec is unreadable to every later stage.
    try:
        encoding = _sniff_encoding(args.file)
        with open(args.file, 'r', encoding=encoding, errors='replace') as f:
            content = f.read()
    except OSError as exc:
        parser.error(f"cannot read {args.file}: {exc}")

    deob = PowerShellDeobfuscator()
    print("[+] Analyzing obfuscation techniques...")
    techniques = deob.analyze(content)
    for t in techniques:
        print(f" - {t['technique']} ({t['count']} occurrences)")

    print(f"\n[+] Deobfuscating ({len(content)} chars)...")
    result = deob.deobfuscate(content)
    print(f"[+] Result: {len(result)} chars")

    if args.output:
        # Explicit UTF-8: the result may contain U+FFFD or other characters
        # that a locale code page cannot encode.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(result)
        print(f"[+] Saved to {args.output}")

    report = deob.get_report()
    if args.extract_iocs or args.report:
        print("\n[+] Extracted IOCs:")
        for category, values in report["iocs"].items():
            if values:
                print(f" {category}:")
                for v in values:
                    print(f" - {v}")

    if args.report:
        with open(args.report, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2)
        print(f"[+] Report saved to {args.report}")


if __name__ == "__main__":
    main()