Files
Anthropic-Cybersecurity-Skills/skills/deobfuscating-javascript-malware/scripts/agent.py
T
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

211 lines
7.1 KiB
Python

#!/usr/bin/env python3
"""JavaScript malware deobfuscation agent using jsbeautifier and pattern matching."""
import re
import sys
import json
import base64
import urllib.parse
from pathlib import Path
# jsbeautifier is treated as optional: if the import fails, the name is bound
# to None so that code using it can check for its absence.
try:
    import jsbeautifier
except ImportError:
    jsbeautifier = None
def beautify_js(code):
    """Pretty-print JavaScript source when jsbeautifier is available.

    Returns the input unchanged if the module-level ``jsbeautifier`` name is
    None; otherwise returns the beautified text using a 2-space indent and a
    120-column wrap length.
    """
    if jsbeautifier is not None:
        options = jsbeautifier.default_options()
        options.indent_size = 2
        options.wrap_line_length = 120
        return jsbeautifier.beautify(code, options)
    return code
def decode_hex_strings(code):
    """Replace runs of JavaScript ``\\xNN`` escapes with the characters they denote.

    Args:
        code: JavaScript source text.

    Returns:
        The source with every ``\\xNN`` escape (or run of consecutive escapes)
        replaced by the corresponding character(s). Text without such escapes
        is returned unchanged.
    """
    def hex_replace(match):
        hex_digits = match.group(0).replace("\\x", "")
        # A JavaScript \xNN escape denotes code point U+00NN, which is exactly
        # the Latin-1 mapping. Decoding as ASCII would turn every byte >= 0x80
        # into U+FFFD and lose the payload. The regex guarantees well-formed
        # hex pairs and Latin-1 decodes every byte, so nothing here can raise.
        return bytes.fromhex(hex_digits).decode("latin-1")

    return re.sub(r'(?:\\x[0-9a-fA-F]{2})+', hex_replace, code)
def decode_unicode_escapes(code):
    """Replace JavaScript ``\\uNNNN`` escapes with the characters they denote.

    A high surrogate escape immediately followed by a low surrogate escape
    (the UTF-16 encoding of a code point above U+FFFF) is combined into the
    single character it represents. Any other ``\\uNNNN`` escape, including an
    unpaired surrogate, is converted on its own.

    Args:
        code: JavaScript source text.

    Returns:
        The source with the escapes replaced.
    """
    def unicode_replace(match):
        high, low, single = match.groups()
        if single is not None:
            return chr(int(single, 16))
        # Standard UTF-16 surrogate pair arithmetic.
        return chr(
            0x10000
            + ((int(high, 16) - 0xD800) << 10)
            + (int(low, 16) - 0xDC00)
        )

    # The surrogate-pair alternative is listed first so it wins over the
    # single-escape alternative at the same position.
    return re.sub(
        r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
        r'|\\u([0-9a-fA-F]{4})',
        unicode_replace,
        code,
    )
def decode_charcode_calls(code):
    """Resolve ``String.fromCharCode(...)`` calls whose arguments are numeric literals.

    Decimal and ``0x``-prefixed hexadecimal literals are accepted. Each value
    is reduced to 16 bits, as JavaScript does, before being converted to a
    character. The call is replaced by a double-quoted string literal in which
    quotes, backslashes and control characters are escaped, so the rewritten
    source remains valid JavaScript.

    Args:
        code: JavaScript source text.

    Returns:
        The source with resolvable calls replaced. Calls whose arguments are
        not all numeric literals (for example identifiers) are left untouched.
    """
    def charcode_replace(match):
        tokens = (piece.strip() for piece in match.group(1).split(","))
        try:
            units = [
                int(token, 16) if token.lower().startswith("0x") else int(token)
                for token in tokens
                if token
            ]
        except ValueError:
            # Not a pure list of numeric literals (e.g. variables named a-f).
            return match.group(0)
        text = "".join(chr(unit & 0xFFFF) for unit in units)
        # json.dumps escapes quotes/backslashes/control characters, matching
        # how the atob and unescape passes emit their string literals.
        return json.dumps(text, ensure_ascii=False)

    return re.sub(
        r'String\.fromCharCode\(([0-9a-fA-FxX,\s]+)\)',
        charcode_replace,
        code,
    )
def decode_atob_calls(code):
    """Resolve ``atob("...")`` calls whose argument is a static base64 literal.

    The decoded bytes are interpreted as UTF-8 (undecodable bytes become
    U+FFFD) and emitted as a JSON/JavaScript string literal. Missing ``=``
    padding is tolerated, because JavaScript's ``atob`` accepts unpadded
    input whereas ``base64.b64decode`` rejects it.

    Args:
        code: JavaScript source text.

    Returns:
        The source with decodable calls replaced. Calls whose literal is not
        valid base64 are left untouched.
    """
    def atob_replace(match):
        payload = match.group(1).rstrip("=")
        # Re-pad to a multiple of four so unpadded literals decode too.
        payload += "=" * (-len(payload) % 4)
        try:
            raw = base64.b64decode(payload)
        except ValueError:
            # binascii.Error is a ValueError subclass; keep the call as-is.
            return match.group(0)
        return json.dumps(raw.decode("utf-8", errors="replace"))

    return re.sub(
        r'atob\s*\(\s*["\']([A-Za-z0-9+/=]+)["\']\s*\)',
        atob_replace,
        code,
    )
def decode_unescape_calls(code):
    """Resolve ``unescape("...")`` calls whose argument is a static string literal.

    Implements JavaScript ``unescape`` semantics rather than URL decoding:
    ``%uXXXX`` becomes the UTF-16 code unit U+XXXX and ``%XX`` becomes the
    code point U+00XX. A ``%`` that does not start a valid escape is kept
    literally. The result is emitted as a JSON/JavaScript string literal.

    Args:
        code: JavaScript source text.

    Returns:
        The source with the calls replaced by their decoded string literals.
    """
    # %uXXXX is tried first so "%u0041" is not misread as "%u0" + "041".
    escape_re = re.compile(r'%u([0-9a-fA-F]{4})|%([0-9a-fA-F]{2})')

    def decode_escape(match):
        return chr(int(match.group(1) or match.group(2), 16))

    def unescape_replace(match):
        return json.dumps(escape_re.sub(decode_escape, match.group(1)))

    return re.sub(r'unescape\(["\']([^"\']+)["\']\)', unescape_replace, code)
# (label, regex) pairs, checked in this order; the order fixes the order of
# labels in the returned list.
_OBFUSCATION_SIGNATURES = (
    ("hex_encoding", r'\\x[0-9a-fA-F]{2}'),
    ("unicode_escapes", r'\\u[0-9a-fA-F]{4}'),
    ("fromCharCode", r'String\.fromCharCode'),
    ("base64_atob", r'atob\s*\('),
    ("eval_chain", r'eval\s*\('),
    ("function_constructor", r'new\s+Function\s*\('),
    ("document_write", r'document\.write\s*\('),
    ("setTimeout_exec", r'setTimeout\s*\('),
    ("anti_debugging_debugger",
     r'setInterval\s*\(\s*function\s*\(\)\s*\{\s*debugger'),
    ("anti_debugging_devtools",
     r'window\.outerWidth\s*-\s*window\.innerWidth'),
    ("anti_debugging_timing", r'performance\.now\s*\(\)'),
    ("variable_mangling", r'_0x[0-9a-fA-F]+'),
    ("string_array", r'var\s+_0x[0-9a-fA-F]+\s*=\s*\['),
    ("unescape_encoding", r'unescape\s*\('),
)


def detect_obfuscation_techniques(code):
    """Identify obfuscation techniques present in a script.

    Each technique is recognised by a regular expression searched anywhere in
    the text. Call-style signatures (``atob``, ``unescape``, ``new Function``)
    tolerate optional whitespace before the opening parenthesis, in the same
    way as the ``eval`` and ``setTimeout`` signatures.

    Args:
        code: JavaScript source text.

    Returns:
        A list of technique labels, in the fixed order of the signature table;
        empty when nothing matches.
    """
    return [
        label
        for label, signature in _OBFUSCATION_SIGNATURES
        if re.search(signature, code)
    ]
def extract_iocs(code):
    """Extract potential indicators of compromise from JavaScript text.

    Args:
        code: (Deobfuscated) JavaScript source text.

    Returns:
        A dict with the keys ``"urls"``, ``"domains"``, ``"ips"`` and
        ``"emails"``. Each value is a de-duplicated, sorted list of matched
        strings, so repeated runs over the same input give identical output.
        Dotted quads with an octet above 255 are not reported as IPs.
    """
    url_pattern = re.compile(r'https?://[^\s"\'<>\)]+', re.IGNORECASE)
    domain_pattern = re.compile(
        r'(?:[a-zA-Z0-9-]+\.)+(?:com|net|org|io|xyz|top|info|cc|ru|cn|tk)\b'
    )
    ip_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
    email_pattern = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    def has_valid_octets(candidate):
        # The regex only bounds the digit count; reject e.g. "999.1.1.1".
        return all(int(octet) <= 255 for octet in candidate.split("."))

    return {
        "urls": sorted(set(url_pattern.findall(code))),
        "domains": sorted(set(domain_pattern.findall(code))),
        "ips": sorted(
            ip for ip in set(ip_pattern.findall(code)) if has_valid_octets(ip)
        ),
        "emails": sorted(set(email_pattern.findall(code))),
    }
def remove_anti_debug(code):
    """Strip two common anti-debugging constructs from JavaScript source.

    Replaces, in order:
      * a ``setInterval`` call whose callback body is only ``debugger`` with
        the comment ``/* anti-debug removed */``;
      * an ``if`` comparing ``window.outerWidth - window.innerWidth`` against
        a number, up to and including the next ``}``, with the comment
        ``/* devtools detection removed */``.

    Returns the rewritten source text.
    """
    substitutions = (
        (
            r'setInterval\s*\(\s*function\s*\(\)\s*\{\s*debugger\s*;?\s*\}\s*,\s*\d+\s*\)',
            '/* anti-debug removed */',
        ),
        (
            r'if\s*\(\s*window\.outerWidth\s*-\s*window\.innerWidth\s*>\s*\d+\s*\)[^}]*\}',
            '/* devtools detection removed */',
        ),
    )
    cleaned = code
    for trap_pattern, placeholder in substitutions:
        cleaned = re.sub(trap_pattern, placeholder, cleaned)
    return cleaned
def deobfuscate(code, remove_debug=True):
    """Run the deobfuscation passes over JavaScript source, in a fixed order.

    When ``remove_debug`` is true, anti-debugging traps are removed first.
    Then hex escapes, unicode escapes, ``String.fromCharCode`` calls,
    ``atob`` calls and ``unescape`` calls are decoded, and the result is
    beautified. Each pass receives the output of the previous one.

    Returns the transformed source text.
    """
    passes = [
        decode_hex_strings,
        decode_unicode_escapes,
        decode_charcode_calls,
        decode_atob_calls,
        decode_unescape_calls,
        beautify_js,
    ]
    if remove_debug:
        passes.insert(0, remove_anti_debug)
    for apply_pass in passes:
        code = apply_pass(code)
    return code
def extract_scripts_from_html(html_content):
    """Collect the bodies of inline ``<script>`` elements from HTML text.

    Tag matching is case-insensitive and bodies may span multiple lines.
    Each body is whitespace-stripped; bodies that are empty after stripping
    are omitted. Returns the bodies as a list in document order.
    """
    script_re = re.compile(r'<script[^>]*>(.*?)</script>', re.DOTALL | re.IGNORECASE)
    bodies = []
    for found in script_re.finditer(html_content):
        body = found.group(1).strip()
        if body:
            bodies.append(body)
    return bodies
def analyze_file(file_path):
    """Run the full analysis pipeline on a JavaScript or HTML file.

    Files with a ``.html``/``.htm`` suffix (case-insensitive) are split into
    their inline scripts; any other file is treated as a single script. Each
    script is checked for obfuscation techniques, deobfuscated, and scanned
    for IOCs.

    Args:
        file_path: Path of the file to analyze.

    Returns:
        ``{"error": ...}`` if ``file_path`` is not an existing regular file;
        otherwise a dict with ``"file"``, ``"script_count"`` and
        ``"analyses"`` (one entry per script, including a preview limited to
        the first 2000 characters of the deobfuscated text).
    """
    path = Path(file_path)
    # is_file() rather than exists(): a directory exists but read_text() on it
    # would raise instead of producing the error result.
    if not path.is_file():
        return {"error": f"File not found: {file_path}"}
    # Undecodable bytes are replaced so a malformed sample cannot abort the run.
    content = path.read_text(encoding="utf-8", errors="replace")
    if path.suffix.lower() in (".html", ".htm"):
        scripts = extract_scripts_from_html(content)
    else:
        scripts = [content]
    analyses = []
    for index, script in enumerate(scripts):
        # Techniques are detected on the original text, before any decoding.
        techniques = detect_obfuscation_techniques(script)
        deobfuscated = deobfuscate(script)
        analyses.append({
            "script_index": index,
            "original_size": len(script),
            "deobfuscated_size": len(deobfuscated),
            "obfuscation_techniques": techniques,
            "iocs": extract_iocs(deobfuscated),
            "deobfuscated_preview": deobfuscated[:2000],
        })
    return {
        "file": file_path,
        "script_count": len(scripts),
        "analyses": analyses,
    }
if __name__ == "__main__":
    # Command-line entry: analyze the file named by the first argument and
    # print the result as indented JSON.
    if len(sys.argv) < 2:
        print("Usage: agent.py <file.js|file.html> [--full]")
        sys.exit(1)
    result = analyze_file(sys.argv[1])
    # Without --full the deobfuscated preview is dropped from every analysis.
    if "--full" not in sys.argv:
        for analysis in result.get("analyses", []):
            analysis.pop("deobfuscated_preview", None)
    print(json.dumps(result, indent=2, default=str))