mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-11 13:44:56 +03:00
c47eed6a64
- Fix 25 shell=True subprocess calls with list-based commands - Fix 49 verify=False in defensive skills (env-var override) - Add timeout to 231 HTTP/subprocess/socket calls - Fix 6 SQL injection patterns with whitelist validation - Replace 8 __import__() with standard imports - Remove 701 unused imports across 442 files - Add authorized-testing disclaimers to all offensive skills - Complete 11 incomplete skill directories - Expand 10 stub SKILL.md files with full content - Fix 2 YAML parse errors in frontmatter - Fix 5 pre-existing syntax errors - Convert 22 hardcoded paths/ports to environment variables - Back up 21 redundant skill pairs to .bak - Fix 2 global declaration errors - 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE) - 0 compile errors across all 724 agent.py files
150 lines
6.0 KiB
Python
150 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Agent for performing malware IOC extraction from files, reports, and samples."""
|
|
|
|
import json
|
|
import argparse
|
|
import re
|
|
import hashlib
|
|
from pathlib import Path
|
|
|
|
|
|
# Compiled regular expressions keyed by IOC type.
#
# Every pattern is applied with ``findall``, so only non-capturing groups
# ``(?:...)`` are used: a capturing group would make ``findall`` return the
# group contents instead of the whole match.
IOC_PATTERNS = {
    # Dotted-quad IPv4 with each octet limited to 0-255.
    "ipv4": re.compile(r"\b(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\b"),
    # Fully expanded IPv6 only (eight groups); "::"-compressed forms are not matched.
    "ipv6": re.compile(r"\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b"),
    # Domains restricted to a fixed list of TLDs.
    "domain": re.compile(r"\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+(?:com|net|org|io|ru|cn|xyz|top|info|biz|cc|tk|ml|ga|cf|gq|pw)\b"),
    "url": re.compile(r"https?://[^\s<>\"'\)]+"),
    "email": re.compile(r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b"),
    # Hex digests, told apart purely by length. Both letter cases are accepted
    # because digests are often printed in upper case.
    "md5": re.compile(r"\b[a-fA-F0-9]{32}\b"),
    "sha1": re.compile(r"\b[a-fA-F0-9]{40}\b"),
    "sha256": re.compile(r"\b[a-fA-F0-9]{64}\b"),
    "cve": re.compile(r"CVE-\d{4}-\d{4,7}", re.I),
    "registry_key": re.compile(r"(?:HKLM|HKCU|HKCR|HKU|HKCC)\\[^\s\"']+"),
    "file_path_windows": re.compile(r"[A-Z]:\\(?:[^\s\\\"]+\\)*[^\s\\\"]+\.\w{1,5}"),
    "file_path_unix": re.compile(r"/(?:tmp|var|etc|usr|home|opt|bin|sbin)/[^\s\"']+"),
    "mutex": re.compile(r"(?:Global|Local)\\[^\s\"']+"),
    # Addresses starting with 1 or 3 only.
    "bitcoin_addr": re.compile(r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b"),
    # Word boundaries stop the pattern from firing inside longer tokens such
    # as base64 blobs or part numbers ("XT12345").
    "mitre_technique": re.compile(r"\bT\d{4}(?:\.\d{3})?\b"),
    "user_agent": re.compile(r"Mozilla/5\.0[^\n\"]{20,200}"),
}
|
|
|
|
def _refang_url(match):
    """Rewrite a defanged ``hxxp``/``hxxps`` URL match into a real URL.

    The scheme is lower-cased before substitution so the mixed-case variants
    seen in reports (``hXXp``, ``HXXPS``) come out as plain ``http``/``https``.
    The rest of the URL keeps its case; only literal ``hxxp`` inside it is
    replaced.
    """
    scheme, separator, remainder = match.group().partition("://")
    return scheme.lower().replace("hxxp", "http") + separator + remainder.replace("hxxp", "http")


# Maps a label to ``(pattern, fixer)``. ``fixer`` receives the match object and
# returns the refanged text; the pair is meant to be used as
# ``pattern.sub(fixer, text)``.
DEFANGED_PATTERNS = {
    "ip_defanged": (re.compile(r"\b\d+\[\.\]\d+\[\.\]\d+\[\.\]\d+\b"), lambda m: m.group().replace("[.]", ".")),
    # Case-insensitive so "hXXp://" and "HXXPS://" are recognised as well.
    "url_defanged": (re.compile(r"hxxps?://[^\s]+", re.I), _refang_url),
    "domain_defanged": (re.compile(r"\b\S+\[\.\]\S+\b"), lambda m: m.group().replace("[.]", ".")),
}
|
|
|
|
|
|
# RFC 1918 private ranges plus 127.0.0.0/8 loopback; compiled once at import
# time instead of on every call.
_PRIVATE_IPV4 = re.compile(r"^(?:10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|127\.)")

# Upper bound on the number of indicators reported per IOC type.
_MAX_IOCS_PER_TYPE = 200


def extract_iocs_from_text(text):
    """Extract all IOC types from raw text.

    Defanged indicators (``1[.]2[.]3[.]4``, ``hxxp://...``) are refanged first
    using ``DEFANGED_PATTERNS``; every pattern in ``IOC_PATTERNS`` is then run
    over the refanged text.

    Args:
        text: The text to scan.

    Returns:
        A dict mapping IOC type to a sorted, de-duplicated list of at most
        200 matches. Types with no matches are omitted. Private and loopback
        IPv4 addresses are excluded.
    """
    refanged = text
    for pattern, fixer in DEFANGED_PATTERNS.values():
        refanged = pattern.sub(fixer, refanged)

    extracted = {}
    for ioc_type, pattern in IOC_PATTERNS.items():
        matches = set(pattern.findall(refanged))
        if ioc_type == "ipv4":
            # Filter before truncating: otherwise private addresses can use up
            # the 200 slots and push public ones out, and a type whose matches
            # were all private would be left behind as an empty list.
            matches = {ip for ip in matches if not _PRIVATE_IPV4.match(ip)}
        if matches:
            extracted[ioc_type] = sorted(matches)[:_MAX_IOCS_PER_TYPE]
    return extracted
|
|
|
|
|
|
def extract_from_file(file_path):
    """Scan a text file for IOCs and summarise the findings.

    The file is decoded as UTF-8, with undecodable bytes replaced rather than
    raising.

    Args:
        file_path: Path of the file to read.

    Returns:
        A dict with the ``source`` path as given, the ``total_iocs`` count,
        per-type counts under ``by_type``, and the extracted values under
        ``indicators``.
    """
    text = Path(file_path).read_text(encoding="utf-8", errors="replace")
    indicators = extract_iocs_from_text(text)
    counts = {ioc_type: len(values) for ioc_type, values in indicators.items()}
    return {
        "source": file_path,
        "total_iocs": sum(counts.values()),
        "by_type": counts,
        "indicators": indicators,
    }
|
|
|
|
|
|
# Read size used when streaming a sample through the hash functions.
_HASH_CHUNK_SIZE = 1024 * 1024


def hash_file(file_path):
    """Calculate file hashes for malware sample identification.

    The file is streamed in fixed-size chunks, so memory use stays bounded
    however large the sample is. All three digests are updated in a single
    pass over the data.

    Args:
        file_path: Path of the file to hash.

    Returns:
        A dict with the ``file`` path as given, ``size_bytes``, and the
        hexadecimal ``md5``, ``sha1`` and ``sha256`` digests.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    size_bytes = 0
    with Path(file_path).open("rb") as handle:
        # iter() with a sentinel stops once read() returns b"" at end of file.
        for chunk in iter(lambda: handle.read(_HASH_CHUNK_SIZE), b""):
            size_bytes += len(chunk)
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
    return {
        "file": file_path,
        "size_bytes": size_bytes,
        "md5": md5.hexdigest(),
        "sha1": sha1.hexdigest(),
        "sha256": sha256.hexdigest(),
    }
|
|
|
|
|
|
def extract_strings(file_path, min_length=6):
    """Extract printable strings from binary file.

    Two encodings are scanned: runs of printable ASCII bytes, and "wide"
    runs in which each printable byte is followed by a NUL byte (decoded as
    UTF-16-LE). All recovered strings are then searched for IOCs.

    Args:
        file_path: Path of the binary to scan.
        min_length: Minimum number of characters in a run for it to count as
            a string. Must be at least 1.

    Returns:
        A dict with the ``file`` path as given, ``total_strings``, up to 30
        ``suspicious_strings`` (each cut to 200 characters), per-type IOC
        counts under ``extracted_iocs``, and the IOC values under
        ``ioc_details``.

    Raises:
        ValueError: If ``min_length`` is smaller than 1.
    """
    # A zero or negative bound would yield a degenerate regex quantifier and
    # meaningless matches, so reject it up front.
    if min_length < 1:
        raise ValueError("min_length must be at least 1")

    data = Path(file_path).read_bytes()
    ascii_strings = re.findall(rb"[\x20-\x7e]{%d,}" % min_length, data)
    wide_strings = re.findall(rb"(?:[\x20-\x7e]\x00){%d,}" % min_length, data)
    all_strings = [s.decode("ascii", errors="replace") for s in ascii_strings]
    all_strings += [s.decode("utf-16-le", errors="replace") for s in wide_strings]
    iocs = extract_iocs_from_text("\n".join(all_strings))

    suspicious_kw = ["http", "socket", "connect", "download", "upload", "exec", "cmd.exe",
                     "powershell", "reg add", "CreateRemoteThread", "VirtualAlloc", "WriteProcessMemory",
                     "LoadLibrary", "GetProcAddress", "WinExec", "ShellExecute"]
    # Lower-case the keywords once rather than once per string per keyword.
    lowered_keywords = [kw.lower() for kw in suspicious_kw]
    max_suspicious = 30

    suspicious = []
    for s in all_strings:
        lowered = s.lower()
        if any(kw in lowered for kw in lowered_keywords):
            suspicious.append(s[:200])
            # Only the first 30 hits are reported, so stop scanning there.
            if len(suspicious) == max_suspicious:
                break

    return {
        "file": file_path,
        "total_strings": len(all_strings),
        "suspicious_strings": suspicious,
        "extracted_iocs": {k: len(v) for k, v in iocs.items()},
        "ioc_details": iocs,
    }
|
|
|
|
|
|
def generate_ioc_report(file_path, output=None):
    """Generate comprehensive IOC extraction report.

    Combines the file hashes with the string/IOC analysis of the same file
    and stamps the result with the current UTC time.

    Args:
        file_path: Path of the sample to analyse.
        output: Optional path; when given, the report is also written there
            as indented JSON.

    Returns:
        A dict with the keys ``generated`` (ISO-8601 UTC timestamp),
        ``file_info``, ``strings_analysis``, ``extracted_iocs`` and
        ``ioc_summary``.
    """
    # Imported locally because the module header does not import datetime.
    # The previous `"datetime" in dir()` guard inspected the function's local
    # names, was therefore always false, and left "generated" empty.
    from datetime import datetime, timezone

    hashes = hash_file(file_path)
    strings = extract_strings(file_path)
    report = {
        "generated": datetime.now(timezone.utc).isoformat(),
        "file_info": hashes,
        "strings_analysis": {
            "total": strings["total_strings"],
            "suspicious": strings["suspicious_strings"],
        },
        "extracted_iocs": strings["ioc_details"],
        "ioc_summary": strings["extracted_iocs"],
    }
    if output:
        # Explicit encoding keeps the file independent of the platform default.
        with open(output, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
    return report
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses one of the ``text``, ``hash``, ``strings`` or ``report``
    sub-commands, runs the matching function and prints its result as
    indented JSON. With no sub-command, the help text is printed instead.
    """
    parser = argparse.ArgumentParser(description="Malware IOC Extraction Agent")
    subparsers = parser.add_subparsers(dest="command")

    text_cmd = subparsers.add_parser("text", help="Extract IOCs from text/report file")
    text_cmd.add_argument("--file", required=True)

    hash_cmd = subparsers.add_parser("hash", help="Calculate file hashes")
    hash_cmd.add_argument("--file", required=True)

    strings_cmd = subparsers.add_parser("strings", help="Extract strings and IOCs from binary")
    strings_cmd.add_argument("--file", required=True)
    strings_cmd.add_argument("--min-length", type=int, default=6)

    report_cmd = subparsers.add_parser("report", help="Generate full IOC report")
    report_cmd.add_argument("--file", required=True)
    report_cmd.add_argument("--output", help="Output JSON file")

    args = parser.parse_args()

    # Thunks defer attribute access, so options that exist only on one
    # sub-command are read only when that sub-command was chosen.
    handlers = {
        "text": lambda: extract_from_file(args.file),
        "hash": lambda: hash_file(args.file),
        "strings": lambda: extract_strings(args.file, args.min_length),
        "report": lambda: generate_ioc_report(args.file, args.output),
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return

    print(json.dumps(handler(), indent=2, default=str))
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|