mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-11 13:44:56 +03:00
c47eed6a64
- Fix 25 shell=True subprocess calls with list-based commands - Fix 49 verify=False in defensive skills (env-var override) - Add timeout to 231 HTTP/subprocess/socket calls - Fix 6 SQL injection patterns with whitelist validation - Replace 8 __import__() with standard imports - Remove 701 unused imports across 442 files - Add authorized-testing disclaimers to all offensive skills - Complete 11 incomplete skill directories - Expand 10 stub SKILL.md files with full content - Fix 2 YAML parse errors in frontmatter - Fix 5 pre-existing syntax errors - Convert 22 hardcoded paths/ports to environment variables - Back up 21 redundant skill pairs to .bak - Fix 2 global declaration errors - 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE) - 0 compile errors across all 724 agent.py files
272 lines
11 KiB
Python
272 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
# For authorized testing in lab/CTF environments only
|
|
"""IOC extraction agent using pefile, yara-python, and requests for VirusTotal validation."""
|
|
|
|
import argparse
|
|
import csv
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
from typing import List, Set
|
|
|
|
# Logging is configured before the guarded imports below so that their
# "not installed" warnings are emitted with the timestamped format.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# pefile is mandatory: the process exits at import time with an install hint
# when it is missing.
try:
    import pefile
except ImportError:
    sys.exit("pefile required: pip install pefile")

# yara-python is optional: the name is bound to None when it is missing so
# that code using it can test for availability.
try:
    import yara
except ImportError:
    yara = None
    logger.warning("yara-python not installed; YARA scanning disabled")

# requests is optional: the name is bound to None when it is missing, which
# disables VirusTotal validation.
try:
    import requests
except ImportError:
    requests = None
    logger.warning("requests not installed; VT validation disabled")
|
|
|
|
# Dotted-quad IPv4 address; each octet alternative is limited to 0-255.
IP_RE = re.compile(r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|1?\d\d?)\b")
# Host name: one or more dot-terminated labels (alphanumeric, inner hyphens,
# at most 63 characters) followed by an alphabetic TLD of two or more letters.
# Every label may contain digits and hyphens, so names such as
# "api.v2.example.com" are captured whole instead of losing leading labels.
DOMAIN_RE = re.compile(r"\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b")
# http/https URL running up to the first whitespace, quote or bracket-like character.
URL_RE = re.compile(r"https?://[^\s<>\"'{}|\\^`\[\]]+")
# E-mail address. The domain part accepts any number of labels so that
# "user@mail.example.com" is not truncated after the second label; the group is
# non-capturing so findall() keeps returning the full address.
EMAIL_RE = re.compile(r"[a-zA-Z0-9_.+-]+@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}")

# String prefixes of addresses that are dropped from the IP results
# (the 172.16-31 range is checked numerically by the extraction code).
PRIVATE_IP_PREFIXES = ("10.", "127.", "0.", "192.168.", "169.254.")
# File-name extensions that make a DOMAIN_RE match a false positive.
FALSE_DOMAIN_SUFFIXES = (".dll", ".exe", ".sys", ".ocx", ".drv", ".pdb")
|
|
|
|
|
|
def compute_hashes(file_path: str) -> dict:
    """Compute MD5, SHA-1 and SHA-256 digests and the size of a file.

    The file is streamed in fixed-size chunks, so a large sample is never
    held in memory in one piece.

    Args:
        file_path: Path of the file to hash.

    Returns:
        Dict with the hex digests under "md5", "sha1" and "sha256" and the
        number of bytes read under "size_bytes".

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    size_bytes = 0

    with open(file_path, "rb") as f:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
            size_bytes += len(chunk)

    return {
        "md5": md5.hexdigest(),
        "sha1": sha1.hexdigest(),
        "sha256": sha256.hexdigest(),
        "size_bytes": size_bytes,
    }
|
|
|
|
|
|
def extract_pe_metadata(file_path: str) -> dict:
    """Extract PE metadata: imphash, compile time, sections and imports.

    Args:
        file_path: Path of the file to parse with pefile.

    Returns:
        On success a dict with:
          * "imphash": value returned by ``pe.get_imphash()``;
          * "compile_time": the file header TimeDateStamp rendered as a UTC
            ISO-8601 string without an offset suffix;
          * "sections": one dict per section (name, entropy rounded to two
            decimals, virtual size, raw size);
          * "imports": one dict per imported DLL with at most 20 named
            functions each.
        If pefile raises ``PEFormatError``, ``{"error": "Not a valid PE file"}``
        is returned instead.
    """
    # Function-scope import: only this helper needs timezone.
    from datetime import timezone

    try:
        pe = pefile.PE(file_path)
    except pefile.PEFormatError:
        return {"error": "Not a valid PE file"}

    try:
        # Equivalent to the deprecated datetime.utcfromtimestamp(): convert in
        # UTC, then drop tzinfo so the ISO string keeps its original shape.
        compiled_at = datetime.fromtimestamp(
            pe.FILE_HEADER.TimeDateStamp, tz=timezone.utc
        ).replace(tzinfo=None)
        meta = {
            "imphash": pe.get_imphash(),
            "compile_time": compiled_at.isoformat(),
            "sections": [],
            "imports": [],
        }
        for section in pe.sections:
            # Section names are NUL-padded fixed-width byte strings.
            name = section.Name.rstrip(b"\x00").decode("ascii", errors="replace")
            meta["sections"].append({
                "name": name,
                "entropy": round(section.get_entropy(), 2),
                "virtual_size": section.Misc_VirtualSize,
                "raw_size": section.SizeOfRawData,
            })
        # The attribute is only present when the PE has an import directory.
        if hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                dll_name = entry.dll.decode("ascii", errors="replace")
                # Imports by ordinal have no name and are skipped.
                funcs = [imp.name.decode("ascii", errors="replace")
                         for imp in entry.imports if imp.name]
                meta["imports"].append({"dll": dll_name, "functions": funcs[:20]})
        return meta
    except pefile.PEFormatError:
        return {"error": "Not a valid PE file"}
    finally:
        # Always release the parsed file, including when extraction fails.
        pe.close()
|
|
|
|
|
|
def extract_strings(file_path: str, min_length: int = 4) -> List[str]:
    """Collect printable strings from a binary file.

    Two passes are made over the raw bytes: runs of printable ASCII
    (space through tilde), then runs of printable characters each followed
    by a NUL byte, which are decoded as UTF-16LE.

    Args:
        file_path: Path of the file to scan.
        min_length: Minimum number of characters a run needs to be reported.

    Returns:
        All ASCII strings in file order, followed by all UTF-16LE strings in
        file order.
    """
    with open(file_path, "rb") as handle:
        blob = handle.read()

    narrow_pattern = b"[ -~]{%d,}" % min_length
    wide_pattern = b"(?:[ -~]\x00){%d,}" % min_length

    collected: List[str] = []
    for hit in re.finditer(narrow_pattern, blob):
        collected.append(hit.group().decode("ascii"))
    for hit in re.finditer(wide_pattern, blob):
        collected.append(hit.group().decode("utf-16-le", errors="ignore"))
    return collected
|
|
|
|
|
|
def extract_network_iocs(strings: List[str]) -> dict:
    """Pull network indicators out of a list of strings.

    IPv4 addresses starting with one of PRIVATE_IP_PREFIXES or lying in
    172.16.0.0-172.31.255.255 are dropped. Domain matches ending in one of
    FALSE_DOMAIN_SUFFIXES are dropped; the rest are lower-cased, as are
    e-mail addresses. URLs are kept verbatim.

    Args:
        strings: Strings to search.

    Returns:
        Dict with sorted, de-duplicated lists under "ips", "domains", "urls"
        and "emails".
    """
    found_ips: Set[str] = set()
    found_domains: Set[str] = set()
    found_urls: Set[str] = set()
    found_emails: Set[str] = set()

    for text in strings:
        for candidate in IP_RE.findall(text):
            if candidate.startswith(PRIVATE_IP_PREFIXES):
                continue
            parts = candidate.split(".")
            first, second = int(parts[0]), int(parts[1])
            if first == 172 and 16 <= second <= 31:
                continue
            found_ips.add(candidate)

        for candidate in DOMAIN_RE.findall(text):
            lowered = candidate.lower()
            if not lowered.endswith(FALSE_DOMAIN_SUFFIXES):
                found_domains.add(lowered)

        found_urls.update(URL_RE.findall(text))
        found_emails.update(addr.lower() for addr in EMAIL_RE.findall(text))

    return {
        "ips": sorted(found_ips),
        "domains": sorted(found_domains),
        "urls": sorted(found_urls),
        "emails": sorted(found_emails),
    }
|
|
|
|
|
|
def extract_host_iocs(strings: List[str]) -> dict:
    """Pick host-based indicators out of a list of strings.

    A string is classified by how it starts:
      * an upper-case drive letter, colon and backslash (and more than five
        characters overall) -> file path;
      * a registry hive abbreviation and backslash, case-insensitive
        -> registry key;
      * "Global\\" or "Local\\", case-insensitive -> mutex.

    Args:
        strings: Strings to classify.

    Returns:
        Dict with sorted lists under "file_paths" (at most 30),
        "registry_keys" (at most 20) and "mutexes" (at most 10).
    """
    drive_prefix = re.compile(r"[A-Z]:\\")
    hive_prefix = re.compile(r"(?i)(HKLM|HKCU|HKCR|HKU|HKCC)\\")
    namespace_prefix = re.compile(r"(?i)(Global\\|Local\\)")

    paths = set()
    keys = set()
    named_objects = set()

    for text in strings:
        # The three checks are independent; one string may land in several sets.
        if drive_prefix.match(text) and len(text) > 5:
            paths.add(text)
        if hive_prefix.match(text):
            keys.add(text)
        if namespace_prefix.match(text):
            named_objects.add(text)

    return {
        "file_paths": sorted(paths)[:30],
        "registry_keys": sorted(keys)[:20],
        "mutexes": sorted(named_objects)[:10],
    }
|
|
|
|
|
|
def run_yara_scan(file_path: str, rules_path: str) -> List[dict]:
    """Match a file against a YARA rules file.

    Args:
        file_path: Path of the file to scan.
        rules_path: Path of the YARA rules file to compile.

    Returns:
        One dict per matching rule with "rule", "tags", "meta" and "strings"
        (up to ten ``(identifier, offset)`` tuples, where offset is that of
        the first instance or 0 when there are none). A single-element list
        holding an "error" dict is returned when yara-python is unavailable
        or raises ``yara.Error``.
    """
    if not yara:
        return [{"error": "yara-python not installed"}]

    try:
        compiled = yara.compile(filepath=rules_path)
        results = []
        for hit in compiled.match(file_path):
            located = []
            for string_match in hit.strings:
                if string_match.instances:
                    first_offset = string_match.instances[0].offset
                else:
                    first_offset = 0
                located.append((string_match.identifier, first_offset))
            results.append({
                "rule": hit.rule,
                "tags": hit.tags,
                "meta": hit.meta,
                "strings": located[:10],
            })
        return results
    except yara.Error as exc:
        return [{"error": str(exc)}]
|
|
|
|
|
|
def validate_ioc_virustotal(ioc_value: str, ioc_type: str, api_key: str) -> dict:
    """Look up a single IOC in the VirusTotal API v3.

    The lookup is best-effort: every failure yields ``{"validated": False}``,
    but the reason is now logged instead of being discarded silently.

    Args:
        ioc_value: The indicator (IP address, domain name or file hash).
        ioc_type: One of "ip", "domain" or "hash"; any other value is
            rejected without a network call.
        api_key: VirusTotal API key, sent in the ``x-apikey`` header.

    Returns:
        ``{"validated": True, "malicious": int, "suspicious": int}`` taken
        from ``last_analysis_stats`` on an HTTP 200 response, otherwise
        ``{"validated": False}``.
    """
    # Function-scope import: only this helper needs URL quoting.
    from urllib.parse import quote

    if not requests or not api_key:
        return {"validated": False}

    collections = {"ip": "ip_addresses", "domain": "domains", "hash": "files"}
    collection = collections.get(ioc_type)
    if not collection:
        return {"validated": False}

    # IOC values come from strings found inside untrusted samples; escape them
    # so they cannot alter the request path. Plain IPs, domains and hex hashes
    # contain only unreserved characters and pass through unchanged.
    safe_value = quote(ioc_value, safe="")
    url = f"https://www.virustotal.com/api/v3/{collection}/{safe_value}"

    try:
        resp = requests.get(url, headers={"x-apikey": api_key}, timeout=10)
        if resp.status_code != 200:
            logger.info("VirusTotal lookup for %s %s returned HTTP %s",
                        ioc_type, ioc_value, resp.status_code)
            return {"validated": False}
        stats = resp.json()["data"]["attributes"]["last_analysis_stats"]
        return {"validated": True, "malicious": stats.get("malicious", 0),
                "suspicious": stats.get("suspicious", 0)}
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as exc:
        # Network errors, invalid JSON, or a response with an unexpected shape.
        logger.warning("VirusTotal lookup for %s %s failed: %s", ioc_type, ioc_value, exc)
        return {"validated": False}
|
|
|
|
|
|
def defang_ioc(value: str) -> str:
    """Return a defanged copy of an IOC for safe sharing.

    Every occurrence of "http" becomes "hxxp" and every "." becomes "[.]".
    """
    without_scheme = value.replace("http", "hxxp")
    defanged = without_scheme.replace(".", "[.]")
    return defanged
|
|
|
|
|
|
def export_stix_bundle(iocs: dict, sha256: str) -> dict:
    """Build a minimal STIX 2.1 bundle of indicators from extracted IOCs.

    Each indicator carries an ``id`` of the form ``indicator--<UUIDv4>`` plus
    ``created`` and ``modified`` timestamps, and the bundle id has the form
    ``bundle--<UUIDv4>``, so that the output follows the STIX 2.1 identifier
    and required-property rules.

    Args:
        iocs: Dict that may contain "ips" and "domains" lists. Every IP is
            exported; only the first 20 domains are.
        sha256: SHA-256 hex digest of the sample, exported as the first
            indicator.

    Returns:
        Dict with "type" ("bundle"), "id" and "objects" (the indicators:
        hash first, then IPs, then domains).
    """
    # Function-scope imports: only this helper needs them.
    import uuid
    from datetime import timezone

    # One timestamp is shared by created/modified/valid_from of every object.
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _indicator(name: str, pattern: str) -> dict:
        """Return one STIX indicator object for the given pattern."""
        return {
            "type": "indicator",
            "spec_version": "2.1",
            "id": f"indicator--{uuid.uuid4()}",
            "created": ts,
            "modified": ts,
            "name": name,
            "pattern": pattern,
            "pattern_type": "stix",
            "valid_from": ts,
        }

    indicators = [_indicator("Malware Hash", f"[file:hashes.'SHA-256' = '{sha256}']")]
    for ip in iocs.get("ips", []):
        indicators.append(_indicator(f"C2 IP {ip}", f"[ipv4-addr:value = '{ip}']"))
    for domain in iocs.get("domains", [])[:20]:
        indicators.append(_indicator(f"C2 Domain {domain}", f"[domain-name:value = '{domain}']"))

    return {"type": "bundle", "id": f"bundle--{uuid.uuid4()}", "objects": indicators}
|
|
|
|
|
|
def export_csv(iocs: dict, hashes: dict, output_path: str) -> None:
    """Write the sample hashes and network IOCs to a CSV file.

    The file starts with a ``type,value,context,confidence`` header, then the
    SHA-256 and MD5 rows, then one row per IP, domain and URL in that order.

    Args:
        iocs: Dict that may contain "ips", "domains" and "urls" lists.
        hashes: Dict providing the "sha256" and "md5" digests.
        output_path: Destination file; it is overwritten if it exists.
    """
    # (key in iocs, value of the type column, context, confidence)
    layout = (
        ("ips", "ipv4", "c2_server", "high"),
        ("domains", "domain", "c2_domain", "medium"),
        ("urls", "url", "c2_url", "medium"),
    )
    with open(output_path, "w", newline="", encoding="utf-8") as out:
        sink = csv.writer(out)
        sink.writerow(["type", "value", "context", "confidence"])
        sink.writerow(["sha256", hashes["sha256"], "malware_sample", "high"])
        sink.writerow(["md5", hashes["md5"], "malware_sample", "high"])
        for key, kind, context, confidence in layout:
            for value in iocs.get(key, []):
                sink.writerow([kind, value, context, confidence])
    logger.info("Exported IOCs to %s", output_path)
|
|
|
|
|
|
def run_extraction(sample_path: str, output_dir: str, yara_rules: str = "",
                   vt_key: str = "") -> dict:
    """Run the full IOC extraction pipeline on one sample.

    Steps: hash the sample, parse PE metadata, extract strings, derive
    network and host IOCs, optionally run YARA and a VirusTotal hash lookup,
    then write ``iocs_stix.json`` and ``iocs.csv`` into ``output_dir``.

    Args:
        sample_path: Path of the sample to analyse.
        output_dir: Directory for the STIX and CSV exports; it is created
            when it does not exist.
        yara_rules: Optional path of a YARA rules file. A path that is not
            an existing file is skipped with a warning.
        vt_key: Optional VirusTotal API key; when empty no lookup is made.

    Returns:
        The report dict: "analysis_date", "sample", "hashes", "pe_metadata",
        "string_count", "network_iocs", "host_iocs", "yara_matches",
        "virustotal" (only when ``vt_key`` is given) and "summary".
    """
    # Function-scope import: only needed for the timestamp below.
    from datetime import timezone

    # Same naive-UTC ISO string as the deprecated datetime.utcnow().
    started = datetime.now(timezone.utc).replace(tzinfo=None)
    report = {"analysis_date": started.isoformat(), "sample": sample_path}

    report["hashes"] = compute_hashes(sample_path)
    report["pe_metadata"] = extract_pe_metadata(sample_path)

    strings = extract_strings(sample_path)
    report["string_count"] = len(strings)
    report["network_iocs"] = extract_network_iocs(strings)
    report["host_iocs"] = extract_host_iocs(strings)

    if yara_rules and os.path.isfile(yara_rules):
        report["yara_matches"] = run_yara_scan(sample_path, yara_rules)
    else:
        if yara_rules:
            logger.warning("YARA rules file not found: %s; skipping YARA scan", yara_rules)
        report["yara_matches"] = []

    if vt_key:
        vt_result = validate_ioc_virustotal(report["hashes"]["sha256"], "hash", vt_key)
        report["virustotal"] = vt_result

    # Callers other than main() may pass a directory that does not exist yet.
    # An empty string means the current directory and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    stix = export_stix_bundle(report["network_iocs"], report["hashes"]["sha256"])
    stix_path = os.path.join(output_dir, "iocs_stix.json")
    with open(stix_path, "w", encoding="utf-8") as f:
        json.dump(stix, f, indent=2)

    export_csv(report["network_iocs"], report["hashes"], os.path.join(output_dir, "iocs.csv"))

    report["summary"] = {
        "ips": len(report["network_iocs"]["ips"]),
        "domains": len(report["network_iocs"]["domains"]),
        "urls": len(report["network_iocs"]["urls"]),
        "file_paths": len(report["host_iocs"]["file_paths"]),
        "registry_keys": len(report["host_iocs"]["registry_keys"]),
        # run_yara_scan reports failures as {"error": ...} entries; those are
        # not rule matches and must not be counted as hits.
        "yara_hits": sum(1 for match in report["yara_matches"] if "error" not in match),
    }
    return report
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses the arguments, creates the output directory, runs the extraction
    pipeline, saves the report as JSON in the output directory and prints the
    same JSON to stdout.
    """
    cli = argparse.ArgumentParser(description="Malware IOC Extraction Agent")
    cli.add_argument("--sample", required=True, help="Path to malware sample")
    # Optional string inputs that default to "" (feature disabled).
    for flag, description in (
        ("--yara-rules", "Path to YARA rules file"),
        ("--vt-key", "VirusTotal API key"),
    ):
        cli.add_argument(flag, default="", help=description)
    cli.add_argument("--output-dir", default=".", help="Output directory")
    cli.add_argument("--output", default="ioc_report.json")
    opts = cli.parse_args()

    os.makedirs(opts.output_dir, exist_ok=True)
    report = run_extraction(opts.sample, opts.output_dir, opts.yara_rules, opts.vt_key)

    report_path = os.path.join(opts.output_dir, opts.output)
    with open(report_path, "w") as report_file:
        # default=str stringifies any value json cannot serialise natively.
        json.dump(report, report_file, indent=2, default=str)
    logger.info("Report saved to %s", report_path)

    rendered = json.dumps(report, indent=2, default=str)
    print(rendered)


if __name__ == "__main__":
    main()
|