mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-14 23:14:55 +03:00
c47eed6a64
- Fix 25 shell=True subprocess calls with list-based commands - Fix 49 verify=False in defensive skills (env-var override) - Add timeout to 231 HTTP/subprocess/socket calls - Fix 6 SQL injection patterns with whitelist validation - Replace 8 __import__() with standard imports - Remove 701 unused imports across 442 files - Add authorized-testing disclaimers to all offensive skills - Complete 11 incomplete skill directories - Expand 10 stub SKILL.md files with full content - Fix 2 YAML parse errors in frontmatter - Fix 5 pre-existing syntax errors - Convert 22 hardcoded paths/ports to environment variables - Back up 21 redundant skill pairs to .bak - Fix 2 global declaration errors - 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE) - 0 compile errors across all 724 agent.py files
179 lines
5.9 KiB
Python
179 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Outlook PST file forensic analysis agent.
|
|
|
|
Parses PST/OST files using pypff (libpff) to extract emails, attachments,
|
|
metadata, and deleted items for forensic investigation.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import hashlib
|
|
import re
|
|
|
|
try:
|
|
import pypff
|
|
HAS_PYPFF = True
|
|
except ImportError:
|
|
HAS_PYPFF = False
|
|
|
|
|
|
def compute_hash(filepath):
|
|
sha256 = hashlib.sha256()
|
|
with open(filepath, "rb") as f:
|
|
for chunk in iter(lambda: f.read(65536), b""):
|
|
sha256.update(chunk)
|
|
return sha256.hexdigest()
|
|
|
|
|
|
def open_pst(filepath):
|
|
if not HAS_PYPFF:
|
|
return None, "pypff not installed. pip install libpff-python"
|
|
pst = pypff.file()
|
|
pst.open(filepath)
|
|
return pst, None
|
|
|
|
|
|
def extract_messages(folder, max_messages=1000):
|
|
messages = []
|
|
for i in range(min(folder.number_of_sub_messages, max_messages)):
|
|
msg = folder.get_sub_message(i)
|
|
entry = {
|
|
"subject": msg.subject or "",
|
|
"sender": msg.sender_name or "",
|
|
"headers": (msg.transport_headers or "")[:500],
|
|
"creation_time": str(msg.creation_time) if msg.creation_time else "",
|
|
"delivery_time": str(msg.delivery_time) if msg.delivery_time else "",
|
|
"has_attachments": msg.number_of_attachments > 0,
|
|
"attachment_count": msg.number_of_attachments,
|
|
"body_size": len(msg.plain_text_body or "") if msg.plain_text_body else 0,
|
|
}
|
|
# Extract attachment metadata
|
|
attachments = []
|
|
for j in range(msg.number_of_attachments):
|
|
att = msg.get_attachment(j)
|
|
attachments.append({
|
|
"name": att.name or f"attachment_{j}",
|
|
"size": att.size,
|
|
})
|
|
entry["attachments"] = attachments
|
|
messages.append(entry)
|
|
return messages
|
|
|
|
|
|
def walk_folders(folder, path="", results=None):
|
|
if results is None:
|
|
results = []
|
|
current_path = f"{path}/{folder.name}" if folder.name else path or "/Root"
|
|
messages = extract_messages(folder)
|
|
if messages:
|
|
results.append({
|
|
"folder": current_path,
|
|
"message_count": len(messages),
|
|
"messages": messages,
|
|
})
|
|
for i in range(folder.number_of_sub_folders):
|
|
subfolder = folder.get_sub_folder(i)
|
|
walk_folders(subfolder, current_path, results)
|
|
return results
|
|
|
|
|
|
def extract_email_addresses(messages):
|
|
addresses = set()
|
|
email_pattern = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+")
|
|
for msg in messages:
|
|
for field in [msg.get("sender", ""), msg.get("headers", "")]:
|
|
addresses.update(email_pattern.findall(field))
|
|
return sorted(addresses)
|
|
|
|
|
|
def detect_suspicious_emails(messages):
|
|
findings = []
|
|
suspicious_exts = [".exe", ".scr", ".bat", ".cmd", ".ps1", ".vbs",
|
|
".js", ".hta", ".lnk", ".iso", ".img"]
|
|
for msg in messages:
|
|
for att in msg.get("attachments", []):
|
|
name = (att.get("name") or "").lower()
|
|
for ext in suspicious_exts:
|
|
if name.endswith(ext):
|
|
findings.append({
|
|
"type": "suspicious_attachment",
|
|
"subject": msg.get("subject", "")[:80],
|
|
"attachment": att.get("name"),
|
|
"extension": ext,
|
|
"severity": "HIGH",
|
|
})
|
|
subject = (msg.get("subject") or "").lower()
|
|
urgency_words = ["urgent", "immediate action", "password expired",
|
|
"verify your account", "suspended", "click here"]
|
|
for word in urgency_words:
|
|
if word in subject:
|
|
findings.append({
|
|
"type": "phishing_indicator",
|
|
"subject": msg.get("subject", "")[:80],
|
|
"keyword": word,
|
|
"severity": "MEDIUM",
|
|
})
|
|
break
|
|
return findings
|
|
|
|
|
|
def generate_report(filepath, folder_data):
|
|
all_messages = []
|
|
for fd in folder_data:
|
|
all_messages.extend(fd.get("messages", []))
|
|
addresses = extract_email_addresses(all_messages)
|
|
suspicious = detect_suspicious_emails(all_messages)
|
|
return {
|
|
"file": filepath,
|
|
"sha256": compute_hash(filepath),
|
|
"size": os.path.getsize(filepath),
|
|
"total_folders": len(folder_data),
|
|
"total_messages": len(all_messages),
|
|
"unique_addresses": len(addresses),
|
|
"top_addresses": addresses[:20],
|
|
"suspicious_findings": suspicious,
|
|
"folders": [{
|
|
"path": f["folder"],
|
|
"count": f["message_count"],
|
|
} for f in folder_data],
|
|
}
|
|
|
|
|
|
if __name__ == "__main__":
|
|
print("=" * 60)
|
|
print("Outlook PST Forensic Analysis Agent")
|
|
print("Email extraction, attachment analysis, phishing detection")
|
|
print("=" * 60)
|
|
|
|
target = sys.argv[1] if len(sys.argv) > 1 else None
|
|
if not target or not os.path.exists(target):
|
|
print("\n[DEMO] Usage: python agent.py <mailbox.pst>")
|
|
print(f" pypff available: {HAS_PYPFF}")
|
|
sys.exit(0)
|
|
|
|
pst, err = open_pst(target)
|
|
if err:
|
|
print(f"[!] {err}")
|
|
sys.exit(1)
|
|
|
|
print(f"\n[*] Parsing: {target}")
|
|
root = pst.get_root_folder()
|
|
folder_data = walk_folders(root)
|
|
report = generate_report(target, folder_data)
|
|
|
|
print(f"[*] Folders: {report['total_folders']}")
|
|
print(f"[*] Messages: {report['total_messages']}")
|
|
print(f"[*] Unique addresses: {report['unique_addresses']}")
|
|
|
|
print("\n--- Folder Structure ---")
|
|
for f in report["folders"]:
|
|
print(f" {f['path']}: {f['count']} messages")
|
|
|
|
print(f"\n--- Suspicious ({len(report['suspicious_findings'])}) ---")
|
|
for s in report["suspicious_findings"][:10]:
|
|
print(f" [{s['severity']}] {s['type']}: {s.get('attachment', s.get('keyword', ''))}")
|
|
|
|
pst.close()
|
|
print(f"\n{json.dumps(report, indent=2, default=str)}")
|