Files
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

179 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""Outlook PST file forensic analysis agent.
Parses PST/OST files using pypff (libpff) to extract emails, attachments,
metadata, and deleted items for forensic investigation.
"""
import os
import sys
import json
import hashlib
import re
try:
import pypff
HAS_PYPFF = True
except ImportError:
HAS_PYPFF = False
def compute_hash(filepath):
sha256 = hashlib.sha256()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(65536), b""):
sha256.update(chunk)
return sha256.hexdigest()
def open_pst(filepath):
if not HAS_PYPFF:
return None, "pypff not installed. pip install libpff-python"
pst = pypff.file()
pst.open(filepath)
return pst, None
def extract_messages(folder, max_messages=1000):
messages = []
for i in range(min(folder.number_of_sub_messages, max_messages)):
msg = folder.get_sub_message(i)
entry = {
"subject": msg.subject or "",
"sender": msg.sender_name or "",
"headers": (msg.transport_headers or "")[:500],
"creation_time": str(msg.creation_time) if msg.creation_time else "",
"delivery_time": str(msg.delivery_time) if msg.delivery_time else "",
"has_attachments": msg.number_of_attachments > 0,
"attachment_count": msg.number_of_attachments,
"body_size": len(msg.plain_text_body or "") if msg.plain_text_body else 0,
}
# Extract attachment metadata
attachments = []
for j in range(msg.number_of_attachments):
att = msg.get_attachment(j)
attachments.append({
"name": att.name or f"attachment_{j}",
"size": att.size,
})
entry["attachments"] = attachments
messages.append(entry)
return messages
def walk_folders(folder, path="", results=None):
if results is None:
results = []
current_path = f"{path}/{folder.name}" if folder.name else path or "/Root"
messages = extract_messages(folder)
if messages:
results.append({
"folder": current_path,
"message_count": len(messages),
"messages": messages,
})
for i in range(folder.number_of_sub_folders):
subfolder = folder.get_sub_folder(i)
walk_folders(subfolder, current_path, results)
return results
def extract_email_addresses(messages):
addresses = set()
email_pattern = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+")
for msg in messages:
for field in [msg.get("sender", ""), msg.get("headers", "")]:
addresses.update(email_pattern.findall(field))
return sorted(addresses)
def detect_suspicious_emails(messages):
findings = []
suspicious_exts = [".exe", ".scr", ".bat", ".cmd", ".ps1", ".vbs",
".js", ".hta", ".lnk", ".iso", ".img"]
for msg in messages:
for att in msg.get("attachments", []):
name = (att.get("name") or "").lower()
for ext in suspicious_exts:
if name.endswith(ext):
findings.append({
"type": "suspicious_attachment",
"subject": msg.get("subject", "")[:80],
"attachment": att.get("name"),
"extension": ext,
"severity": "HIGH",
})
subject = (msg.get("subject") or "").lower()
urgency_words = ["urgent", "immediate action", "password expired",
"verify your account", "suspended", "click here"]
for word in urgency_words:
if word in subject:
findings.append({
"type": "phishing_indicator",
"subject": msg.get("subject", "")[:80],
"keyword": word,
"severity": "MEDIUM",
})
break
return findings
def generate_report(filepath, folder_data):
all_messages = []
for fd in folder_data:
all_messages.extend(fd.get("messages", []))
addresses = extract_email_addresses(all_messages)
suspicious = detect_suspicious_emails(all_messages)
return {
"file": filepath,
"sha256": compute_hash(filepath),
"size": os.path.getsize(filepath),
"total_folders": len(folder_data),
"total_messages": len(all_messages),
"unique_addresses": len(addresses),
"top_addresses": addresses[:20],
"suspicious_findings": suspicious,
"folders": [{
"path": f["folder"],
"count": f["message_count"],
} for f in folder_data],
}
if __name__ == "__main__":
print("=" * 60)
print("Outlook PST Forensic Analysis Agent")
print("Email extraction, attachment analysis, phishing detection")
print("=" * 60)
target = sys.argv[1] if len(sys.argv) > 1 else None
if not target or not os.path.exists(target):
print("\n[DEMO] Usage: python agent.py <mailbox.pst>")
print(f" pypff available: {HAS_PYPFF}")
sys.exit(0)
pst, err = open_pst(target)
if err:
print(f"[!] {err}")
sys.exit(1)
print(f"\n[*] Parsing: {target}")
root = pst.get_root_folder()
folder_data = walk_folders(root)
report = generate_report(target, folder_data)
print(f"[*] Folders: {report['total_folders']}")
print(f"[*] Messages: {report['total_messages']}")
print(f"[*] Unique addresses: {report['unique_addresses']}")
print("\n--- Folder Structure ---")
for f in report["folders"]:
print(f" {f['path']}: {f['count']} messages")
print(f"\n--- Suspicious ({len(report['suspicious_findings'])}) ---")
for s in report["suspicious_findings"][:10]:
print(f" [{s['severity']}] {s['type']}: {s.get('attachment', s.get('keyword', ''))}")
pst.close()
print(f"\n{json.dumps(report, indent=2, default=str)}")