Files
Anthropic-Cybersecurity-Skills/skills/performing-sqlite-database-forensics/scripts/agent.py
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

205 lines
7.7 KiB
Python

#!/usr/bin/env python3
"""Agent for SQLite database forensics.
Parses SQLite file headers, analyzes freelist pages for deleted records,
examines WAL files, decodes browser/app timestamps, and extracts
evidence from common forensic databases.
"""
import struct
import sqlite3
import json
import sys
import os
import re
from datetime import datetime, timedelta
from pathlib import Path
_SAFE_TABLE_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
class SQLiteForensicsAgent:
    """Performs forensic analysis on SQLite database files.

    Attributes:
        db_path: Path of the database under examination, stored as given.
        output_dir: Directory (created on construction) that receives the
            JSON report and any dumped freelist pages.
        findings: List of dicts describing notable observations collected
            by the analysis methods.
    """

    _HEADER_SIZE = 100
    _MAGIC = b"SQLite format 3\x00"
    _WAL_HEADER_SIZE = 32
    _WAL_FRAME_HEADER_SIZE = 24
    _WAL_MAGICS = (0x377F0682, 0x377F0683)
    # Only this many frame descriptors are included in the WAL summary.
    _MAX_REPORTED_FRAMES = 50

    def __init__(self, db_path, output_dir="./sqlite_forensics"):
        self.db_path = db_path
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.findings = []

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _read_header(self, handle):
        """Return the 100-byte database header, or None if it is invalid.

        The header is invalid when the file is shorter than 100 bytes or
        does not start with the SQLite magic string.
        """
        header = handle.read(self._HEADER_SIZE)
        if len(header) < self._HEADER_SIZE:
            return None
        if header[:16] != self._MAGIC:
            return None
        return header

    @staticmethod
    def _page_size_from_header(header):
        """Decode the page size field (the stored value 1 means 65536)."""
        raw = struct.unpack_from(">H", header, 16)[0]
        return 65536 if raw == 1 else raw

    @staticmethod
    def _valid_page_size(page_size):
        """Return True for a power of two between 512 and 65536."""
        return 512 <= page_size <= 65536 and page_size & (page_size - 1) == 0

    def _add_finding(self, finding):
        """Record a finding once, even if the analysis is run repeatedly."""
        if finding not in self.findings:
            self.findings.append(finding)

    # ------------------------------------------------------------------
    # Public analysis API
    # ------------------------------------------------------------------
    def parse_header(self):
        """Parse the 100-byte SQLite database header.

        Returns:
            A dict of decoded header fields plus the file size, or
            ``{"error": "Not a valid SQLite database"}`` when the file is
            truncated or lacks the SQLite magic string.
        """
        with open(self.db_path, "rb") as f:
            header = self._read_header(f)
        if header is None:
            return {"error": "Not a valid SQLite database"}

        (change_counter, db_size_pages, first_freelist,
         total_freelist, schema_cookie) = struct.unpack_from(">5I", header, 24)
        # The text encoding lives at offset 56; offset 52 holds the largest
        # root b-tree page number and is 0 for most databases.
        encoding_code = struct.unpack_from(">I", header, 56)[0]
        encodings = {1: "UTF-8", 2: "UTF-16le", 3: "UTF-16be"}
        return {
            "magic": header[:15].decode("ascii"),
            "page_size": self._page_size_from_header(header),
            "write_format": header[18],
            "read_format": header[19],
            "change_counter": change_counter,
            "db_size_pages": db_size_pages,
            "first_freelist_page": first_freelist,
            "total_freelist_pages": total_freelist,
            "schema_cookie": schema_cookie,
            "text_encoding": encodings.get(encoding_code, "unknown"),
            "db_size_bytes": os.path.getsize(self.db_path),
        }

    def analyze_freelist(self):
        """Walk the freelist trunk chain to identify pages with deleted data.

        The walk is defensive: a trunk chain that loops, a trunk page that
        lies beyond the end of the file, or a leaf count larger than a
        page can hold stops or clamps the walk and is recorded in
        ``self.findings`` instead of hanging or raising.

        Returns:
            A dict with ``freelist_pages`` (count declared in the header),
            ``trunk_pages`` (list of ``{"page", "leaf_count"}``) and
            ``leaf_pages`` (list of page numbers). An ``error`` key is
            added when the file is not a valid SQLite database.
        """
        with open(self.db_path, "rb") as f:
            header = self._read_header(f)
            if header is None:
                return {"freelist_pages": 0, "trunk_pages": [],
                        "leaf_pages": [],
                        "error": "Not a valid SQLite database"}

            page_size = self._page_size_from_header(header)
            first_trunk, total_free = struct.unpack_from(">II", header, 32)
            if first_trunk == 0:
                return {"freelist_pages": 0, "trunk_pages": [],
                        "leaf_pages": []}

            trunk_pages, leaf_pages = [], []
            if not self._valid_page_size(page_size):
                self._add_finding({
                    "type": "freelist_corruption",
                    "note": f"Invalid page size {page_size}; "
                            "freelist cannot be walked"})
                return {"freelist_pages": total_free,
                        "trunk_pages": trunk_pages, "leaf_pages": leaf_pages}

            # A trunk page holds at most usable_size/4 - 2 leaf pointers,
            # where usable size excludes the reserved bytes (header[20]).
            max_leaves = max((page_size - header[20]) // 4 - 2, 0)
            visited = set()
            trunk = first_trunk
            while trunk != 0:
                if trunk in visited:
                    self._add_finding({
                        "type": "freelist_corruption", "page": trunk,
                        "note": "Freelist trunk chain loops back on itself"})
                    break
                visited.add(trunk)

                f.seek((trunk - 1) * page_size)
                page_data = f.read(page_size)
                if len(page_data) < 8:
                    self._add_finding({
                        "type": "freelist_corruption", "page": trunk,
                        "note": "Freelist trunk page lies beyond end of file"})
                    break

                next_trunk, leaf_count = struct.unpack_from(">II", page_data, 0)
                readable = min(leaf_count, max_leaves,
                               (len(page_data) - 8) // 4)
                if readable < leaf_count:
                    self._add_finding({
                        "type": "freelist_corruption", "page": trunk,
                        "note": "Trunk page declares more leaf pointers "
                                "than fit in a page"})
                leaves = struct.unpack_from(f">{readable}I", page_data, 8)

                trunk_pages.append({"page": trunk, "leaf_count": leaf_count})
                leaf_pages.extend(leaves)
                trunk = next_trunk

        if leaf_pages:
            self._add_finding({"type": "freelist_data",
                               "pages": len(leaf_pages),
                               "note": "Deleted records may be recoverable"})
        return {"freelist_pages": total_free,
                "trunk_pages": trunk_pages, "leaf_pages": leaf_pages}

    def extract_freelist_pages(self):
        """Dump raw freelist leaf pages for hex analysis.

        Each leaf page is written to ``<output_dir>/freelist_pages`` as
        ``page_<number>.bin``.

        Returns:
            The number of pages written.
        """
        info = self.analyze_freelist()
        out_dir = self.output_dir / "freelist_pages"
        out_dir.mkdir(parents=True, exist_ok=True)

        leaf_pages = info["leaf_pages"]
        if not leaf_pages:
            return 0

        written = 0
        with open(self.db_path, "rb") as f:
            page_size = self._page_size_from_header(f.read(self._HEADER_SIZE))
            for pn in leaf_pages:
                if pn < 1:
                    # Page numbers are 1-based; 0 would seek before the file.
                    continue
                f.seek((pn - 1) * page_size)
                data = f.read(page_size)
                (out_dir / f"page_{pn}.bin").write_bytes(data)
                written += 1
        return written

    def parse_wal(self):
        """Parse WAL file frames for transaction history.

        Returns:
            ``{"wal_exists": False}`` when there is no ``-wal`` file next
            to the database. Otherwise a dict with the WAL magic, page
            size, checkpoint sequence, the total frame count and the
            descriptors of the first 50 frames. A WAL file too short to
            hold a header (for example an empty one) yields an ``error``
            key and zero frames.
        """
        wal_path = os.fspath(self.db_path) + "-wal"
        if not os.path.exists(wal_path):
            return {"wal_exists": False}

        with open(wal_path, "rb") as f:
            header = f.read(self._WAL_HEADER_SIZE)
            if len(header) < self._WAL_HEADER_SIZE:
                return {"wal_exists": True,
                        "error": "WAL header is truncated",
                        "total_frames": 0, "frames": []}

            magic, _version, page_size, checkpoint_seq = struct.unpack_from(
                ">4I", header, 0)
            if magic not in self._WAL_MAGICS:
                self._add_finding({
                    "type": "wal_corruption",
                    "note": f"Unexpected WAL magic {hex(magic)}"})

            file_size = os.path.getsize(wal_path)
            frame_size = self._WAL_FRAME_HEADER_SIZE + page_size
            # Frames are fixed-size, so the count follows from the file
            # size; only the reported frame headers need to be read.
            total_frames = max(
                (file_size - self._WAL_HEADER_SIZE) // frame_size, 0)

            frames = []
            for frame_num in range(min(total_frames,
                                       self._MAX_REPORTED_FRAMES)):
                offset = self._WAL_HEADER_SIZE + frame_num * frame_size
                f.seek(offset)
                fh = f.read(self._WAL_FRAME_HEADER_SIZE)
                if len(fh) < 4:
                    break
                page_number = struct.unpack_from(">I", fh, 0)[0]
                frames.append({"frame": frame_num, "page": page_number,
                               "offset": offset})

        return {"wal_exists": True, "magic": hex(magic),
                "page_size": page_size, "checkpoint_seq": checkpoint_seq,
                "total_frames": total_frames, "frames": frames}

    def query_tables(self):
        """List all tables and row counts in the database.

        The database is opened read-only. Table names are quoted as SQL
        identifiers (embedded double quotes doubled), so every table is
        reported regardless of the characters in its name.

        Returns:
            A list of ``{"table": name, "row_count": n}`` dicts; ``n`` is
            -1 when the table could not be counted.

        Raises:
            sqlite3.DatabaseError: If the file cannot be opened or its
                schema cannot be read.
        """
        # as_uri() percent-encodes characters such as '#', '?' and spaces
        # that would otherwise be misread as URI syntax.
        uri = Path(self.db_path).resolve().as_uri() + "?mode=ro"
        conn = sqlite3.connect(uri, uri=True)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
            names = [row[0] for row in cursor.fetchall()]

            tables = []
            for name in names:
                if not isinstance(name, str):
                    continue
                quoted = '"' + name.replace('"', '""') + '"'
                try:
                    cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
                    count = cursor.fetchone()[0]
                except sqlite3.DatabaseError:
                    count = -1
                tables.append({"table": name, "row_count": count})
            return tables
        finally:
            conn.close()

    @staticmethod
    def decode_timestamp(value, fmt="unix"):
        """Decode timestamps from common database formats.

        Args:
            value: Numeric timestamp.
            fmt: One of ``"unix"`` (seconds since 1970), ``"chrome"``
                (microseconds since 1601), ``"mac_absolute"`` (seconds
                since 2001) or ``"mozilla"`` (microseconds since 1970).

        Returns:
            The ISO 8601 string of the (naive, UTC) datetime, or None when
            the format is unknown or the value cannot be represented.
        """
        epochs = {
            "unix": (datetime(1970, 1, 1), "seconds"),
            "chrome": (datetime(1601, 1, 1), "microseconds"),
            "mac_absolute": (datetime(2001, 1, 1), "seconds"),
            "mozilla": (datetime(1970, 1, 1), "microseconds"),
        }
        spec = epochs.get(fmt)
        if spec is None:
            return None
        epoch, unit = spec
        try:
            # Epoch + timedelta keeps integer microseconds exact and avoids
            # the deprecated datetime.utcfromtimestamp().
            return (epoch + timedelta(**{unit: value})).isoformat()
        except (TypeError, ValueError, OverflowError):
            return None

    def generate_report(self):
        """Generate comprehensive forensic analysis report.

        Runs every analysis, writes the result to
        ``<output_dir>/sqlite_forensics_report.json``, prints the same
        JSON to stdout and returns the report dict. A failure to read the
        table list is recorded in ``findings`` rather than aborting the
        rest of the report.
        """
        from datetime import timezone

        header = self.parse_header()
        try:
            tables = self.query_tables()
        except sqlite3.DatabaseError as exc:
            tables = []
            self._add_finding({"type": "query_error", "note": str(exc)})

        # Strip tzinfo so the timestamp keeps the naive-UTC ISO format.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        report = {
            "database": self.db_path,
            "analysis_date": now_utc.isoformat(),
            "header": header,
            "tables": tables,
            "freelist": self.analyze_freelist(),
            "wal": self.parse_wal(),
            "findings": self.findings,
        }
        serialized = json.dumps(report, indent=2, default=str)
        report_path = self.output_dir / "sqlite_forensics_report.json"
        with open(report_path, "w", encoding="utf-8") as f:
            f.write(serialized)
        print(serialized)
        return report
def main():
    """Command-line entry point: ``agent.py <database.db> [output_dir]``.

    Exits with status 1 when no database argument is given or when the
    given database file does not exist. Otherwise builds the agent and
    generates the forensic report.
    """
    if len(sys.argv) < 2:
        print("Usage: agent.py <database.db> [output_dir]")
        sys.exit(1)
    db_path = sys.argv[1]
    output_dir = sys.argv[2] if len(sys.argv) > 2 else "./sqlite_forensics"
    # Check the input before constructing the agent: the constructor
    # creates the output directory, which is pointless for a missing file.
    if not os.path.isfile(db_path):
        print(f"Error: database file not found: {db_path}", file=sys.stderr)
        sys.exit(1)
    agent = SQLiteForensicsAgent(db_path, output_dir)
    agent.generate_report()


if __name__ == "__main__":
    main()