#!/usr/bin/env python3
"""Cloud storage forensic acquisition agent.

Acquires forensic copies of cloud storage objects with integrity
verification using SHA-256 hashes, metadata preservation, and
chain-of-custody logging.

NOTE: only the AWS S3 commands (``s3`` and ``s3-versions``) are implemented
in this module; there is no Azure Blob Storage or GCP Cloud Storage code here.
"""

import argparse
import hashlib
import json
import os
import sys
from datetime import datetime, timezone

try:
    import boto3
    from botocore.exceptions import ClientError
    HAS_BOTO3 = True
except ImportError:
    HAS_BOTO3 = False

# Read size used when hashing acquired files; the digest does not depend on it.
_HASH_CHUNK_SIZE = 64 * 1024


def _utc_now():
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _sha256_file(path):
    """Return the hex SHA-256 digest of the file at *path*, read in chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _make_s3_client(profile=None, region=None):
    """Build a boto3 S3 client from an optional profile name and region."""
    session_kwargs = {}
    if profile:
        session_kwargs["profile_name"] = profile
    if region:
        session_kwargs["region_name"] = region
    return boto3.Session(**session_kwargs).client("s3")


def _safe_local_path(output_dir, key):
    """Map an object key to a path under *output_dir*, or None if it escapes.

    Object keys come from the bucket being examined and may contain ``..``
    segments or a leading ``/``; joined blindly, such keys would write
    outside the evidence directory.
    """
    candidate = os.path.join(output_dir, key.replace("/", os.sep))
    root = os.path.abspath(output_dir)
    resolved = os.path.abspath(candidate)
    try:
        inside = os.path.commonpath([root, resolved]) == root
    except ValueError:
        # Raised e.g. for paths on different drives on Windows.
        inside = False
    if not inside or resolved == root:
        return None
    return candidate


def _head_metadata(s3, bucket, key):
    """Collect HEAD metadata for an object; on failure return {"error": ...}."""
    try:
        head = s3.head_object(Bucket=bucket, Key=key)
    except ClientError as e:
        return {"error": str(e)}

    last_modified = head.get("LastModified", "")
    if hasattr(last_modified, "isoformat"):
        last_modified = last_modified.isoformat()
    else:
        last_modified = str(last_modified)

    return {
        "content_type": head.get("ContentType", ""),
        "last_modified": last_modified,
        "etag": head.get("ETag", "").strip('"'),
        "version_id": head.get("VersionId", ""),
        "server_side_encryption": head.get("ServerSideEncryption", ""),
        "storage_class": head.get("StorageClass", "STANDARD"),
        "user_metadata": head.get("Metadata", {}),
    }


def acquire_s3_objects(bucket, prefix="", output_dir=".", profile=None, region=None):
    """Acquire S3 objects with forensic integrity verification.

    Lists every object under *prefix* in *bucket*, downloads each one below
    *output_dir* (mirroring the key as a relative path) and records its
    SHA-256 hash together with the object's HEAD metadata.

    Args:
        bucket: S3 bucket name.
        prefix: Key prefix filter; the empty string matches all objects.
        output_dir: Local directory that receives the downloaded objects.
        profile: Optional AWS profile name.
        region: Optional AWS region name.

    Returns:
        A list of evidence entries (dicts). Successful entries have
        ``status == "OK"`` plus ``local_path``, ``size``, ``sha256`` and
        ``metadata``; failed ones have ``status == "FAIL"`` and ``error``.

    Exits the process with status 1 when boto3 is not installed. Errors
    raised while listing the bucket propagate to the caller.
    """
    if not HAS_BOTO3:
        print("[!] boto3 required: pip install boto3", file=sys.stderr)
        sys.exit(1)

    s3 = _make_s3_client(profile, region)
    print(f"[*] Acquiring objects from s3://{bucket}/{prefix}")

    evidence_log = []
    total_objects = 0  # objects attempted; used as the running index
    acquired = 0       # objects actually downloaded and hashed
    total_bytes = 0

    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):
            key = obj["Key"]
            size = obj["Size"]
            # Keys ending in "/" are folder placeholders, not evidence.
            if key.endswith("/"):
                continue
            total_objects += 1
            source = f"s3://{bucket}/{key}"

            local_path = _safe_local_path(output_dir, key)
            if local_path is None:
                error = "unsafe object key: resolves outside the output directory"
                evidence_log.append({
                    "source": source,
                    "status": "FAIL",
                    "error": error,
                    "acquired_at": _utc_now(),
                })
                print(f" [FAIL] {key}: {error}")
                continue

            metadata = _head_metadata(s3, bucket, key)

            try:
                parent = os.path.dirname(local_path)
                if parent:
                    os.makedirs(parent, exist_ok=True)
                s3.download_file(bucket, key, local_path)
                file_hash = _sha256_file(local_path)
            except (ClientError, OSError) as e:
                # A single bad object (access denied, file/dir name clash,
                # ...) must not abort the rest of the acquisition.
                entry = {
                    "source": source,
                    "status": "FAIL",
                    "error": str(e),
                    "acquired_at": _utc_now(),
                }
                print(f" [FAIL] {key}: {e}")
            else:
                acquired += 1
                total_bytes += size
                entry = {
                    "source": source,
                    "local_path": local_path,
                    "size": size,
                    "sha256": file_hash,
                    "metadata": metadata,
                    "acquired_at": _utc_now(),
                    "status": "OK",
                }
                print(f" [{total_objects:4d}] {key} ({size} bytes, SHA256: {file_hash[:16]}...)")
            evidence_log.append(entry)

    print(f"[+] Acquired {acquired} objects ({total_bytes / 1024 / 1024:.2f} MB)")
    return evidence_log


def acquire_s3_versions(bucket, key, output_dir=".", profile=None, region=None):
    """Acquire all versions of a specific S3 object.

    Each version is saved in *output_dir* as ``<basename>.v_<version id>``
    (the version id is truncated to 20 characters) and hashed with SHA-256.
    Delete markers for the key are recorded as entries without a download.

    Args:
        bucket: S3 bucket name.
        key: Exact object key whose versions are acquired.
        output_dir: Local directory that receives the version files.
        profile: Optional AWS profile name.
        region: Optional AWS region name.

    Returns:
        A list of evidence entries (dicts); empty if listing versions fails.

    Exits the process with status 1 when boto3 is not installed.
    """
    if not HAS_BOTO3:
        print("[!] boto3 required", file=sys.stderr)
        sys.exit(1)

    s3 = _make_s3_client(profile, region)
    print(f"[*] Acquiring all versions of s3://{bucket}/{key}")
    evidence_log = []

    versions = []
    delete_markers = []
    try:
        # Paginate so long histories are not cut off after the first page.
        # Prefix matching also returns other keys that merely start with
        # `key` (e.g. "report.pdf.bak"), so keep exact matches only.
        paginator = s3.get_paginator("list_object_versions")
        for page in paginator.paginate(Bucket=bucket, Prefix=key):
            versions.extend(
                v for v in page.get("Versions", []) if v.get("Key") == key
            )
            delete_markers.extend(
                m for m in page.get("DeleteMarkers", []) if m.get("Key") == key
            )
    except ClientError as e:
        print(f"[!] Error listing versions: {e}", file=sys.stderr)
        return evidence_log

    base_name = os.path.basename(key)
    for version in versions:
        vid = version.get("VersionId", "null")
        size = version.get("Size", 0)
        is_latest = version.get("IsLatest", False)
        # NOTE: the truncation keeps file names short; two version ids that
        # share their first 20 characters would map to the same file.
        safe_vid = vid.replace("/", "_")[:20]
        local_path = os.path.join(output_dir, f"{base_name}.v_{safe_vid}")
        extra_args = {"VersionId": vid} if vid != "null" else {}

        try:
            s3.download_file(bucket, key, local_path, ExtraArgs=extra_args)
            file_hash = _sha256_file(local_path)
        except (ClientError, OSError) as e:
            entry = {
                "source": f"s3://{bucket}/{key}",
                "version_id": vid,
                "status": "FAIL",
                "error": str(e),
                "acquired_at": _utc_now(),
            }
            print(f" [FAIL] {key} (version {vid}): {e}")
        else:
            entry = {
                "source": f"s3://{bucket}/{key}?versionId={vid}",
                "version_id": vid,
                "is_latest": is_latest,
                "local_path": local_path,
                "size": size,
                "sha256": file_hash,
                "last_modified": str(version.get("LastModified", "")),
                "acquired_at": _utc_now(),
                "status": "OK",
            }
            print(f" Version {vid[:12]:12s} | {size:10d} bytes | "
                  f"{'LATEST' if is_latest else ' '} | SHA256: {file_hash[:16]}...")
        evidence_log.append(entry)

    # Also record delete markers (nothing to download for them).
    for marker in delete_markers:
        evidence_log.append({
            "source": f"s3://{bucket}/{key}",
            "version_id": marker.get("VersionId", ""),
            "type": "DELETE_MARKER",
            "last_modified": str(marker.get("LastModified", "")),
            "is_latest": marker.get("IsLatest", False),
        })

    return evidence_log


def verify_integrity(evidence_log):
    """Verify SHA-256 hashes of acquired files.

    Re-hashes every entry whose status is ``"OK"`` and that has a
    ``local_path``, and stores the outcome in the entry's ``integrity``
    field: ``"VERIFIED"``, ``"MISMATCH"`` or ``"MISSING"``. Other entries
    (failures, delete markers) are left untouched.

    Args:
        evidence_log: List of evidence entries; modified in place.

    Returns:
        A ``(verified, failed)`` tuple of counts; missing files count as
        failed.
    """
    print(f"\n[*] Verifying integrity of {len(evidence_log)} acquired objects...")
    verified = 0
    failed = 0

    for entry in evidence_log:
        if entry.get("status") != "OK" or not entry.get("local_path"):
            continue

        local_path = entry["local_path"]
        if not os.path.isfile(local_path):
            entry["integrity"] = "MISSING"
            failed += 1
            continue

        if _sha256_file(local_path) == entry.get("sha256", ""):
            entry["integrity"] = "VERIFIED"
            verified += 1
        else:
            entry["integrity"] = "MISMATCH"
            failed += 1
            print(f" [FAIL] {local_path}: hash mismatch")

    print(f"[+] Integrity check: {verified} verified, {failed} failed")
    return verified, failed


def format_summary(evidence_log, verified, failed):
    """Print acquisition summary.

    Args:
        evidence_log: List of evidence entries.
        verified: Number of entries whose hash was re-verified.
        failed: Number of entries that failed verification.
    """
    ok_entries = [e for e in evidence_log if e.get("status") == "OK"]
    err = sum(1 for e in evidence_log if e.get("status") == "FAIL")
    total_bytes = sum(e.get("size", 0) for e in ok_entries)

    print(f"\n{'='*60}")
    print(" Cloud Storage Forensic Acquisition Report")
    print(f"{'='*60}")
    print(f" Objects Acquired : {len(ok_entries)}")
    print(f" Objects Failed : {err}")
    print(f" Total Size : {total_bytes / 1024 / 1024:.2f} MB")
    print(f" Integrity OK : {verified}")
    print(f" Integrity FAIL : {failed}")


def main():
    """Parse command-line arguments, run the acquisition and emit the report.

    The global options (--profile, --region, --skip-verify, --output,
    --verbose) are defined on the top-level parser, so they must be given
    before the sub-command name.
    """
    parser = argparse.ArgumentParser(
        description="Cloud storage forensic acquisition agent"
    )
    sub = parser.add_subparsers(dest="command")

    p_s3 = sub.add_parser("s3", help="Acquire S3 bucket objects")
    p_s3.add_argument("--bucket", required=True, help="S3 bucket name")
    p_s3.add_argument("--prefix", default="", help="Object key prefix filter")
    p_s3.add_argument("--output-dir", default="./evidence", help="Local output directory")

    p_ver = sub.add_parser("s3-versions", help="Acquire all versions of S3 object")
    p_ver.add_argument("--bucket", required=True)
    p_ver.add_argument("--key", required=True, help="S3 object key")
    p_ver.add_argument("--output-dir", default="./evidence")

    parser.add_argument("--profile", help="AWS CLI profile")
    parser.add_argument("--region", help="AWS region")
    parser.add_argument("--skip-verify", action="store_true", help="Skip integrity verification")
    parser.add_argument("--output", "-o", help="Output JSON report path")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    os.makedirs(getattr(args, "output_dir", "./evidence"), exist_ok=True)

    if args.command == "s3":
        evidence_log = acquire_s3_objects(
            args.bucket, args.prefix, args.output_dir, args.profile, args.region
        )
    elif args.command == "s3-versions":
        evidence_log = acquire_s3_versions(
            args.bucket, args.key, args.output_dir, args.profile, args.region
        )

    verified, failed = 0, 0
    if not args.skip_verify:
        verified, failed = verify_integrity(evidence_log)

    format_summary(evidence_log, verified, failed)

    report = {
        "timestamp": _utc_now(),
        "tool": "Cloud Forensic Acquisition",
        "command": args.command,
        "evidence_log": evidence_log,
        "integrity": {"verified": verified, "failed": failed},
    }

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
        print(f"\n[+] Report saved to {args.output}")
    elif args.verbose:
        print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()