Files
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

324 lines
10 KiB
Python

#!/usr/bin/env python3
"""Agent for validating backup integrity for disaster recovery.
Computes cryptographic hashes, compares manifests, detects corruption,
scans for ransomware artifacts, measures file entropy, and validates
backup recoverability.
"""
import argparse
import hashlib
import json
import math
import os
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
# File suffixes treated as ransomware indicators. scan_ransomware_artifacts
# compares the lower-cased suffix of each file against this set.
RANSOMWARE_EXTENSIONS = {
    ".encrypted",
    ".locked",
    ".crypt",
    ".ransom",
    ".pay",
    ".wncry",
    ".wcry",
    ".cerber",
    ".locky",
    ".zepto",
    ".osiris",
    ".aesir",
    ".thor",
    ".odin",
    ".crypz",
    ".crypted",
    ".enc",
    ".crypto",
    ".lockbit",
}

# Name fragments treated as ransom-note indicators. They are matched as
# case-insensitive substrings of the file name, so no extension is listed.
RANSOM_NOTE_PATTERNS = [
    "README_TO_DECRYPT",
    "HOW_TO_RECOVER",
    "DECRYPT_INSTRUCTIONS",
    "HELP_DECRYPT",
    "RECOVERY_INSTRUCTIONS",
    "RESTORE_FILES",
    "READ_ME_TO_DECRYPT",
    "YOUR_FILES_ARE_ENCRYPTED",
    "!README!",
    "DECRYPT_YOUR_FILES",
]
def compute_file_hash(filepath, algorithm="sha256"):
    """Return the hex digest of the file at ``filepath``.

    The file is read in 64 KiB pieces and fed to a ``hashlib`` hasher created
    for ``algorithm``. If the file cannot be opened or read, the string
    ``"ERROR:<exception text>"`` is returned instead of raising.
    """
    hasher = hashlib.new(algorithm)
    try:
        with open(filepath, "rb") as stream:
            while True:
                block = stream.read(65536)
                if not block:
                    break
                hasher.update(block)
    except OSError as exc:  # PermissionError is a subclass of OSError
        return f"ERROR:{exc}"
    return hasher.hexdigest()
def generate_manifest(directory, algorithm="sha256"):
    """Build a hash manifest covering every regular file under ``directory``.

    Files are visited in sorted path order and keyed by their path relative
    to ``directory``. Each value is whatever ``compute_file_hash`` returns,
    so unreadable files appear with an ``"ERROR:..."`` value and are tallied
    in ``errors``. Returns ``{"error": ...}`` when ``directory`` is not a
    directory.
    """
    root = Path(directory)
    if not root.is_dir():
        return {"error": f"Directory not found: {directory}"}

    regular_files = (
        entry for entry in sorted(root.rglob("*")) if entry.is_file()
    )
    hashes = {
        str(entry.relative_to(root)): compute_file_hash(str(entry), algorithm)
        for entry in regular_files
    }
    failed = sum(1 for digest in hashes.values() if digest.startswith("ERROR:"))

    return {
        "directory": str(directory),
        "algorithm": algorithm,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "total_files": len(hashes),
        "errors": failed,
        "hashes": hashes,
    }
def compare_manifests(baseline_path, restored_path):
    """Diff two manifest JSON files and report integrity problems.

    Each file may be a full manifest (hashes under the ``"hashes"`` key) or a
    flat ``{path: digest}`` mapping. The result lists files missing from the
    restored manifest, files whose digest differs, and files present only in
    the restored manifest. ``integrity_pass`` is True when nothing is missing
    or modified; added files do not fail the check.
    """
    documents = []
    for path in (baseline_path, restored_path):
        with open(path, "r") as handle:
            documents.append(json.load(handle))
    expected, actual = (doc.get("hashes", doc) for doc in documents)

    missing = [name for name in expected if name not in actual]
    modified = [
        {"file": name, "baseline": digest, "restored": actual[name]}
        for name, digest in expected.items()
        if name in actual and actual[name] != digest
    ]
    added = [name for name in actual if name not in expected]

    return {
        "baseline_files": len(expected),
        "restored_files": len(actual),
        "missing_files": missing,
        "missing_count": len(missing),
        "modified_files": modified,
        "modified_count": len(modified),
        "added_files": added,
        "added_count": len(added),
        "integrity_pass": not (missing or modified),
    }
def calculate_entropy(filepath):
    """Calculate Shannon entropy of a file (0-8 bits per byte).

    The file is streamed in fixed-size chunks so that memory use stays
    bounded no matter how large the file is; only the 256-entry byte
    histogram is kept.

    Args:
        filepath: Path of the file to measure.

    Returns:
        The entropy rounded to 4 decimal places, ``0.0`` for an empty file,
        or ``None`` if the file cannot be opened or read.
    """
    byte_counts = Counter()
    try:
        with open(filepath, "rb") as f:
            # Iterating a bytes chunk yields ints, so update() tallies bytes.
            for chunk in iter(lambda: f.read(1 << 20), b""):
                byte_counts.update(chunk)
    except OSError:  # includes PermissionError
        return None

    length = sum(byte_counts.values())
    if not length:
        return 0.0

    entropy = 0.0
    for count in byte_counts.values():
        # Every stored count is >= 1, so p > 0 and log2(p) is defined.
        p = count / length
        entropy -= p * math.log2(p)
    return round(entropy, 4)
def entropy_scan(directory, threshold=7.9):
    """Flag files under ``directory`` whose entropy suggests encryption.

    Regular files smaller than 1024 bytes are ignored. Every remaining file
    counts as scanned, and it is reported as suspicious when
    ``calculate_entropy`` returns a value at or above ``threshold``. At most
    the first 100 suspicious entries are included in the result, while
    ``suspicious_count`` reflects the full number found.
    """
    root = Path(directory)
    flagged = []
    examined = 0

    candidates = (
        path for path in root.rglob("*")
        if path.is_file() and path.stat().st_size >= 1024
    )
    for candidate in candidates:
        examined += 1
        score = calculate_entropy(str(candidate))
        # A None score means the file could not be read; it is never flagged.
        if score is None or not (score >= threshold):
            continue
        flagged.append({
            "file": str(candidate.relative_to(root)),
            "entropy": score,
            "size_bytes": candidate.stat().st_size,
        })

    return {
        "directory": str(directory),
        "threshold": threshold,
        "files_scanned": examined,
        "suspicious_count": len(flagged),
        "suspicious_files": flagged[:100],
    }
def scan_ransomware_artifacts(directory):
    """Look for ransomware indicators among the files under ``directory``.

    A file is recorded under ``ransomware_extensions`` when its lower-cased
    suffix is in ``RANSOMWARE_EXTENSIONS``, and under ``ransom_notes`` when
    its name contains any ``RANSOM_NOTE_PATTERNS`` entry (case-insensitive).
    A file is listed at most once per category. ``clean`` is True only when
    both lists are empty.
    """
    root = Path(directory)
    flagged_extensions = []
    note_hits = []
    file_count = 0

    for entry in root.rglob("*"):
        if not entry.is_file():
            continue
        file_count += 1
        relative = str(entry.relative_to(root))

        if entry.suffix.lower() in RANSOMWARE_EXTENSIONS:
            flagged_extensions.append(relative)

        lowered_name = entry.name.lower()
        if any(marker.lower() in lowered_name for marker in RANSOM_NOTE_PATTERNS):
            note_hits.append(relative)

    return {
        "ransomware_extensions": flagged_extensions,
        "ransom_notes": note_hits,
        "total_scanned": file_count,
        "clean": not flagged_extensions and not note_hits,
    }
def validate_backup(directory, baseline_manifest=None, check_ransomware=True,
                    check_entropy=True, entropy_threshold=7.9):
    """Run the full backup validation suite on ``directory``.

    Checks performed:
      * file_stats: file count and total size (passes when at least one
        file exists).
      * integrity: hashes the directory and compares it with
        ``baseline_manifest``. The directory is hashed with the algorithm
        recorded in the baseline so the digests are comparable.
      * ransomware_scan / entropy_scan: run when the matching flag is set.

    Args:
        directory: Backup directory to validate.
        baseline_manifest: Optional path to a manifest JSON file. If omitted,
            the integrity check is skipped. If given but not an existing
            file, the integrity check is reported as failed rather than
            silently skipped.
        check_ransomware: Whether to run ``scan_ransomware_artifacts``.
        check_entropy: Whether to run ``entropy_scan``.
        entropy_threshold: Threshold forwarded to ``entropy_scan``.

    Returns:
        A dict with ``timestamp``, ``directory``, ``checks`` and
        ``overall_pass``, or ``{"error": ...}`` when ``directory`` is not a
        directory.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    dir_path = Path(directory)
    if not dir_path.is_dir():
        return {"error": f"Directory not found: {directory}"}

    results = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "directory": str(directory),
        "checks": {},
    }
    checks = results["checks"]

    # File count and size, gathered in a single walk of the tree.
    sizes = [f.stat().st_size for f in dir_path.rglob("*") if f.is_file()]
    total_files = len(sizes)
    total_size = sum(sizes)
    checks["file_stats"] = {
        "total_files": total_files,
        "total_size_bytes": total_size,
        "total_size_mb": round(total_size / (1024 * 1024), 2),
        "pass": total_files > 0,
    }

    # Manifest comparison
    if not baseline_manifest:
        checks["integrity"] = {"skipped": True,
                               "reason": "No baseline manifest provided"}
    elif not os.path.isfile(baseline_manifest):
        # The caller asked for an integrity check that cannot run; a wrong
        # path must not let the backup pass validation.
        checks["integrity"] = {
            "skipped": True,
            "reason": f"Baseline manifest not found: {baseline_manifest}",
            "integrity_pass": False,
        }
    else:
        # Use the baseline's own hash algorithm. Only full manifests (those
        # with a "hashes" key) carry an "algorithm" field; flat mappings
        # fall back to sha256.
        algorithm = "sha256"
        with open(baseline_manifest, "r") as f:
            baseline_doc = json.load(f)
        if isinstance(baseline_doc, dict) and "hashes" in baseline_doc:
            recorded = baseline_doc.get("algorithm")
            if isinstance(recorded, str) and recorded in hashlib.algorithms_available:
                algorithm = recorded

        current = generate_manifest(directory, algorithm)

        # The scratch manifest goes in the system temp directory so the
        # backup under validation is not written to, and it is removed even
        # if the comparison raises.
        fd, current_path = tempfile.mkstemp(prefix="current_manifest_",
                                            suffix=".json")
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(current, f)
            checks["integrity"] = compare_manifests(baseline_manifest,
                                                    current_path)
        finally:
            os.remove(current_path)

    # Ransomware artifact scan
    if check_ransomware:
        checks["ransomware_scan"] = scan_ransomware_artifacts(directory)

    # Entropy scan
    if check_entropy:
        checks["entropy_scan"] = entropy_scan(directory, entropy_threshold)

    # Overall verdict: checks that did not run count as passing.
    results["overall_pass"] = (
        checks.get("file_stats", {}).get("pass", False)
        and checks.get("integrity", {}).get("integrity_pass", True)
        and checks.get("ransomware_scan", {}).get("clean", True)
        and checks.get("entropy_scan", {}).get("suspicious_count", 0) == 0
    )
    return results
def main():
    """Command-line entry point for the backup integrity agent.

    Exactly one action runs per invocation, chosen in this priority order:
    --generate-manifest, --compare, --validate, --entropy-scan,
    --ransomware-scan. With no action the help text is printed. The action's
    result is emitted as indented JSON, either to the --output file or to
    stdout.
    """
    parser = argparse.ArgumentParser(
        description="Backup Integrity Validation Agent"
    )
    parser.add_argument(
        "--generate-manifest",
        help="Generate hash manifest for a directory",
    )
    parser.add_argument(
        "--compare",
        nargs=2,
        metavar=("BASELINE", "RESTORED"),
        help="Compare two manifest JSON files",
    )
    parser.add_argument(
        "--validate",
        help="Run full validation on a backup directory",
    )
    parser.add_argument(
        "--baseline",
        help="Baseline manifest for comparison",
    )
    parser.add_argument(
        "--entropy-scan",
        help="Scan directory for high-entropy files",
    )
    parser.add_argument(
        "--entropy-threshold",
        type=float,
        default=7.9,
        help="Entropy threshold (default: 7.9)",
    )
    parser.add_argument(
        "--ransomware-scan",
        help="Scan directory for ransomware artifacts",
    )
    parser.add_argument(
        "--algorithm",
        default="sha256",
        choices=["sha256", "sha512", "sha3_256", "blake2b"],
        help="Hash algorithm (default: sha256)",
    )
    parser.add_argument("--output", "-o", help="Output file path")
    args = parser.parse_args()

    print("[*] Backup Integrity Validation Agent")

    if args.generate_manifest:
        report = generate_manifest(args.generate_manifest, args.algorithm)
        print(f"[*] Generated manifest: {report.get('total_files', 0)} files")
    elif args.compare:
        baseline_file, restored_file = args.compare
        report = compare_manifests(baseline_file, restored_file)
        status = "PASS" if report["integrity_pass"] else "FAIL"
        print(f"[*] Integrity check: {status}")
        if report["missing_count"]:
            print(f"[!] Missing files: {report['missing_count']}")
        if report["modified_count"]:
            print(f"[!] Modified files: {report['modified_count']}")
    elif args.validate:
        report = validate_backup(
            args.validate,
            baseline_manifest=args.baseline,
            entropy_threshold=args.entropy_threshold,
        )
        status = "PASS" if report.get("overall_pass") else "FAIL"
        print(f"[*] Overall validation: {status}")
    elif args.entropy_scan:
        report = entropy_scan(args.entropy_scan, args.entropy_threshold)
        print(f"[*] Scanned {report['files_scanned']} files, "
              f"{report['suspicious_count']} suspicious")
    elif args.ransomware_scan:
        report = scan_ransomware_artifacts(args.ransomware_scan)
        status = "CLEAN" if report["clean"] else "INFECTED"
        print(f"[*] Ransomware scan: {status}")
    else:
        parser.print_help()
        return

    if not report:
        return

    serialized = json.dumps(report, indent=2)
    if not args.output:
        print(serialized)
        return
    with open(args.output, "w") as out_file:
        out_file.write(serialized)
    print(f"[*] Results saved to {args.output}")
if __name__ == "__main__":
main()