Files
T
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

235 lines
9.1 KiB
Python

#!/usr/bin/env python3
"""Agent for assessing and managing GDPR data protection compliance."""
import json
import argparse
import re
from datetime import datetime
from collections import Counter
GDPR_ARTICLES = {
"Art5": "Principles of processing",
"Art6": "Lawful basis",
"Art13": "Information to data subject (direct collection)",
"Art14": "Information to data subject (indirect collection)",
"Art15": "Right of access",
"Art17": "Right to erasure",
"Art20": "Right to data portability",
"Art25": "Data protection by design and by default",
"Art30": "Records of processing activities",
"Art32": "Security of processing",
"Art33": "Breach notification (72h to DPA)",
"Art34": "Breach communication to data subjects",
"Art35": "Data Protection Impact Assessment",
"Art44": "Cross-border transfer principles",
}
PII_PATTERNS = {
"email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
"phone_eu": r"\b\+?[0-9]{1,3}[\s.-]?[0-9]{6,14}\b",
"iban": r"\b[A-Z]{2}\d{2}[A-Z0-9]{11,30}\b",
"ip_address": r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
"date_of_birth": r"\b\d{2}[/.-]\d{2}[/.-]\d{4}\b",
"national_id": r"\b\d{3}-\d{2}-\d{4}\b",
"passport": r"\b[A-Z]{1,2}\d{6,9}\b",
}
def scan_for_pii(file_path, max_bytes=1024 * 1024):
"""Scan a file for GDPR-relevant personal data patterns."""
try:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
content = f.read(max_bytes)
except (OSError, PermissionError):
return None
matches = {}
for pii_type, pattern in PII_PATTERNS.items():
found = re.findall(pattern, content)
if found:
matches[pii_type] = len(found)
if not matches:
return None
return {"file": str(file_path), "pii_types": matches,
"total_matches": sum(matches.values()),
"category": classify_data_category(matches)}
def classify_data_category(matches):
"""Classify data into GDPR special categories."""
if any(k in matches for k in ["national_id", "passport"]):
return "special_category"
if "iban" in matches:
return "financial"
return "standard_personal_data"
def assess_ropa_completeness(ropa_json):
"""Assess Records of Processing Activities (ROPA) completeness per Art 30."""
with open(ropa_json) as f:
ropa = json.load(f)
required_fields = [
"controller_name", "purposes", "data_categories", "data_subjects",
"recipients", "transfers", "retention_periods", "security_measures",
]
findings = []
activities = ropa if isinstance(ropa, list) else ropa.get("activities", [])
for activity in activities:
missing = [f for f in required_fields if not activity.get(f)]
findings.append({
"activity": activity.get("name", activity.get("purpose", "unknown")),
"complete": len(missing) == 0,
"missing_fields": missing,
"compliance": "COMPLIANT" if not missing else "NON_COMPLIANT",
})
total = len(findings)
compliant = sum(1 for f in findings if f["compliance"] == "COMPLIANT")
return {
"activities_assessed": total,
"compliant": compliant,
"compliance_rate": round(compliant / total * 100, 1) if total else 0,
"details": findings,
}
def assess_dsr_handling(dsr_log_path):
"""Assess Data Subject Request handling compliance."""
with open(dsr_log_path) as f:
requests_log = json.load(f)
dsrs = requests_log if isinstance(requests_log, list) else requests_log.get("requests", [])
findings = []
for dsr in dsrs:
received = datetime.fromisoformat(dsr.get("received_date", "2024-01-01"))
completed = dsr.get("completed_date")
if completed:
completed_dt = datetime.fromisoformat(completed)
days = (completed_dt - received).days
else:
days = (datetime.utcnow() - received).days
overdue = days > 30 # GDPR requires response within one month
findings.append({
"request_id": dsr.get("id", ""),
"type": dsr.get("type", dsr.get("right", "")),
"status": dsr.get("status", "pending"),
"days_elapsed": days,
"overdue": overdue,
"severity": "HIGH" if overdue else "INFO",
})
overdue_count = sum(1 for f in findings if f["overdue"])
return {
"total_requests": len(findings),
"overdue": overdue_count,
"by_type": dict(Counter(f["type"] for f in findings)),
"details": findings,
}
def assess_breach_notification_readiness(breach_log_path):
"""Assess breach notification compliance (Art 33/34)."""
with open(breach_log_path) as f:
breaches = json.load(f)
breach_list = breaches if isinstance(breaches, list) else breaches.get("breaches", [])
findings = []
for breach in breach_list:
detected = datetime.fromisoformat(breach.get("detected", "2024-01-01"))
notified = breach.get("dpa_notified")
if notified:
notified_dt = datetime.fromisoformat(notified)
hours = (notified_dt - detected).total_seconds() / 3600
else:
hours = None
compliant = hours is not None and hours <= 72
findings.append({
"breach_id": breach.get("id", ""),
"detected": str(detected),
"notification_hours": round(hours, 1) if hours else None,
"art33_compliant": compliant,
"data_subjects_affected": breach.get("subjects_affected", 0),
"severity": breach.get("severity", "HIGH"),
})
return {"total_breaches": len(findings), "details": findings,
"art33_compliance_rate": round(
sum(1 for f in findings if f["art33_compliant"]) / len(findings) * 100, 1
) if findings else 0}
def generate_art32_checklist():
"""Generate Article 32 security measures compliance checklist."""
return {
"encryption": {
"data_at_rest": {"required": True, "standard": "AES-256"},
"data_in_transit": {"required": True, "standard": "TLS 1.2+"},
"key_management": {"required": True, "standard": "HSM or KMS"},
},
"pseudonymization": {
"tokenization": {"required": True},
"key_separation": {"required": True},
},
"access_controls": {
"rbac": {"required": True},
"mfa": {"required": True},
"least_privilege": {"required": True},
"access_reviews": {"required": True, "frequency": "quarterly"},
},
"resilience": {
"backup_strategy": {"required": True},
"disaster_recovery": {"required": True, "rto": "4h", "rpo": "1h"},
"business_continuity": {"required": True},
},
"testing": {
"penetration_testing": {"required": True, "frequency": "annual"},
"vulnerability_scanning": {"required": True, "frequency": "monthly"},
"security_awareness": {"required": True, "frequency": "annual"},
},
}
def main():
parser = argparse.ArgumentParser(description="GDPR Data Protection Agent")
parser.add_argument("--scan-pii", help="File or directory to scan for PII")
parser.add_argument("--ropa", help="ROPA JSON file for completeness check")
parser.add_argument("--dsr-log", help="Data Subject Request log JSON")
parser.add_argument("--breach-log", help="Breach notification log JSON")
parser.add_argument("--action", choices=["scan", "ropa", "dsr", "breach",
"art32", "full"], default="full")
parser.add_argument("--output", default="gdpr_compliance_report.json")
args = parser.parse_args()
report = {"generated_at": datetime.utcnow().isoformat(), "results": {}}
if args.action in ("scan", "full") and args.scan_pii:
result = scan_for_pii(args.scan_pii)
report["results"]["pii_scan"] = result
if result:
print(f"[+] PII found: {result['total_matches']} matches ({result['category']})")
else:
print("[+] No PII detected")
if args.action in ("ropa", "full") and args.ropa:
result = assess_ropa_completeness(args.ropa)
report["results"]["ropa"] = result
print(f"[+] ROPA compliance: {result['compliance_rate']}%")
if args.action in ("dsr", "full") and args.dsr_log:
result = assess_dsr_handling(args.dsr_log)
report["results"]["dsr"] = result
print(f"[+] DSRs: {result['total_requests']} total, {result['overdue']} overdue")
if args.action in ("breach", "full") and args.breach_log:
result = assess_breach_notification_readiness(args.breach_log)
report["results"]["breach"] = result
print(f"[+] Art 33 compliance rate: {result['art33_compliance_rate']}%")
if args.action in ("art32", "full"):
checklist = generate_art32_checklist()
report["results"]["art32_checklist"] = checklist
print("[+] Article 32 security checklist generated")
with open(args.output, "w") as f:
json.dump(report, f, indent=2, default=str)
print(f"[+] Report saved to {args.output}")
if __name__ == "__main__":
main()