Files
Anthropic-Cybersecurity-Skills/skills/performing-privacy-impact-assessment/scripts/agent.py
T

1414 lines
63 KiB
Python

#!/usr/bin/env python3
"""Agent for performing automated Privacy Impact Assessments (PIA/DPIA).
Implements the NIST Privacy Framework PRAM methodology and ICO DPIA guidance
for systematic identification and mitigation of privacy risks. Supports
GDPR Article 35 and CCPA/CPRA compliance checks with risk scoring matrices.
"""
import os
import json
import uuid
import argparse
from datetime import datetime, timedelta
from copy import deepcopy
# ---------------------------------------------------------------------------
# Data sensitivity classification
# ---------------------------------------------------------------------------
DATA_SENSITIVITY = {
# Special category / sensitive (GDPR Art. 9)
"health_data": {"sensitivity": "special_category", "weight": 5},
"biometric_data": {"sensitivity": "special_category", "weight": 5},
"genetic_data": {"sensitivity": "special_category", "weight": 5},
"racial_ethnic_origin": {"sensitivity": "special_category", "weight": 5},
"political_opinions": {"sensitivity": "special_category", "weight": 5},
"religious_beliefs": {"sensitivity": "special_category", "weight": 5},
"sexual_orientation": {"sensitivity": "special_category", "weight": 5},
"trade_union_membership": {"sensitivity": "special_category", "weight": 5},
# High sensitivity PII
"ssn": {"sensitivity": "high", "weight": 4},
"financial_account": {"sensitivity": "high", "weight": 4},
"credit_card": {"sensitivity": "high", "weight": 4},
"passport_number": {"sensitivity": "high", "weight": 4},
"drivers_license": {"sensitivity": "high", "weight": 4},
"login_credentials": {"sensitivity": "high", "weight": 4},
"performance_scores": {"sensitivity": "high", "weight": 4},
"salary_data": {"sensitivity": "high", "weight": 4},
# Medium sensitivity
"email": {"sensitivity": "medium", "weight": 3},
"phone_number": {"sensitivity": "medium", "weight": 3},
"date_of_birth": {"sensitivity": "medium", "weight": 3},
"home_address": {"sensitivity": "medium", "weight": 3},
"ip_address": {"sensitivity": "medium", "weight": 3},
"geolocation": {"sensitivity": "medium", "weight": 3},
"employee_id": {"sensitivity": "medium", "weight": 3},
"purchase_records": {"sensitivity": "medium", "weight": 3},
"transaction_data": {"sensitivity": "medium", "weight": 3},
# Low sensitivity
"name": {"sensitivity": "low", "weight": 2},
"job_title": {"sensitivity": "low", "weight": 2},
"company_name": {"sensitivity": "low", "weight": 2},
"browsing_history": {"sensitivity": "low", "weight": 2},
"device_id": {"sensitivity": "low", "weight": 2},
"device_fingerprint": {"sensitivity": "low", "weight": 2},
"cookie_id": {"sensitivity": "low", "weight": 1},
"public_profile": {"sensitivity": "low", "weight": 1},
}
# Countries with GDPR adequacy decisions (as of 2026)
GDPR_ADEQUATE_COUNTRIES = {
"AD", "AR", "CA", "FO", "GG", "IL", "IM", "JP", "JE", "NZ",
"KR", "CH", "GB", "UY", "US", # US under EU-US Data Privacy Framework
# All EU/EEA member states
"AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR",
"DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU", "MT", "NL",
"PL", "PT", "RO", "SK", "SI", "ES", "SE", "IS", "LI", "NO",
}
# GDPR articles relevant to DPIA
GDPR_DPIA_ARTICLES = {
"5": {
"title": "Principles relating to processing of personal data",
"checks": [
{"id": "5.1a", "desc": "Lawfulness, fairness and transparency",
"field": "legal_basis", "required": True},
{"id": "5.1b", "desc": "Purpose limitation",
"field": "purpose_specified", "required": True},
{"id": "5.1c", "desc": "Data minimization",
"field": "data_minimization_justified", "required": True},
{"id": "5.1d", "desc": "Accuracy of data",
"field": "accuracy_measures", "required": True},
{"id": "5.1e", "desc": "Storage limitation",
"field": "retention_period_days", "required": True},
{"id": "5.1f", "desc": "Integrity and confidentiality",
"field": "security_measures", "required": True},
],
},
"6": {
"title": "Lawfulness of processing",
"checks": [
{"id": "6.1", "desc": "Valid legal basis identified",
"field": "legal_basis", "valid_values": [
"consent", "contract", "legal_obligation",
"vital_interests", "public_task", "legitimate_interest"]},
],
},
"7": {
"title": "Conditions for consent",
"checks": [
{"id": "7.1", "desc": "Demonstrable consent obtained",
"field": "consent_mechanism", "condition_field": "legal_basis",
"condition_value": "consent"},
{"id": "7.3", "desc": "Right to withdraw consent",
"field": "consent_withdrawal_mechanism", "condition_field": "legal_basis",
"condition_value": "consent"},
],
},
"13": {
"title": "Information to be provided at collection",
"checks": [
{"id": "13.1", "desc": "Privacy notice provided at collection",
"field": "privacy_notice", "required": True},
],
},
"22": {
"title": "Automated individual decision-making",
"checks": [
{"id": "22.1", "desc": "Right not to be subject to automated decision",
"field": "human_oversight", "condition_field": "automated_decision_making",
"condition_value": True},
{"id": "22.3", "desc": "Right to contest automated decision",
"field": "appeal_mechanism", "condition_field": "automated_decision_making",
"condition_value": True},
],
},
"25": {
"title": "Data protection by design and by default",
"checks": [
{"id": "25.1", "desc": "Privacy by design measures implemented",
"field": "privacy_by_design", "required": True},
{"id": "25.2", "desc": "Data protection by default",
"field": "data_protection_defaults", "required": True},
],
},
"28": {
"title": "Processor obligations",
"checks": [
{"id": "28.3", "desc": "Data processing agreement with processor",
"field": "data_processing_agreement", "condition_field": "data_processor",
"condition_value": "_nonempty"},
],
},
"30": {
"title": "Records of processing activities",
"checks": [
{"id": "30.1", "desc": "ROPA maintained",
"field": "ropa_entry", "required": True},
],
},
"32": {
"title": "Security of processing",
"checks": [
{"id": "32.1", "desc": "Appropriate technical and organizational measures",
"field": "security_measures", "required": True},
],
},
"33": {
"title": "Notification of breach to supervisory authority",
"checks": [
{"id": "33.1", "desc": "Breach notification within 72 hours capability",
"field": "breach_notification_plan", "required": True},
],
},
"35": {
"title": "Data protection impact assessment",
"checks": [
{"id": "35.7a", "desc": "Systematic description of processing",
"field": "processing_description", "required": True},
{"id": "35.7b", "desc": "Necessity and proportionality assessment",
"field": "necessity_assessment", "required": True},
{"id": "35.7c", "desc": "Risks to rights and freedoms assessment",
"field": "risk_assessment_completed", "required": True},
{"id": "35.7d", "desc": "Measures to address risks",
"field": "risk_mitigation_measures", "required": True},
],
},
"44": {
"title": "General principle for transfers",
"checks": [
{"id": "44.1", "desc": "Transfer safeguards for cross-border data",
"field": "transfer_safeguards", "condition_field": "cross_border_transfer",
"condition_value": True},
],
},
}
# CCPA/CPRA sections for compliance checks
CCPA_SECTIONS = {
"1798.100": {
"title": "Consumer right to know",
"checks": [
{"id": "100.a", "desc": "Right to know what PI is collected",
"field": "data_inventory_available", "required": True},
{"id": "100.b", "desc": "Disclosure of categories of PI collected",
"field": "categories_disclosed", "required": True},
],
},
"1798.105": {
"title": "Consumer right to delete",
"checks": [
{"id": "105.a", "desc": "Right to request deletion of PI",
"field": "deletion_mechanism", "required": True},
],
},
"1798.106": {
"title": "Consumer right to correct",
"checks": [
{"id": "106.a", "desc": "Right to correct inaccurate PI",
"field": "correction_mechanism", "required": True},
],
},
"1798.110": {
"title": "Right to know specific pieces of PI",
"checks": [
{"id": "110.a", "desc": "Disclose categories, sources, purposes, third parties",
"field": "data_inventory_available", "required": True},
],
},
"1798.115": {
"title": "Right to know about selling/sharing",
"checks": [
{"id": "115.a", "desc": "Disclosure of selling or sharing of PI",
"field": "selling_sharing_disclosed", "required": True},
],
},
"1798.120": {
"title": "Consumer right to opt-out of sale/sharing",
"checks": [
{"id": "120.a", "desc": "Do Not Sell or Share opt-out mechanism",
"field": "opt_out_mechanism", "condition_field": "sells_or_shares_data",
"condition_value": True},
],
},
"1798.121": {
"title": "Right to limit use of sensitive PI",
"checks": [
{"id": "121.a", "desc": "Limit use and disclosure of sensitive PI",
"field": "sensitive_data_controls", "condition_field": "_has_sensitive_data",
"condition_value": True},
],
},
"1798.125": {
"title": "Non-discrimination",
"checks": [
{"id": "125.a", "desc": "No discrimination for exercising rights",
"field": "non_discrimination_policy", "required": True},
],
},
"1798.130": {
"title": "Notice and request handling",
"checks": [
{"id": "130.a", "desc": "Methods for submitting consumer requests",
"field": "request_submission_methods", "required": True},
{"id": "130.a.2", "desc": "Respond to requests within 45 days",
"field": "response_sla_days", "required": True, "max_value": 45},
],
},
"1798.135": {
"title": "Do Not Sell or Share link",
"checks": [
{"id": "135.a", "desc": "Clear and conspicuous opt-out link on homepage",
"field": "opt_out_link_on_homepage", "condition_field": "sells_or_shares_data",
"condition_value": True},
],
},
"1798.185": {
"title": "CPRA risk assessment requirement",
"checks": [
{"id": "185.a.15", "desc": "Risk assessment for significant processing",
"field": "risk_assessment_completed", "required": True},
],
},
}
# NIST Privacy Framework functions and categories
NIST_PRIVACY_FUNCTIONS = {
"ID-P": {
"name": "Identify-P",
"description": "Develop organizational understanding to manage privacy risk",
"categories": {
"ID.IM-P": {
"name": "Inventory and Mapping",
"subcategories": [
{"id": "ID.IM-P1", "desc": "Systems and assets that process data are inventoried",
"check": "data_inventory_available"},
{"id": "ID.IM-P2", "desc": "Data actions mapped to system operations",
"check": "data_flow_mapped"},
{"id": "ID.IM-P3", "desc": "Data elements mapped within data actions",
"check": "data_elements_mapped"},
{"id": "ID.IM-P4", "desc": "Data actions of service providers identified",
"check": "processor_actions_identified"},
{"id": "ID.IM-P5", "desc": "Affected individuals identified",
"check": "data_subjects_identified"},
{"id": "ID.IM-P6", "desc": "Data processing purposes identified",
"check": "purpose_specified"},
{"id": "ID.IM-P7", "desc": "Legal authorities for data actions identified",
"check": "legal_basis"},
{"id": "ID.IM-P8", "desc": "Problematic data actions assessed",
"check": "risk_assessment_completed"},
],
},
"ID.BE-P": {
"name": "Business Environment",
"subcategories": [
{"id": "ID.BE-P1", "desc": "Privacy values and policies established",
"check": "privacy_policy_established"},
{"id": "ID.BE-P2", "desc": "Privacy role in enterprise risk management",
"check": "privacy_in_erm"},
],
},
"ID.RA-P": {
"name": "Risk Assessment",
"subcategories": [
{"id": "ID.RA-P1", "desc": "Contextual factors related to data processing identified",
"check": "contextual_factors_documented"},
{"id": "ID.RA-P2", "desc": "Data analytic inputs and outputs identified",
"check": "analytics_io_documented"},
{"id": "ID.RA-P3", "desc": "Potential problematic data actions assessed",
"check": "risk_assessment_completed"},
{"id": "ID.RA-P4", "desc": "Problematic data actions prioritized",
"check": "risks_prioritized"},
{"id": "ID.RA-P5", "desc": "Risk responses identified and prioritized",
"check": "risk_responses_identified"},
],
},
},
},
"GV-P": {
"name": "Govern-P",
"description": "Develop and implement organizational governance structure",
"categories": {
"GV.PO-P": {
"name": "Governance Policies, Processes, and Procedures",
"subcategories": [
{"id": "GV.PO-P1", "desc": "Organizational privacy policy established",
"check": "privacy_policy_established"},
{"id": "GV.PO-P2", "desc": "Privacy risk management processes established",
"check": "risk_management_process"},
{"id": "GV.PO-P3", "desc": "Roles and responsibilities defined",
"check": "privacy_roles_defined"},
{"id": "GV.PO-P4", "desc": "Governance and risk management reviewed",
"check": "governance_reviewed"},
{"id": "GV.PO-P5", "desc": "Legal requirements identified",
"check": "legal_requirements_identified"},
{"id": "GV.PO-P6", "desc": "Governance structure includes privacy",
"check": "privacy_in_governance"},
],
},
"GV.AT-P": {
"name": "Awareness and Training",
"subcategories": [
{"id": "GV.AT-P1", "desc": "Workforce informed of privacy policies",
"check": "privacy_training"},
{"id": "GV.AT-P2", "desc": "Senior executives understand privacy risk",
"check": "executive_awareness"},
{"id": "GV.AT-P3", "desc": "Privacy personnel understand responsibilities",
"check": "privacy_team_trained"},
{"id": "GV.AT-P4", "desc": "Third parties understand privacy obligations",
"check": "third_party_aware"},
],
},
"GV.MT-P": {
"name": "Monitoring and Review",
"subcategories": [
{"id": "GV.MT-P1", "desc": "Privacy risk re-assessed on ongoing basis",
"check": "ongoing_reassessment"},
{"id": "GV.MT-P2", "desc": "Privacy values reflected in practices",
"check": "privacy_values_in_practice"},
{"id": "GV.MT-P3", "desc": "Policies updated to reflect changes",
"check": "policy_update_process"},
],
},
},
},
"CT-P": {
"name": "Control-P",
"description": "Develop and implement appropriate activities for data management",
"categories": {
"CT.DM-P": {
"name": "Data Management",
"subcategories": [
{"id": "CT.DM-P1", "desc": "Data elements managed consistent with risk strategy",
"check": "data_management_aligned"},
{"id": "CT.DM-P2", "desc": "Data retention and disposal policy",
"check": "retention_policy"},
{"id": "CT.DM-P3", "desc": "Data processing accuracy maintained",
"check": "accuracy_measures"},
{"id": "CT.DM-P4", "desc": "Data disclosed only as authorized",
"check": "disclosure_controls"},
{"id": "CT.DM-P5", "desc": "Data from processing manipulations managed",
"check": "derived_data_managed"},
{"id": "CT.DM-P6", "desc": "Data in transit managed",
"check": "transit_controls"},
{"id": "CT.DM-P7", "desc": "Mechanisms for transmitting consent preferences",
"check": "consent_preference_mechanism"},
{"id": "CT.DM-P8", "desc": "Audit/log records for data processing",
"check": "processing_audit_log"},
{"id": "CT.DM-P9", "desc": "Technical measures for consent management",
"check": "consent_management_system"},
{"id": "CT.DM-P10", "desc": "Data quality maintained through lifecycle",
"check": "data_quality_lifecycle"},
],
},
"CT.PO-P": {
"name": "Data Processing Policies",
"subcategories": [
{"id": "CT.PO-P1", "desc": "De-identification policies and processes",
"check": "deidentification_policy"},
{"id": "CT.PO-P2", "desc": "Data tagging for purpose and provenance",
"check": "data_tagging"},
{"id": "CT.PO-P3", "desc": "Administrative access safeguards",
"check": "admin_access_controls"},
{"id": "CT.PO-P4", "desc": "System monitoring for privacy compliance",
"check": "privacy_monitoring"},
],
},
},
},
"CM-P": {
"name": "Communicate-P",
"description": "Develop and implement communication activities",
"categories": {
"CM.AW-P": {
"name": "Communication Policies",
"subcategories": [
{"id": "CM.AW-P1", "desc": "Privacy practices communicated to individuals",
"check": "privacy_notice"},
{"id": "CM.AW-P2", "desc": "Mechanisms for individual complaints",
"check": "complaint_mechanism"},
{"id": "CM.AW-P3", "desc": "System changes communicated",
"check": "change_notification"},
{"id": "CM.AW-P4", "desc": "Data processing awareness program",
"check": "awareness_program"},
],
},
},
},
"PR-P": {
"name": "Protect-P",
"description": "Develop and implement safeguards for data processing",
"categories": {
"PR.AC-P": {
"name": "Identity Management, Authentication and Access Control",
"subcategories": [
{"id": "PR.AC-P1", "desc": "Identities and credentials managed",
"check": "identity_management"},
{"id": "PR.AC-P2", "desc": "Physical access managed",
"check": "physical_access_controls"},
{"id": "PR.AC-P3", "desc": "Remote access managed",
"check": "remote_access_controls"},
{"id": "PR.AC-P4", "desc": "Access permissions managed",
"check": "access_permissions"},
{"id": "PR.AC-P5", "desc": "Network integrity protected",
"check": "network_integrity"},
{"id": "PR.AC-P6", "desc": "Individuals authenticated",
"check": "authentication_controls"},
],
},
"PR.DS-P": {
"name": "Data Security",
"subcategories": [
{"id": "PR.DS-P1", "desc": "Data at rest protected",
"check": "encryption_at_rest"},
{"id": "PR.DS-P2", "desc": "Data in transit protected",
"check": "encryption_in_transit"},
{"id": "PR.DS-P3", "desc": "Systems and data formally managed",
"check": "asset_management"},
{"id": "PR.DS-P4", "desc": "Adequate capacity maintained",
"check": "capacity_management"},
{"id": "PR.DS-P5", "desc": "Protections against data leaks implemented",
"check": "dlp_controls"},
{"id": "PR.DS-P6", "desc": "Integrity checking mechanisms used",
"check": "integrity_checking"},
{"id": "PR.DS-P7", "desc": "Development and testing use non-production data",
"check": "non_production_data"},
{"id": "PR.DS-P8", "desc": "Hardware integrity verified",
"check": "hardware_integrity"},
],
},
"PR.PO-P": {
"name": "Protective Policies",
"subcategories": [
{"id": "PR.PO-P1", "desc": "Baseline configuration maintained",
"check": "baseline_config"},
{"id": "PR.PO-P2", "desc": "Configuration change control in place",
"check": "change_control"},
{"id": "PR.PO-P3", "desc": "Backups maintained and tested",
"check": "backup_tested"},
{"id": "PR.PO-P4", "desc": "Response and recovery plans tested",
"check": "incident_response_tested"},
{"id": "PR.PO-P5", "desc": "Improvements implemented from testing",
"check": "continuous_improvement"},
{"id": "PR.PO-P6", "desc": "Data disposal performed",
"check": "data_disposal"},
{"id": "PR.PO-P7", "desc": "Protection processes improved",
"check": "protection_improvement"},
{"id": "PR.PO-P8", "desc": "Protection technology shared",
"check": "protection_shared"},
{"id": "PR.PO-P9", "desc": "Response plans managed",
"check": "response_plans"},
{"id": "PR.PO-P10", "desc": "Protection updated with testing results",
"check": "protection_updated"},
],
},
},
},
}
# Risk assessment rules
PRIVACY_RISK_RULES = [
{
"id": "RISK-001",
"category": "Data Minimization",
"description": "Excessive data collection beyond stated processing purpose",
"check": lambda a: len(a.get("data_categories", [])) > 5 and not a.get("data_minimization_justified"),
"likelihood_base": 4,
"impact_base": 3,
"mitigation": "Conduct data minimization review; remove data elements not required for stated purpose; document justification for each element collected",
},
{
"id": "RISK-002",
"category": "Purpose Limitation",
"description": "Processing purpose not clearly specified or data used beyond original purpose",
"check": lambda a: not a.get("purpose_specified") or a.get("secondary_purposes"),
"likelihood_base": 3,
"impact_base": 4,
"mitigation": "Document explicit processing purposes; obtain separate legal basis for any secondary use; implement purpose limitation controls in data pipeline",
},
{
"id": "RISK-003",
"category": "Cross-Border Transfer",
"description": "Personal data transferred to jurisdiction without adequate protection",
"check": lambda a: a.get("cross_border_transfer") and any(
d not in GDPR_ADEQUATE_COUNTRIES for d in a.get("transfer_destinations", [])),
"likelihood_base": 3,
"impact_base": 5,
"mitigation": "Implement Standard Contractual Clauses (SCCs) or Binding Corporate Rules (BCRs); conduct Transfer Impact Assessment; consider data localization",
},
{
"id": "RISK-004",
"category": "Automated Decision Making",
"description": "Automated profiling or decision-making without human oversight or appeal",
"check": lambda a: a.get("automated_decision_making") and not a.get("human_oversight"),
"likelihood_base": 4,
"impact_base": 5,
"mitigation": "Implement meaningful human review for significant automated decisions; provide appeal mechanism; document algorithm logic and bias testing results",
},
{
"id": "RISK-005",
"category": "Data Subject Rights",
"description": "Insufficient mechanisms to fulfill data subject access, erasure, or portability requests",
"check": lambda a: not a.get("dsr_mechanism"),
"likelihood_base": 3,
"impact_base": 4,
"mitigation": "Implement automated DSR fulfillment workflows; test access, rectification, erasure, portability, and objection request handling; ensure response within regulatory timeframes",
},
{
"id": "RISK-006",
"category": "Third-Party Risk",
"description": "Data processor lacks adequate privacy controls or valid DPA",
"check": lambda a: a.get("data_processor") and not a.get("data_processing_agreement"),
"likelihood_base": 3,
"impact_base": 4,
"mitigation": "Execute Data Processing Agreement with all processors; conduct processor privacy audits; implement subprocessor approval workflows; require breach notification clauses",
},
{
"id": "RISK-007",
"category": "Security Controls",
"description": "Inadequate encryption, access controls, or breach detection capabilities",
"check": lambda a: not a.get("encryption_at_rest") or not a.get("encryption_in_transit"),
"likelihood_base": 4,
"impact_base": 5,
"mitigation": "Implement AES-256 encryption at rest and TLS 1.3 in transit; enforce MFA and least-privilege access; deploy intrusion detection and breach monitoring",
},
{
"id": "RISK-008",
"category": "Retention",
"description": "Data retained beyond necessary period or no retention policy defined",
"check": lambda a: not a.get("retention_period_days") or a.get("retention_period_days", 0) > 1095,
"likelihood_base": 3,
"impact_base": 3,
"mitigation": "Define and enforce retention schedules per data category; implement automated deletion workflows; conduct periodic retention audits",
},
{
"id": "RISK-009",
"category": "Consent Management",
"description": "Consent mechanisms are ambiguous, pre-checked, or lack granularity",
"check": lambda a: a.get("legal_basis") == "consent" and not a.get("consent_mechanism"),
"likelihood_base": 4,
"impact_base": 4,
"mitigation": "Implement granular consent management with clear opt-in; record consent evidence with timestamp and scope; provide easy withdrawal mechanism; avoid dark patterns",
},
{
"id": "RISK-010",
"category": "Breach Notification",
"description": "No documented breach response plan or inability to notify within 72 hours",
"check": lambda a: not a.get("breach_notification_plan"),
"likelihood_base": 3,
"impact_base": 5,
"mitigation": "Document and test incident response procedures; implement automated breach detection; establish 72-hour notification workflow for supervisory authority and affected individuals",
},
{
"id": "RISK-011",
"category": "Special Category Data",
"description": "Processing of special category data without explicit consent or Art. 9 exemption",
"check": lambda a: any(
DATA_SENSITIVITY.get(cat, {}).get("sensitivity") == "special_category"
for cat in a.get("data_categories", [])
) and a.get("legal_basis") != "explicit_consent" and not a.get("art9_exemption"),
"likelihood_base": 3,
"impact_base": 5,
"mitigation": "Obtain explicit consent for special category data; document Article 9 exemption basis; implement additional safeguards including encryption and access logging",
},
{
"id": "RISK-012",
"category": "Transparency",
"description": "Privacy notice is incomplete, inaccessible, or not provided at point of collection",
"check": lambda a: not a.get("privacy_notice"),
"likelihood_base": 3,
"impact_base": 3,
"mitigation": "Provide clear layered privacy notice at point of collection; include all GDPR Art. 13/14 required information; make notice accessible in plain language",
},
{
"id": "RISK-013",
"category": "Vulnerable Data Subjects",
"description": "Processing data of children, elderly, or other vulnerable individuals without additional safeguards",
"check": lambda a: any(s in a.get("data_subjects", []) for s in [
"children", "minors", "patients", "elderly", "disabled", "employees"]),
"likelihood_base": 3,
"impact_base": 4,
"mitigation": "Implement age verification for children's data; apply heightened protections; conduct specific impact assessment for vulnerable populations; ensure guardian consent where required",
},
{
"id": "RISK-014",
"category": "Data Quality",
"description": "No mechanisms to ensure accuracy, completeness, and currency of personal data",
"check": lambda a: not a.get("accuracy_measures"),
"likelihood_base": 2,
"impact_base": 3,
"mitigation": "Implement data validation at collection; provide self-service data correction; establish periodic data quality reviews; document accuracy requirements per data element",
},
]
def _risk_severity(score):
"""Map a numeric risk score (1-25) to a severity label."""
if score >= 20:
return "CRITICAL"
if score >= 15:
return "HIGH"
if score >= 10:
return "MEDIUM"
if score >= 5:
return "LOW"
return "INFORMATIONAL"
class PrivacyImpactAssessmentEngine:
"""Engine for conducting automated Privacy Impact Assessments."""
def __init__(self, organization_name="", dpo_email=""):
self.organization_name = organization_name
self.dpo_email = dpo_email
self.activities = {}
self.flow_maps = {}
self.risk_reports = {}
self.gdpr_reports = {}
self.ccpa_reports = {}
# ------------------------------------------------------------------
# Processing activity registration
# ------------------------------------------------------------------
def register_processing_activity(self, name, description="",
data_controller="", data_processor="",
data_categories=None, data_subjects=None,
legal_basis="", retention_period_days=None,
cross_border_transfer=False,
transfer_destinations=None,
automated_decision_making=False,
**kwargs):
"""Register a processing activity for assessment."""
activity_id = f"PA-{uuid.uuid4().hex[:8].upper()}"
activity = {
"activity_id": activity_id,
"name": name,
"description": description,
"data_controller": data_controller,
"data_processor": data_processor,
"data_categories": data_categories or [],
"data_subjects": data_subjects or [],
"legal_basis": legal_basis,
"retention_period_days": retention_period_days,
"cross_border_transfer": cross_border_transfer,
"transfer_destinations": transfer_destinations or [],
"automated_decision_making": automated_decision_making,
"registered_at": datetime.utcnow().isoformat(),
"ropa_entry": True,
"purpose_specified": bool(description),
"processing_description": description,
}
activity.update(kwargs)
# Calculate data sensitivity profile
max_sensitivity = "low"
total_weight = 0
for cat in activity["data_categories"]:
info = DATA_SENSITIVITY.get(cat, {"sensitivity": "unknown", "weight": 2})
total_weight += info["weight"]
if info["sensitivity"] == "special_category":
max_sensitivity = "special_category"
elif info["sensitivity"] == "high" and max_sensitivity not in ("special_category",):
max_sensitivity = "high"
elif info["sensitivity"] == "medium" and max_sensitivity in ("low",):
max_sensitivity = "medium"
activity["_sensitivity_profile"] = {
"max_sensitivity": max_sensitivity,
"total_weight": total_weight,
"category_count": len(activity["data_categories"]),
}
self.activities[activity_id] = activity
return activity
# ------------------------------------------------------------------
# Data flow mapping
# ------------------------------------------------------------------
def map_data_flows(self, activity_id, flows):
"""Map data flows for a processing activity."""
if activity_id not in self.activities:
raise ValueError(f"Activity {activity_id} not found")
flow_map = {
"activity_id": activity_id,
"activity_name": self.activities[activity_id]["name"],
"mapped_at": datetime.utcnow().isoformat(),
"flows": [],
"flow_summary": {
"total_stages": 0,
"has_cross_border": False,
"encryption_gaps": [],
"third_parties": [],
},
}
for i, flow in enumerate(flows):
flow_entry = {
"flow_id": f"FL-{i+1:03d}",
"stage": flow.get("stage", "unknown"),
"source": flow.get("source", ""),
"destination": flow.get("destination", ""),
"data_elements": flow.get("data_elements", []),
"encryption_in_transit": flow.get("encryption_in_transit", False),
"encryption_at_rest": flow.get("encryption_at_rest", False),
"protocol": flow.get("protocol", ""),
"cross_border": flow.get("cross_border", False),
"data_processing_agreement": flow.get("data_processing_agreement", False),
"retention_days": flow.get("retention_days"),
"access_controls": flow.get("access_controls", ""),
"method": flow.get("method", ""),
"verification": flow.get("verification", ""),
}
flow_map["flows"].append(flow_entry)
flow_map["flow_summary"]["total_stages"] += 1
if flow_entry["cross_border"]:
flow_map["flow_summary"]["has_cross_border"] = True
if flow_entry["stage"] in ("processing", "storage", "sharing"):
if not flow_entry["encryption_in_transit"] and flow_entry["stage"] != "storage":
flow_map["flow_summary"]["encryption_gaps"].append({
"flow_id": flow_entry["flow_id"],
"gap": "No encryption in transit",
"stage": flow_entry["stage"],
})
if not flow_entry["encryption_at_rest"] and flow_entry["stage"] == "storage":
flow_map["flow_summary"]["encryption_gaps"].append({
"flow_id": flow_entry["flow_id"],
"gap": "No encryption at rest",
"stage": flow_entry["stage"],
})
self.flow_maps[activity_id] = flow_map
# Update activity with flow-derived flags
activity = self.activities[activity_id]
activity["data_flow_mapped"] = True
activity["data_elements_mapped"] = True
if flow_map["flow_summary"]["has_cross_border"]:
activity["cross_border_transfer"] = True
if not flow_map["flow_summary"]["encryption_gaps"]:
activity["encryption_in_transit"] = True
activity["encryption_at_rest"] = True
return flow_map
def render_data_flow_diagram(self, flow_map):
"""Render an ASCII data flow diagram to stdout."""
print(f"\n{'='*70}")
print(f"DATA FLOW DIAGRAM: {flow_map['activity_name']}")
print(f"{'='*70}")
for flow in flow_map["flows"]:
stage_label = flow["stage"].upper()
enc_icon = "[ENC]" if flow["encryption_in_transit"] else "[!!!]"
border_icon = "[XBORDER]" if flow["cross_border"] else ""
elements = ", ".join(flow["data_elements"][:3])
if len(flow["data_elements"]) > 3:
elements += f" (+{len(flow['data_elements'])-3} more)"
print(f"\n [{stage_label}] {flow['source']}")
print(f" |")
print(f" | {enc_icon} {border_icon} ({elements})")
print(f" v")
print(f" [{stage_label}] {flow['destination']}")
if flow_map["flow_summary"]["encryption_gaps"]:
print(f"\n WARNING: {len(flow_map['flow_summary']['encryption_gaps'])} encryption gap(s) detected")
for gap in flow_map["flow_summary"]["encryption_gaps"]:
print(f" - {gap['flow_id']}: {gap['gap']} at {gap['stage']} stage")
print(f"\n{'='*70}\n")
# ------------------------------------------------------------------
# Risk assessment
# ------------------------------------------------------------------
def assess_privacy_risks(self, activity_id, assessment_type="full_dpia"):
"""Assess privacy risks for a processing activity."""
if activity_id not in self.activities:
raise ValueError(f"Activity {activity_id} not found")
activity = self.activities[activity_id]
sensitivity = activity.get("_sensitivity_profile", {})
risks = []
for rule in PRIVACY_RISK_RULES:
try:
triggered = rule["check"](activity)
except Exception:
triggered = False
if triggered:
# Adjust scores based on data sensitivity
likelihood_adj = 0
impact_adj = 0
if sensitivity.get("max_sensitivity") == "special_category":
impact_adj = 1
elif sensitivity.get("max_sensitivity") == "high":
impact_adj = 0.5
if sensitivity.get("category_count", 0) > 8:
likelihood_adj = 1
likelihood = min(5, rule["likelihood_base"] + likelihood_adj)
impact = min(5, rule["impact_base"] + impact_adj)
risk_score = round(likelihood * impact, 1)
risks.append({
"risk_id": rule["id"],
"category": rule["category"],
"description": rule["description"],
"likelihood": likelihood,
"impact": impact,
"risk_score": risk_score,
"severity": _risk_severity(risk_score),
"recommended_mitigation": rule["mitigation"],
})
# Sort by risk score descending
risks.sort(key=lambda r: r["risk_score"], reverse=True)
# Calculate summary
severity_counts = {}
for r in risks:
severity_counts[r["severity"]] = severity_counts.get(r["severity"], 0) + 1
overall = "LOW"
if severity_counts.get("CRITICAL", 0) > 0:
overall = "CRITICAL"
elif severity_counts.get("HIGH", 0) > 0:
overall = "HIGH"
elif severity_counts.get("MEDIUM", 0) > 0:
overall = "MEDIUM"
report = {
"activity_id": activity_id,
"activity_name": activity["name"],
"assessment_type": assessment_type,
"assessed_at": datetime.utcnow().isoformat(),
"overall_risk_level": overall,
"total_risks": len(risks),
"risk_count_by_severity": severity_counts,
"risks": risks,
"data_sensitivity_profile": sensitivity,
}
self.risk_reports[activity_id] = report
activity["risk_assessment_completed"] = True
activity["risks_prioritized"] = True
activity["risk_responses_identified"] = True
return report
# ------------------------------------------------------------------
# ICO DPIA screening checklist
# ------------------------------------------------------------------
def run_screening_checklist(self, uses_special_category_data=False,
large_scale_processing=False,
systematic_monitoring=False,
automated_decision_making=False,
cross_border_transfer=False,
vulnerable_data_subjects=False,
innovative_technology=False,
denial_of_service_or_rights=False,
evaluation_or_scoring=False,
matching_or_combining_datasets=False):
"""Run ICO DPIA screening checklist to determine if full DPIA is required.
A DPIA is required if at least two criteria are met, or if any single
high-impact criterion is triggered.
"""
criteria = {
"special_category_data": uses_special_category_data,
"large_scale_processing": large_scale_processing,
"systematic_monitoring": systematic_monitoring,
"automated_decision_making": automated_decision_making,
"cross_border_transfer": cross_border_transfer,
"vulnerable_data_subjects": vulnerable_data_subjects,
"innovative_technology": innovative_technology,
"denial_of_service_or_rights": denial_of_service_or_rights,
"evaluation_or_scoring": evaluation_or_scoring,
"matching_or_combining_datasets": matching_or_combining_datasets,
}
triggers = [name for name, value in criteria.items() if value]
high_impact_single = {"special_category_data", "large_scale_processing",
"denial_of_service_or_rights"}
dpia_required = (len(triggers) >= 2 or
bool(set(triggers) & high_impact_single))
return {
"dpia_required": dpia_required,
"triggers": triggers,
"trigger_count": len(triggers),
"criteria_evaluated": len(criteria),
"recommendation": (
"Full DPIA must be conducted before processing begins"
if dpia_required
else "Full DPIA not mandatory but recommended as good practice"
),
"ico_reference": "https://ico.org.uk/for-organisations/uk-gdpr-guidance-and-resources/accountability-and-governance/data-protection-impact-assessments-dpias/",
}
# ------------------------------------------------------------------
# GDPR compliance check
# ------------------------------------------------------------------
def check_gdpr_compliance(self, activity_id):
"""Run GDPR article-level compliance checks against a processing activity."""
if activity_id not in self.activities:
raise ValueError(f"Activity {activity_id} not found")
activity = self.activities[activity_id]
findings = []
total_checks = 0
passed = 0
for article_num, article_def in GDPR_DPIA_ARTICLES.items():
for check in article_def["checks"]:
total_checks += 1
check_id = check["id"]
desc = check["desc"]
# Determine if check is applicable
applicable = True
if "condition_field" in check:
cond_field = check["condition_field"]
cond_value = check["condition_value"]
actual = activity.get(cond_field)
if cond_value == "_nonempty":
applicable = bool(actual)
else:
applicable = actual == cond_value
if not applicable:
findings.append({
"article": check_id,
"description": desc,
"status": "NOT_APPLICABLE",
"details": f"Condition not met: {check.get('condition_field', '')}",
})
passed += 1
continue
# Check field value
field = check.get("field", "")
value = activity.get(field)
if "valid_values" in check:
if value in check["valid_values"]:
status = "COMPLIANT"
passed += 1
else:
status = "NON_COMPLIANT"
elif check.get("required"):
if value:
status = "COMPLIANT"
passed += 1
else:
status = "NON_COMPLIANT"
else:
if value:
status = "COMPLIANT"
passed += 1
else:
status = "NON_COMPLIANT"
findings.append({
"article": check_id,
"description": desc,
"status": status,
"field": field,
"value_present": bool(value),
})
score = round((passed / total_checks) * 100, 1) if total_checks else 0
report = {
"activity_id": activity_id,
"regulation": "GDPR",
"checked_at": datetime.utcnow().isoformat(),
"compliance_score": score,
"total_checks": total_checks,
"passed": passed,
"failed": total_checks - passed,
"findings": findings,
}
self.gdpr_reports[activity_id] = report
return report
# ------------------------------------------------------------------
# CCPA/CPRA compliance check
# ------------------------------------------------------------------
def check_ccpa_compliance(self, activity_id):
"""Run CCPA/CPRA section-level compliance checks."""
if activity_id not in self.activities:
raise ValueError(f"Activity {activity_id} not found")
activity = self.activities[activity_id]
# Derive helper flags
has_sensitive = any(
DATA_SENSITIVITY.get(cat, {}).get("sensitivity") in ("special_category", "high")
for cat in activity.get("data_categories", [])
)
activity["_has_sensitive_data"] = has_sensitive
findings = []
total_checks = 0
passed = 0
for section_num, section_def in CCPA_SECTIONS.items():
for check in section_def["checks"]:
total_checks += 1
check_id = check["id"]
desc = check["desc"]
# Applicability
applicable = True
if "condition_field" in check:
cond_field = check["condition_field"]
cond_value = check["condition_value"]
actual = activity.get(cond_field)
applicable = actual == cond_value
if not applicable:
findings.append({
"section": check_id,
"description": desc,
"status": "NOT_APPLICABLE",
})
passed += 1
continue
field = check.get("field", "")
value = activity.get(field)
if "max_value" in check:
if value and isinstance(value, (int, float)) and value <= check["max_value"]:
status = "COMPLIANT"
passed += 1
elif not value:
status = "NON_COMPLIANT"
else:
status = "NON_COMPLIANT"
elif check.get("required"):
if value:
status = "COMPLIANT"
passed += 1
else:
status = "NON_COMPLIANT"
else:
if value:
status = "COMPLIANT"
passed += 1
else:
status = "NON_COMPLIANT"
findings.append({
"section": check_id,
"description": desc,
"status": status,
"field": field,
"value_present": bool(value),
})
score = round((passed / total_checks) * 100, 1) if total_checks else 0
report = {
"activity_id": activity_id,
"regulation": "CCPA/CPRA",
"checked_at": datetime.utcnow().isoformat(),
"compliance_score": score,
"total_checks": total_checks,
"passed": passed,
"failed": total_checks - passed,
"findings": findings,
}
self.ccpa_reports[activity_id] = report
return report
# ------------------------------------------------------------------
# NIST Privacy Framework profile
# ------------------------------------------------------------------
def generate_nist_privacy_profile(self, activity_id, target_tier="tier_2"):
"""Generate NIST Privacy Framework profile for an activity."""
if activity_id not in self.activities:
raise ValueError(f"Activity {activity_id} not found")
activity = self.activities[activity_id]
profile = {
"activity_id": activity_id,
"target_tier": target_tier,
"generated_at": datetime.utcnow().isoformat(),
"functions": {},
}
total_subcats = 0
implemented = 0
for func_id, func_def in NIST_PRIVACY_FUNCTIONS.items():
func_outcomes = []
for cat_id, cat_def in func_def["categories"].items():
for subcat in cat_def["subcategories"]:
total_subcats += 1
check_field = subcat["check"]
is_implemented = bool(activity.get(check_field))
if is_implemented:
implemented += 1
func_outcomes.append({
"subcategory": subcat["id"],
"description": subcat["desc"],
"category": cat_def["name"],
"implemented": is_implemented,
"check_field": check_field,
})
profile["functions"][func_id] = func_outcomes
profile["coverage"] = {
"total_subcategories": total_subcats,
"implemented": implemented,
"gaps": total_subcats - implemented,
"coverage_percentage": round((implemented / total_subcats) * 100, 1) if total_subcats else 0,
}
return profile
# ------------------------------------------------------------------
# Remediation plan
# ------------------------------------------------------------------
def generate_remediation_plan(self, activity_id, risk_report=None,
gdpr_report=None, ccpa_report=None):
"""Generate a prioritized remediation plan from assessment results."""
action_items = []
now = datetime.utcnow()
# From risk report
if risk_report:
for risk in risk_report.get("risks", []):
priority_map = {"CRITICAL": "P1", "HIGH": "P2", "MEDIUM": "P3", "LOW": "P4"}
deadline_days = {"CRITICAL": 14, "HIGH": 30, "MEDIUM": 60, "LOW": 90}
sev = risk["severity"]
action_items.append({
"source": "risk_assessment",
"priority": priority_map.get(sev, "P4"),
"action": risk["recommended_mitigation"],
"addresses_risks": [risk["risk_id"]],
"risk_category": risk["category"],
"risk_score": risk["risk_score"],
"owner": "Privacy Team",
"deadline": (now + timedelta(days=deadline_days.get(sev, 90))).strftime("%Y-%m-%d"),
"status": "open",
})
# From GDPR findings
if gdpr_report:
for finding in gdpr_report.get("findings", []):
if finding["status"] == "NON_COMPLIANT":
action_items.append({
"source": "gdpr_compliance",
"priority": "P2",
"action": f"Address GDPR Art. {finding['article']} non-compliance: {finding['description']}",
"addresses_risks": [f"GDPR-{finding['article']}"],
"risk_category": "Regulatory Compliance",
"owner": "DPO / Legal",
"deadline": (now + timedelta(days=30)).strftime("%Y-%m-%d"),
"status": "open",
})
# From CCPA findings
if ccpa_report:
for finding in ccpa_report.get("findings", []):
if finding["status"] == "NON_COMPLIANT":
action_items.append({
"source": "ccpa_compliance",
"priority": "P2",
"action": f"Address CCPA Sec. {finding['section']} non-compliance: {finding['description']}",
"addresses_risks": [f"CCPA-{finding['section']}"],
"risk_category": "Regulatory Compliance",
"owner": "Privacy Team / Legal",
"deadline": (now + timedelta(days=30)).strftime("%Y-%m-%d"),
"status": "open",
})
# Sort by priority
priority_order = {"P1": 0, "P2": 1, "P3": 2, "P4": 3}
action_items.sort(key=lambda x: priority_order.get(x["priority"], 9))
return {
"activity_id": activity_id,
"generated_at": now.isoformat(),
"total_actions": len(action_items),
"by_priority": {
p: sum(1 for a in action_items if a["priority"] == p)
for p in ["P1", "P2", "P3", "P4"]
},
"action_items": action_items,
}
# ------------------------------------------------------------------
# Report generation
# ------------------------------------------------------------------
def generate_dpia_report(self, activity_id, output_path="dpia_report.json",
format="json"):
"""Generate the formal DPIA report document."""
if activity_id not in self.activities:
raise ValueError(f"Activity {activity_id} not found")
activity = self.activities[activity_id]
report = {
"report_type": "Data Protection Impact Assessment (DPIA)",
"report_id": f"DPIA-{uuid.uuid4().hex[:8].upper()}",
"generated_at": datetime.utcnow().isoformat(),
"organization": self.organization_name,
"dpo_contact": self.dpo_email,
"processing_activity": {
k: v for k, v in activity.items() if not k.startswith("_")
},
"data_flow_map": self.flow_maps.get(activity_id),
"risk_assessment": self.risk_reports.get(activity_id),
"gdpr_compliance": self.gdpr_reports.get(activity_id),
"ccpa_compliance": self.ccpa_reports.get(activity_id),
"dpo_sign_off": {
"status": "pending",
"dpo_name": "",
"date": "",
"comments": "",
},
"review_schedule": {
"next_review": (datetime.utcnow() + timedelta(days=365)).strftime("%Y-%m-%d"),
"review_frequency": "annual",
"triggers_for_early_review": [
"Change in processing purpose",
"New data categories collected",
"Change in processor or subprocessor",
"Security incident or breach",
"Regulatory change",
"Complaints from data subjects",
],
},
}
with open(output_path, "w") as f:
json.dump(report, f, indent=2, default=str)
return report
def main():
parser = argparse.ArgumentParser(
description="Privacy Impact Assessment (PIA/DPIA) Automation Agent"
)
parser.add_argument("--org", default="", help="Organization name")
parser.add_argument("--dpo-email", default="", help="DPO email address")
parser.add_argument("--output", default="pia_report.json", help="Output report path")
parser.add_argument("--action", choices=[
"screening", "full_assessment", "gdpr_check", "ccpa_check",
"nist_profile", "demo",
], default="demo", help="Action to perform")
args = parser.parse_args()
engine = PrivacyImpactAssessmentEngine(
organization_name=args.org,
dpo_email=args.dpo_email,
)
if args.action == "screening":
result = engine.run_screening_checklist(
large_scale_processing=True,
systematic_monitoring=True,
automated_decision_making=True,
)
print(f"[+] DPIA Required: {result['dpia_required']}")
print(f" Triggers: {result['triggers']}")
elif args.action == "demo":
print("[*] Running demonstration PIA workflow...\n")
# Register activity
activity = engine.register_processing_activity(
name="Customer Analytics Platform",
description="Collects browsing behavior and purchase history for personalization",
data_controller="Demo Corp",
data_processor="CloudAnalytics Inc",
data_categories=["browsing_history", "purchase_records", "ip_address",
"device_id", "email", "name"],
data_subjects=["customers", "website_visitors"],
legal_basis="consent",
retention_period_days=730,
cross_border_transfer=True,
transfer_destinations=["US", "IN"],
automated_decision_making=True,
)
print(f"[+] Registered activity: {activity['activity_id']}")
# Map data flows
flow_map = engine.map_data_flows(
activity_id=activity["activity_id"],
flows=[
{"stage": "collection", "source": "Web browser",
"destination": "CDN edge", "data_elements": ["ip_address", "device_id"],
"encryption_in_transit": True, "protocol": "TLS 1.3"},
{"stage": "processing", "source": "CDN edge",
"destination": "Data warehouse (US)", "data_elements": ["browsing_history", "purchase_records"],
"encryption_in_transit": True, "encryption_at_rest": True, "protocol": "mTLS"},
{"stage": "sharing", "source": "Data warehouse",
"destination": "ML Provider (IN)", "data_elements": ["browsing_history"],
"encryption_in_transit": True, "cross_border": True,
"data_processing_agreement": True},
{"stage": "deletion", "source": "All stores",
"destination": "Secure erasure", "method": "Cryptographic erasure"},
],
)
engine.render_data_flow_diagram(flow_map)
# Risk assessment
risk_report = engine.assess_privacy_risks(activity_id=activity["activity_id"])
print(f"[+] Risk Assessment: {risk_report['overall_risk_level']}")
print(f" Total Risks: {risk_report['total_risks']}")
for risk in risk_report["risks"][:5]:
print(f" [{risk['severity']}] {risk['category']}: Score {risk['risk_score']}/25")
# GDPR check
gdpr = engine.check_gdpr_compliance(activity_id=activity["activity_id"])
print(f"\n[+] GDPR Compliance Score: {gdpr['compliance_score']}/100")
for f in gdpr["findings"]:
if f["status"] == "NON_COMPLIANT":
print(f" [FAIL] Art.{f['article']}: {f['description']}")
# CCPA check
ccpa = engine.check_ccpa_compliance(activity_id=activity["activity_id"])
print(f"\n[+] CCPA Compliance Score: {ccpa['compliance_score']}/100")
for f in ccpa["findings"]:
if f["status"] == "NON_COMPLIANT":
print(f" [FAIL] Sec.{f['section']}: {f['description']}")
# Remediation
remediation = engine.generate_remediation_plan(
activity_id=activity["activity_id"],
risk_report=risk_report,
gdpr_report=gdpr,
ccpa_report=ccpa,
)
print(f"\n[+] Remediation Plan: {remediation['total_actions']} action items")
print(f" By Priority: {remediation['by_priority']}")
# Generate report
engine.generate_dpia_report(
activity_id=activity["activity_id"],
output_path=args.output,
)
print(f"\n[+] DPIA report saved to {args.output}")
else:
print(f"[!] Action '{args.action}' requires an activity to be registered first.")
print(" Use --action demo for a full demonstration workflow.")
if __name__ == "__main__":
main()