Files
T
mukul975 27c6414ca5 Add folder anatomy (scripts/agent.py + references/api-reference.md) for 648 cybersecurity skills
Complete skill folder anatomy across all cybersecurity skills:
- scripts/agent.py: 80-150 line Python agents using real libraries (impacket,
  boto3, azure-mgmt-*, kubernetes, pefile, yara, scapy, shodan, stix2, etc.)
- references/api-reference.md: real API documentation with method signatures
- LICENSE: MIT license for all skill folders
2026-03-10 21:02:12 +01:00

198 lines
7.2 KiB
Python

#!/usr/bin/env python3
"""Cloud DLP agent for sensitive data discovery using Google Cloud DLP and AWS Macie."""
import json
import sys
import argparse
from datetime import datetime
try:
import boto3
from botocore.exceptions import ClientError
except ImportError:
boto3 = None
try:
from google.cloud import dlp_v2
except ImportError:
dlp_v2 = None
# Detector names that the GCP helpers in this file send to the DLP API as
# ``{"name": <entry>}`` items.
# NOTE(review): presumably these are Google Cloud DLP built-in infoType
# identifiers -- verify each against the current infoType reference.

# Personally identifiable information. This is the default detector set for
# scan_text_with_gcp_dlp() and deidentify_text_with_gcp() when the caller
# passes no ``info_types``.
INFO_TYPES_PII = [
"PERSON_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER", "US_SOCIAL_SECURITY_NUMBER",
"CREDIT_CARD_NUMBER", "US_DRIVERS_LICENSE_NUMBER", "DATE_OF_BIRTH",
"STREET_ADDRESS", "IP_ADDRESS", "PASSPORT",
]
# Financial identifiers. Not referenced by the code visible in this file;
# available for callers to pass as ``info_types``.
INFO_TYPES_FINANCIAL = [
"CREDIT_CARD_NUMBER", "IBAN_CODE", "SWIFT_CODE",
"US_BANK_ROUTING_MICR", "US_EMPLOYER_IDENTIFICATION_NUMBER",
]
# Healthcare identifiers. Not referenced by the code visible in this file;
# available for callers to pass as ``info_types``.
INFO_TYPES_HEALTH = [
"US_HEALTHCARE_NPI", "US_DEA_NUMBER", "MEDICAL_RECORD_NUMBER",
]
def scan_text_with_gcp_dlp(project_id, text, info_types=None):
    """Inspect ``text`` with Google Cloud DLP and summarise each finding.

    Args:
        project_id: GCP project ID; the request parent is
            ``projects/<project_id>``.
        text: The string to inspect.
        info_types: Iterable of detector names to look for. Falls back to
            ``INFO_TYPES_PII`` when ``None``.

    Returns:
        ``None`` (after printing an install hint) when ``google-cloud-dlp``
        is not importable; otherwise a list of dicts with the keys
        ``info_type``, ``likelihood``, ``quote`` and ``location``.
    """
    if dlp_v2 is None:
        print("[!] Install google-cloud-dlp: pip install google-cloud-dlp")
        return None

    dlp_client = dlp_v2.DlpServiceClient()
    selected_types = INFO_TYPES_PII if info_types is None else info_types
    request = {
        "parent": f"projects/{project_id}",
        "inspect_config": {
            "info_types": [{"name": type_name} for type_name in selected_types],
            "min_likelihood": dlp_v2.Likelihood.LIKELY,
            "include_quote": True,
            "limits": {"max_findings_per_request": 50},
        },
        "item": {"value": text},
    }
    inspection = dlp_client.inspect_content(request=request)

    def _summarise(finding):
        # Quotes longer than 50 characters are cut and marked with an ellipsis.
        quote = finding.quote
        if len(quote) > 50:
            quote = quote[:50] + "..."
        byte_range = finding.location.byte_range
        return {
            "info_type": finding.info_type.name,
            "likelihood": dlp_v2.Likelihood(finding.likelihood).name,
            "quote": quote,
            "location": {"start": byte_range.start, "end": byte_range.end},
        }

    return [_summarise(finding) for finding in inspection.result.findings]
def deidentify_text_with_gcp(project_id, text, info_types=None):
    """Mask sensitive values in ``text`` via Google Cloud DLP.

    Args:
        project_id: GCP project ID; the request parent is
            ``projects/<project_id>``.
        text: The string to de-identify.
        info_types: Iterable of detector names to mask. Falls back to
            ``INFO_TYPES_PII`` when ``None``.

    Returns:
        ``None`` when ``google-cloud-dlp`` is not importable; otherwise the
        ``item.value`` string from the DLP response.
    """
    if dlp_v2 is None:
        return None

    dlp_client = dlp_v2.DlpServiceClient()
    selected_types = INFO_TYPES_PII if info_types is None else info_types
    type_specs = [{"name": type_name} for type_name in selected_types]

    # NOTE(review): number_to_mask=0 presumably means "mask every character"
    # -- confirm against the CharacterMaskConfig reference.
    mask_transformation = {
        "primitive_transformation": {
            "character_mask_config": {"masking_character": "*", "number_to_mask": 0}
        },
        "info_types": type_specs,
    }
    request = {
        "parent": f"projects/{project_id}",
        "deidentify_config": {
            "info_type_transformations": {"transformations": [mask_transformation]}
        },
        "inspect_config": {"info_types": type_specs},
        "item": {"value": text},
    }
    masked = dlp_client.deidentify_content(request=request)
    return masked.item.value
def enable_macie(region="us-east-1"):
    """Enable Amazon Macie for S3 sensitive data discovery.

    Args:
        region: AWS region in which to enable Macie.

    Returns:
        ``None`` (after printing an install hint) when boto3 is not
        importable; ``{"status": "enabled"}`` on success;
        ``{"status": "already_enabled"}`` when the service reports that Macie
        is already on for the account; ``{"error": <message>}`` for any other
        ``ClientError``.
    """
    if boto3 is None:
        print("[!] Install boto3: pip install boto3")
        return None

    client = boto3.client("macie2", region_name=region)
    try:
        client.enable_macie(status="ENABLED", findingPublishingFrequency="FIFTEEN_MINUTES")
    except ClientError as e:
        # Prefer the structured error code over message wording: the service
        # phrases this as "... has already been enabled", which the plain
        # "already enabled" substring test does not match.
        error_code = e.response.get("Error", {}).get("Code", "")
        message = str(e).lower()
        already_on = (
            error_code == "ConflictException"
            or "already enabled" in message
            or "already been enabled" in message
        )
        if already_on:
            return {"status": "already_enabled"}
        return {"error": str(e)}
    return {"status": "enabled"}
def create_macie_classification_job(region, bucket_names, job_name):
    """Create a one-time Macie classification job to scan S3 buckets.

    Args:
        region: AWS region of the Macie client.
        bucket_names: List of S3 bucket names to scan; they are scoped to the
            caller's own account (looked up through STS).
        job_name: Name to give the classification job.

    Returns:
        ``None`` when boto3 is not importable;
        ``{"job_id": <id>, "status": "created"}`` on success;
        ``{"error": <message>}`` when no buckets were supplied or an AWS call
        raises ``ClientError``.
    """
    if boto3 is None:
        return None
    if not bucket_names:
        # Fail fast with the same error shape instead of a round trip that
        # the service would reject.
        return {"error": "No S3 bucket names supplied for the classification job"}

    # Macie limits job descriptions to 200 characters; a long bucket list
    # would otherwise make the whole request fail validation.
    max_description_len = 200
    description = f"DLP scan for sensitive data in {', '.join(bucket_names)}"
    if len(description) > max_description_len:
        description = description[:max_description_len - 3] + "..."

    client = boto3.client("macie2", region_name=region)
    try:
        account_id = boto3.client("sts").get_caller_identity()["Account"]
        resp = client.create_classification_job(
            jobType="ONE_TIME",
            name=job_name,
            s3JobDefinition={
                "bucketDefinitions": [{"accountId": account_id, "buckets": bucket_names}]
            },
            description=description,
        )
    except ClientError as e:
        return {"error": str(e)}
    return {"job_id": resp["jobId"], "status": "created"}
def get_macie_findings(region="us-east-1", max_results=50):
    """Retrieve Macie findings, requested in descending severity order.

    Args:
        region: AWS region of the Macie client.
        max_results: Maximum number of finding IDs to request from
            ``list_findings``.

    Returns:
        ``[]`` when boto3 is not importable or no findings exist; otherwise a
        list of dicts with the keys ``id``, ``type``, ``severity``, ``title``,
        ``bucket`` and ``count``. On ``ClientError`` the list holds a single
        ``{"error": <message>}`` entry.
    """
    if boto3 is None:
        return []

    # get_findings accepts at most 50 IDs per call, so larger result sets
    # have to be fetched in batches.
    batch_size = 50
    client = boto3.client("macie2", region_name=region)
    try:
        resp = client.list_findings(
            sortCriteria={"attributeName": "severity.score", "orderBy": "DESC"},
            maxResults=max_results)
        finding_ids = resp.get("findingIds", [])

        findings = []
        for start in range(0, len(finding_ids), batch_size):
            details = client.get_findings(
                findingIds=finding_ids[start:start + batch_size])
            findings.extend(
                {
                    "id": f["id"],
                    "type": f["type"],
                    "severity": f["severity"]["score"],
                    "title": f["title"],
                    "bucket": f.get("resourcesAffected", {}).get(
                        "s3Bucket", {}).get("name", ""),
                    "count": f.get("count", 1),
                }
                for f in details.get("findings", [])
            )
        return findings
    except ClientError as e:
        return [{"error": str(e)}]
def run_dlp_report(project_id=None, region="us-east-1"):
    """Print a DLP discovery report and return the collected data.

    When boto3 is importable, this enables Macie in ``region`` and lists up
    to five findings. When ``google-cloud-dlp`` is importable and
    ``project_id`` is given, it scans a built-in sample string with GCP DLP.

    Args:
        project_id: GCP project ID; the GCP section is skipped when falsy.
        region: AWS region used for the Macie calls.

    Returns:
        A JSON-serialisable dict with the keys ``generated`` (UTC timestamp
        string), ``macie`` and ``gcp_dlp``. A section is ``None`` when it was
        skipped.
    """
    banner = "=" * 60
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    generated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    report = {"generated": f"{generated} UTC", "macie": None, "gcp_dlp": None}

    print(f"\n{banner}")
    print(" CLOUD DLP DATA PROTECTION REPORT")
    print(f" Generated: {generated} UTC")
    print(f"{banner}\n")

    if boto3:
        print("--- AWS MACIE STATUS ---")
        macie_status = enable_macie(region)
        print(f" Macie: {macie_status}")
        findings = get_macie_findings(region)
        # get_macie_findings signals failure with {"error": ...} entries;
        # those must not be counted or printed as real findings.
        errors = [f["error"] for f in findings if "error" in f]
        if errors:
            print(f" Findings: unavailable ({errors[0]})")
            findings = []
        else:
            print(f" Findings: {len(findings)}")
            for f in findings[:5]:
                print(f" [{f.get('severity', 'N/A')}] {f.get('title', 'N/A')} - {f.get('bucket', 'N/A')}")
        report["macie"] = {"status": macie_status, "findings": findings, "errors": errors}

    if dlp_v2 and project_id:
        print("\n--- GCP DLP SCAN ---")
        sample = "Contact John Doe at john@example.com, SSN 123-45-6789, CC 4111-1111-1111-1111"
        findings = scan_text_with_gcp_dlp(project_id, sample)
        if findings:
            for f in findings:
                print(f" [{f['likelihood']}] {f['info_type']}: {f['quote']}")
        report["gcp_dlp"] = {"findings": findings or []}

    print(f"\n{banner}\n")
    return report
def main():
    """Parse command-line arguments and run the selected DLP action.

    Exactly one action runs, chosen in this order:

    1. ``--scan-text`` together with ``--gcp-project``: scan the text with
       GCP DLP and print the findings as JSON.
    2. ``--scan-buckets``: create a Macie classification job named
       ``dlp-agent-scan`` and print the result as JSON.
    3. ``--report``: print the combined DLP report.
    4. Otherwise: print the usage help.

    When ``--output`` is given and the action returned a value, that value
    is also written to the given path as JSON. Nothing is written when the
    action returns ``None``.
    """
    parser = argparse.ArgumentParser(description="Cloud DLP Data Protection Agent")
    parser.add_argument("--gcp-project", help="GCP project ID for DLP API")
    parser.add_argument("--aws-region", default="us-east-1", help="AWS region for Macie")
    parser.add_argument("--scan-text", help="Text to scan for sensitive data")
    parser.add_argument("--scan-buckets", nargs="+", help="S3 bucket names to scan with Macie")
    parser.add_argument("--report", action="store_true", help="Generate DLP report")
    parser.add_argument("--output", help="Save report to JSON")
    args = parser.parse_args()

    result = None
    if args.scan_text and args.gcp_project:
        result = scan_text_with_gcp_dlp(args.gcp_project, args.scan_text)
        print(json.dumps(result, indent=2))
    elif args.scan_buckets:
        result = create_macie_classification_job(args.aws_region, args.scan_buckets, "dlp-agent-scan")
        print(json.dumps(result, indent=2))
    elif args.report:
        result = run_dlp_report(args.gcp_project, args.aws_region)
    else:
        parser.print_help()

    # Honour --output, which was previously parsed but never used.
    if args.output and result is not None:
        with open(args.output, "w", encoding="utf-8") as fh:
            json.dump(result, fh, indent=2)


if __name__ == "__main__":
    main()