Files
mukul975 c47eed6a64 Production hardening: security fixes, code quality, 724 skills complete
- Fix 25 shell=True subprocess calls with list-based commands
- Fix 49 verify=False in defensive skills (env-var override)
- Add timeout to 231 HTTP/subprocess/socket calls
- Fix 6 SQL injection patterns with whitelist validation
- Replace 8 __import__() with standard imports
- Remove 701 unused imports across 442 files
- Add authorized-testing disclaimers to all offensive skills
- Complete 11 incomplete skill directories
- Expand 10 stub SKILL.md files with full content
- Fix 2 YAML parse errors in frontmatter
- Fix 5 pre-existing syntax errors
- Convert 22 hardcoded paths/ports to environment variables
- Back up 21 redundant skill pairs to .bak
- Fix 2 global declaration errors
- 724/724 skills with full folder anatomy (SKILL.md + agent.py + api-reference.md + LICENSE)
- 0 compile errors across all 724 agent.py files
2026-03-19 13:26:49 +01:00

166 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""AWS Macie data classification agent using boto3 for S3 sensitive data discovery."""
import argparse
import json
import logging
import os
import sys
from datetime import datetime
from typing import List
try:
import boto3
from botocore.exceptions import ClientError
except ImportError:
sys.exit("boto3 required: pip install boto3")
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
def get_macie_client(profile: str = "", region: str = "us-east-1"):
"""Create Macie2 client."""
session = boto3.Session(profile_name=profile) if profile else boto3.Session()
return session.client("macie2", region_name=region)
def enable_macie(client) -> dict:
"""Enable Macie in the account if not already enabled."""
try:
client.get_macie_session()
return {"status": "already_enabled"}
except ClientError:
try:
client.enable_macie(status="ENABLED")
return {"status": "enabled"}
except ClientError as exc:
return {"error": str(exc)}
def list_s3_buckets_summary(client) -> List[dict]:
"""Get Macie's summary of S3 bucket inventory."""
try:
resp = client.describe_buckets(criteria={}, maxResults=50)
buckets = []
for b in resp.get("buckets", []):
buckets.append({
"name": b.get("bucketName", ""),
"region": b.get("region", ""),
"classifiable_objects": b.get("classifiableObjectCount", 0),
"classifiable_size": b.get("classifiableSizeInBytes", 0),
"encryption": b.get("serverSideEncryption", {}).get("type", "NONE"),
"public_access": b.get("publicAccess", {}).get("effectivePermission", "NOT_PUBLIC"),
"shared_access": b.get("sharedAccess", "NOT_SHARED"),
})
return buckets
except ClientError as exc:
logger.error("describe_buckets failed: %s", exc)
return []
def create_classification_job(client, bucket_names: List[str], job_name: str) -> dict:
"""Create a one-time sensitive data discovery job for specified buckets."""
try:
resp = client.create_classification_job(
jobType="ONE_TIME",
name=job_name,
s3JobDefinition={
"bucketDefinitions": [{
"accountId": boto3.client("sts").get_caller_identity()["Account"],
"buckets": bucket_names,
}]
},
description=f"Scan {len(bucket_names)} buckets for sensitive data",
)
return {"job_id": resp["jobId"], "job_arn": resp["jobArn"]}
except ClientError as exc:
return {"error": str(exc)}
def get_finding_statistics(client) -> dict:
"""Get statistics on Macie findings by severity and type."""
try:
by_severity = client.get_finding_statistics(
groupBy="severity.description",
)
by_type = client.get_finding_statistics(
groupBy="type",
)
return {
"by_severity": by_severity.get("countsBySeverity", []),
"by_type": by_type.get("countsByGroup", []),
}
except ClientError as exc:
return {"error": str(exc)}
def list_findings(client, severity: str = "High", max_results: int = 50) -> List[dict]:
"""List recent Macie findings filtered by severity."""
try:
resp = client.list_findings(
findingCriteria={
"criterion": {
"severity.description": {"eq": [severity]}
}
},
maxResults=max_results,
)
finding_ids = resp.get("findingIds", [])
if not finding_ids:
return []
details = client.get_findings(findingIds=finding_ids[:20])
return [{
"id": f.get("id", ""),
"type": f.get("type", ""),
"severity": f.get("severity", {}).get("description", ""),
"title": f.get("title", ""),
"bucket": f.get("resourcesAffected", {}).get("s3Bucket", {}).get("name", ""),
"count": f.get("count", 0),
"created": f.get("createdAt", ""),
} for f in details.get("findings", [])]
except ClientError as exc:
return [{"error": str(exc)}]
def generate_report(client) -> dict:
"""Generate Macie data classification report."""
report = {"analysis_date": datetime.utcnow().isoformat()}
report["macie_status"] = enable_macie(client)
report["bucket_inventory"] = list_s3_buckets_summary(client)
report["finding_statistics"] = get_finding_statistics(client)
report["high_findings"] = list_findings(client, "High")
report["critical_findings"] = list_findings(client, "Critical")
public_buckets = [b for b in report["bucket_inventory"]
if b.get("public_access") != "NOT_PUBLIC"]
report["public_buckets"] = public_buckets
report["summary"] = {
"total_buckets": len(report["bucket_inventory"]),
"public_buckets": len(public_buckets),
"high_findings": len(report["high_findings"]),
"critical_findings": len(report["critical_findings"]),
}
return report
def main():
parser = argparse.ArgumentParser(description="AWS Macie Data Classification Agent")
parser.add_argument("--profile", default="", help="AWS CLI profile")
parser.add_argument("--region", default="us-east-1")
parser.add_argument("--output-dir", default=".")
parser.add_argument("--output", default="macie_report.json")
args = parser.parse_args()
os.makedirs(args.output_dir, exist_ok=True)
client = get_macie_client(args.profile, args.region)
report = generate_report(client)
out_path = os.path.join(args.output_dir, args.output)
with open(out_path, "w") as f:
json.dump(report, f, indent=2, default=str)
logger.info("Report saved to %s", out_path)
print(json.dumps(report["summary"], indent=2))
if __name__ == "__main__":
main()