#!/usr/bin/env python3 """ Microsegmentation Policy Analyzer and Flow Validator Analyzes network flow data to identify microsegmentation opportunities, validates policies against observed traffic, and generates segmentation reports. """ import json import csv import sys from collections import defaultdict from datetime import datetime from pathlib import Path from typing import Optional def parse_flow_data(flow_file: str) -> list: """Parse network flow data from CSV or JSON format.""" flows = [] path = Path(flow_file) if path.suffix == ".csv": with open(flow_file) as f: reader = csv.DictReader(f) for row in reader: flows.append({ "src_ip": row.get("src_ip", ""), "dst_ip": row.get("dst_ip", ""), "src_port": int(row.get("src_port", 0)), "dst_port": int(row.get("dst_port", 0)), "protocol": row.get("protocol", "tcp").lower(), "bytes": int(row.get("bytes", 0)), "packets": int(row.get("packets", 0)), "timestamp": row.get("timestamp", ""), "src_label": row.get("src_label", ""), "dst_label": row.get("dst_label", ""), }) elif path.suffix == ".json": with open(flow_file) as f: flows = json.load(f) return flows def build_dependency_map(flows: list) -> dict: """Build an application dependency map from flow data.""" dep_map = defaultdict(lambda: defaultdict(lambda: { "ports": set(), "protocols": set(), "total_bytes": 0, "total_packets": 0, "flow_count": 0 })) for flow in flows: src = flow.get("src_label") or flow["src_ip"] dst = flow.get("dst_label") or flow["dst_ip"] port = flow["dst_port"] proto = flow["protocol"] dep_map[src][dst]["ports"].add(port) dep_map[src][dst]["protocols"].add(proto) dep_map[src][dst]["total_bytes"] += flow.get("bytes", 0) dep_map[src][dst]["total_packets"] += flow.get("packets", 0) dep_map[src][dst]["flow_count"] += 1 serializable = {} for src, destinations in dep_map.items(): serializable[src] = {} for dst, stats in destinations.items(): serializable[src][dst] = { "ports": sorted(list(stats["ports"])), "protocols": sorted(list(stats["protocols"])), "total_bytes": stats["total_bytes"], "total_packets": stats["total_packets"], "flow_count": stats["flow_count"], } return serializable def generate_segmentation_rules(dep_map: dict, default_action: str = "deny") -> list: """Generate microsegmentation rules from dependency map.""" rules = [] rule_id = 1 for src, destinations in dep_map.items(): for dst, stats in destinations.items(): for port in stats["ports"]: for proto in stats["protocols"]: rules.append({ "id": rule_id, "action": "allow", "src": src, "dst": dst, "port": port, "protocol": proto, "justification": f"Observed {stats['flow_count']} flows, {stats['total_bytes']} bytes", "status": "proposed", }) rule_id += 1 rules.append({ "id": rule_id, "action": default_action, "src": "any", "dst": "any", "port": "any", "protocol": "any", "justification": "Default deny - zero trust baseline", "status": "proposed", }) return rules def validate_policy_against_flows(policy_rules: list, flows: list) -> dict: """Validate segmentation policy against observed flows. Identify blocks and gaps.""" results = { "allowed_flows": 0, "blocked_flows": 0, "unmatched_flows": 0, "blocked_details": [], "unmatched_details": [], } allow_rules = [r for r in policy_rules if r["action"] == "allow"] has_default_deny = any( r["action"] == "deny" and r.get("src") == "any" and r.get("dst") == "any" for r in policy_rules ) for flow in flows: src = flow.get("src_label") or flow["src_ip"] dst = flow.get("dst_label") or flow["dst_ip"] port = flow["dst_port"] proto = flow["protocol"] matched = False for rule in allow_rules: src_match = rule["src"] in (src, "any") dst_match = rule["dst"] in (dst, "any") port_match = rule["port"] in (port, "any") proto_match = rule["protocol"] in (proto, "any") if src_match and dst_match and port_match and proto_match: results["allowed_flows"] += 1 matched = True break if not matched: if has_default_deny: results["blocked_flows"] += 1 results["blocked_details"].append({ "src": src, "dst": dst, "port": port, "protocol": proto, "timestamp": flow.get("timestamp", ""), }) else: results["unmatched_flows"] += 1 results["unmatched_details"].append({ "src": src, "dst": dst, "port": port, "protocol": proto, }) return results def identify_segmentation_zones(flows: list, workload_labels: dict) -> dict: """Identify natural segmentation zones from flow patterns and labels.""" zones = defaultdict(lambda: {"workloads": set(), "internal_flows": 0, "external_flows": 0}) for flow in flows: src_ip = flow["src_ip"] dst_ip = flow["dst_ip"] src_zone = workload_labels.get(src_ip, {}).get("application", "unknown") dst_zone = workload_labels.get(dst_ip, {}).get("application", "unknown") zones[src_zone]["workloads"].add(src_ip) zones[dst_zone]["workloads"].add(dst_ip) if src_zone == dst_zone: zones[src_zone]["internal_flows"] += 1 else: zones[src_zone]["external_flows"] += 1 zones[dst_zone]["external_flows"] += 1 serializable = {} for zone_name, stats in zones.items(): serializable[zone_name] = { "workloads": sorted(list(stats["workloads"])), "workload_count": len(stats["workloads"]), "internal_flows": stats["internal_flows"], "external_flows": stats["external_flows"], "isolation_ratio": round( stats["internal_flows"] / max(stats["internal_flows"] + stats["external_flows"], 1), 2 ), } return serializable def detect_anomalous_flows(flows: list, policy_rules: list) -> list: """Detect flows that violate segmentation policy and may indicate lateral movement.""" anomalies = [] allow_set = set() for rule in policy_rules: if rule["action"] == "allow": allow_set.add((rule["src"], rule["dst"], rule["port"], rule["protocol"])) for flow in flows: src = flow.get("src_label") or flow["src_ip"] dst = flow.get("dst_label") or flow["dst_ip"] port = flow["dst_port"] proto = flow["protocol"] is_allowed = ( (src, dst, port, proto) in allow_set or ("any", dst, port, proto) in allow_set or (src, "any", port, proto) in allow_set or (src, dst, "any", proto) in allow_set ) if not is_allowed: anomalies.append({ "src": src, "dst": dst, "port": port, "protocol": proto, "bytes": flow.get("bytes", 0), "timestamp": flow.get("timestamp", ""), "severity": "high" if port in [22, 3389, 445, 135] else "medium", "reason": "Flow not covered by any allow rule", }) return sorted(anomalies, key=lambda x: x["severity"]) def generate_segmentation_report(flows: list, policy_rules: list, workload_labels: dict) -> dict: """Generate comprehensive microsegmentation report.""" dep_map = build_dependency_map(flows) zones = identify_segmentation_zones(flows, workload_labels) validation = validate_policy_against_flows(policy_rules, flows) anomalies = detect_anomalous_flows(flows, policy_rules) unique_sources = set(f.get("src_label") or f["src_ip"] for f in flows) unique_dests = set(f.get("dst_label") or f["dst_ip"] for f in flows) unique_ports = set(f["dst_port"] for f in flows) return { "generated": datetime.now().isoformat(), "summary": { "total_flows": len(flows), "unique_sources": len(unique_sources), "unique_destinations": len(unique_dests), "unique_ports": len(unique_ports), "segmentation_zones": len(zones), "policy_rules": len(policy_rules), "allowed_flows": validation["allowed_flows"], "blocked_flows": validation["blocked_flows"], "anomalies_detected": len(anomalies), }, "zones": zones, "validation": validation, "anomalies": anomalies[:50], "dependency_map_summary": { src: len(dsts) for src, dsts in dep_map.items() }, } def main(): import argparse parser = argparse.ArgumentParser(description="Microsegmentation Policy Analyzer") parser.add_argument("--flows", type=str, help="Path to flow data (CSV or JSON)") parser.add_argument("--policy", type=str, help="Path to policy rules JSON") parser.add_argument("--labels", type=str, help="Path to workload labels JSON") parser.add_argument("--action", choices=["map", "rules", "validate", "report", "anomalies"], default="report", help="Action to perform") parser.add_argument("--output", type=str, default="segmentation_report.json", help="Output file") args = parser.parse_args() if not args.flows: parser.print_help() return flows = parse_flow_data(args.flows) print(f"Loaded {len(flows)} flows") policy_rules = [] if args.policy: with open(args.policy) as f: policy_rules = json.load(f) workload_labels = {} if args.labels: with open(args.labels) as f: workload_labels = json.load(f) if args.action == "map": dep_map = build_dependency_map(flows) with open(args.output, "w") as f: json.dump(dep_map, f, indent=2) print(f"Dependency map: {len(dep_map)} sources mapped") elif args.action == "rules": dep_map = build_dependency_map(flows) rules = generate_segmentation_rules(dep_map) with open(args.output, "w") as f: json.dump(rules, f, indent=2) print(f"Generated {len(rules)} proposed rules") elif args.action == "validate": if not policy_rules: print("Error: --policy required for validation") return results = validate_policy_against_flows(policy_rules, flows) with open(args.output, "w") as f: json.dump(results, f, indent=2) print(f"Allowed: {results['allowed_flows']}, Blocked: {results['blocked_flows']}") elif args.action == "anomalies": if not policy_rules: print("Error: --policy required for anomaly detection") return anomalies = detect_anomalous_flows(flows, policy_rules) with open(args.output, "w") as f: json.dump(anomalies, f, indent=2) print(f"Detected {len(anomalies)} anomalous flows") elif args.action == "report": report = generate_segmentation_report(flows, policy_rules, workload_labels) with open(args.output, "w") as f: json.dump(report, f, indent=2) print(f"Report generated: {args.output}") print(f" Zones: {report['summary']['segmentation_zones']}") print(f" Anomalies: {report['summary']['anomalies_detected']}") print(f"Output saved to {args.output}") if __name__ == "__main__": main()