#!/usr/bin/env python3
"""Agent for analyzing Certificate Transparency logs for phishing detection."""

import json
import argparse
from datetime import datetime

import requests
from pycrtsh import Crtsh


def search_certificates(domain, include_expired=False):
    """Search crt.sh for certificates matching a domain."""
    c = Crtsh()
    certs = c.search(domain)
    if not include_expired:
        now = datetime.utcnow()
        certs = [cert for cert in certs if cert.get("not_after")
                 and datetime.strptime(str(cert["not_after"]), "%Y-%m-%dT%H:%M:%S") > now]
    return certs


def get_certificate_details(cert_id):
    """Get full certificate details from crt.sh by ID."""
    c = Crtsh()
    return c.get(cert_id, type="id")


def search_crtsh_api(domain):
    """Query crt.sh REST API directly for certificate records."""
    url = f"https://crt.sh/?q={domain}&output=json"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return resp.json()


def levenshtein_distance(s1, s2):
    """Compute Levenshtein distance between two strings."""
    if len(s1) < len(s2):
        return levenshtein_distance(s2, s1)
    if len(s2) == 0:
        return len(s1)
    prev_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        curr_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = prev_row[j + 1] + 1
            deletions = curr_row[j] + 1
            substitutions = prev_row[j] + (c1 != c2)
            curr_row.append(min(insertions, deletions, substitutions))
        prev_row = curr_row
    return prev_row[-1]


def detect_typosquatting(target_domain, ct_records, max_distance=3):
    """Detect typosquatting domains using Levenshtein distance."""
    base = target_domain.split(".")[0]
    suspicious = []
    seen = set()
    for record in ct_records:
        domain = record.get("common_name", "") or record.get("name_value", "")
        if not domain or domain in seen:
            continue
        seen.add(domain)
        candidate_base = domain.split(".")[0].lstrip("*").lstrip(".")
        if candidate_base == base:
            continue
        dist = levenshtein_distance(base, candidate_base)
        if 0 < dist <= max_distance:
            suspicious.append({
                "domain": domain,
                "distance": dist,
                "issuer": record.get("issuer_name", ""),
                "not_before": record.get("not_before", ""),
                "not_after": record.get("not_after", ""),
            })
    return sorted(suspicious, key=lambda x: x["distance"])


def detect_unauthorized_cas(ct_records, allowed_cas):
    """Find certificates issued by unauthorized Certificate Authorities."""
    unauthorized = []
    for record in ct_records:
        issuer = record.get("issuer_name", "")
        if issuer and not any(ca.lower() in issuer.lower() for ca in allowed_cas):
            unauthorized.append({
                "domain": record.get("common_name", ""),
                "issuer": issuer,
                "not_before": record.get("not_before", ""),
                "cert_id": record.get("id"),
            })
    return unauthorized


def monitor_new_certificates(domain, hours_back=24):
    """Find certificates issued in the last N hours."""
    records = search_crtsh_api(f"%.{domain}")
    cutoff = datetime.utcnow().timestamp() - (hours_back * 3600)
    recent = []
    for r in records:
        not_before = r.get("not_before", "")
        if not_before:
            try:
                cert_time = datetime.strptime(not_before, "%Y-%m-%dT%H:%M:%S")
                if cert_time.timestamp() > cutoff:
                    recent.append({
                        "domain": r.get("common_name", ""),
                        "issuer": r.get("issuer_name", ""),
                        "not_before": not_before,
                        "name_value": r.get("name_value", ""),
                    })
            except ValueError:
                continue
    return recent


def find_wildcard_certificates(ct_records):
    """Identify wildcard certificates that could cover many subdomains."""
    wildcards = []
    for r in ct_records:
        name = r.get("common_name", "") or r.get("name_value", "")
        if name.startswith("*."):
            wildcards.append({
                "domain": name,
                "issuer": r.get("issuer_name", ""),
                "not_before": r.get("not_before", ""),
                "not_after": r.get("not_after", ""),
            })
    return wildcards


def main():
    parser = argparse.ArgumentParser(description="Certificate Transparency Analysis Agent")
    parser.add_argument("--domain", required=True, help="Target domain to monitor")
    parser.add_argument("--allowed-cas", nargs="*", default=["Let's Encrypt", "DigiCert",
                        "Sectigo", "Amazon", "Google Trust Services"])
    parser.add_argument("--output", default="ct_report.json")
    parser.add_argument("--action", choices=[
        "search", "typosquat", "unauthorized_ca", "monitor", "full_scan"
    ], default="full_scan")
    args = parser.parse_args()

    report = {"domain": args.domain, "generated_at": datetime.utcnow().isoformat(),
              "findings": {}}

    ct_records = search_crtsh_api(f"%.{args.domain}")
    report["findings"]["total_certificates"] = len(ct_records)
    print(f"[+] Found {len(ct_records)} certificates for {args.domain}")

    if args.action in ("typosquat", "full_scan"):
        typos = detect_typosquatting(args.domain, ct_records)
        report["findings"]["typosquatting"] = typos
        print(f"[+] Typosquatting domains: {len(typos)}")

    if args.action in ("unauthorized_ca", "full_scan"):
        unauth = detect_unauthorized_cas(ct_records, args.allowed_cas)
        report["findings"]["unauthorized_cas"] = unauth[:50]
        print(f"[+] Unauthorized CA certs: {len(unauth)}")

    if args.action in ("monitor", "full_scan"):
        recent = monitor_new_certificates(args.domain)
        report["findings"]["recent_24h"] = recent
        print(f"[+] Certificates issued in last 24h: {len(recent)}")

    wildcards = find_wildcard_certificates(ct_records)
    report["findings"]["wildcard_certs"] = wildcards
    print(f"[+] Wildcard certificates: {len(wildcards)}")

    with open(args.output, "w") as f:
        json.dump(report, f, indent=2, default=str)
    print(f"[+] Report saved to {args.output}")


if __name__ == "__main__":
    main()