From b53f3d49919815fafb02f7283b9b5e3722cc82a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julio=20C=C3=A9sar=20Su=C3=A1stegui?= Date: Fri, 3 Apr 2026 01:09:58 -0600 Subject: [PATCH] fix: add error handling for IOError/UnicodeDecodeError + sync ALLOWED_SUBDOMAINS - Wrap open() call in try/except for IOError and UnicodeDecodeError to report clean errors instead of crashing on encoding issues - Add all subdomains actually used by existing skills in the repo: identity-access-management (33 skills), security-operations (28), identity-and-access-management, zero-trust, ot-security, purple-team, red-team, ai-security, social-engineering-defense, and others - Remove identity-security as the canonical form is identity-access-management --- tools/README.md | 30 +++++++ tools/validate-skill.py | 182 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 tools/README.md create mode 100755 tools/validate-skill.py diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 00000000..edf3c13d --- /dev/null +++ b/tools/README.md @@ -0,0 +1,30 @@ +# Skill Validation Tools + +## validate-skill.py + +Validate SKILL.md metadata before submitting a PR. 
#!/usr/bin/env python3
"""Validate SKILL.md metadata for the Anthropic-Cybersecurity-Skills repository.

Usage:
    # Validate a single skill (several directories may be given)
    python tools/validate-skill.py skills/my-new-skill/

    # Validate all skills
    python tools/validate-skill.py --all

What it checks:
    - SKILL.md exists in the skill directory
    - Valid YAML frontmatter (between ``---`` markers)
    - Required fields present: name, description, domain, subdomain, tags
    - Name is kebab-case, 1-64 characters
    - Description is 20-500 characters
    - Domain is ``cybersecurity``
    - Subdomain is from the allowed list
    - Tags is a list with at least 2 items

Requirements:
    Python 3.8+ (stdlib only, no external dependencies)
"""
import glob
import os
import re
import sys
from typing import Dict, List, Optional, Sequence, Union

REQUIRED_FIELDS = ["name", "description", "domain", "subdomain", "tags"]

# Fields that must hold one string; a YAML list here is a metadata error.
_SCALAR_FIELDS = ("name", "description", "domain", "subdomain")

# "identity-security" is deliberately absent: the canonical spelling is
# "identity-access-management" (see _RENAMED_SUBDOMAINS for the hint).
ALLOWED_SUBDOMAINS = {
    "web-application-security", "network-security", "penetration-testing",
    "red-teaming", "digital-forensics", "malware-analysis", "threat-intelligence",
    "cloud-security", "container-security",
    "identity-access-management", "identity-and-access-management",
    "cryptography", "vulnerability-management", "compliance-governance",
    "zero-trust-architecture", "zero-trust", "ot-ics-security", "ot-security",
    "devsecops", "threat-hunting", "soc-operations", "security-operations",
    "incident-response", "endpoint-security", "phishing-defense",
    "api-security", "mobile-security", "ransomware-defense", "threat-detection",
    "application-security", "blockchain-security", "data-protection",
    "deception-technology", "firmware-analysis", "firmware-security",
    "governance-risk-compliance", "offensive-security", "privacy-compliance",
    "purple-team", "red-team", "supply-chain-security", "wireless-security",
    "ai-security", "social-engineering-defense",
}

# Retired subdomain spellings mapped to the value authors should use instead.
_RENAMED_SUBDOMAINS = {"identity-security": "identity-access-management"}

KEBAB_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$")

# "key: value" at the start of a line (the value may be empty).
_KEY_RE = re.compile(r"^(\w[\w-]*):\s*(.*)$")
# Inline list value such as "[a, b, c]" or "[]".
_INLINE_LIST_RE = re.compile(r"^\[(.*)\]$")
# YAML block scalar indicator such as "|", ">", ">-", "|+" or "|2-".
_BLOCK_SCALAR_RE = re.compile(r"^[|>][+-]?[1-9]?[+-]?$")

RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
RESET = "\033[0m"

FrontmatterValue = Union[str, List[str]]


def _unquote(value: str) -> str:
    """Strip surrounding whitespace and one matching pair of quotes."""
    value = value.strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
        return value[1:-1]
    return value


def parse_frontmatter(text: str) -> Optional[Dict[str, FrontmatterValue]]:
    """Extract YAML frontmatter as a dict (simple parser, stdlib only).

    Only the subset of YAML needed for SKILL.md metadata is understood:
    top-level ``key: value`` pairs, inline lists (``[a, b]``), ``- item``
    lists, block scalars (``|`` / ``>``) and indented continuation lines of a
    plain scalar.  Nested mappings are skipped so that their keys cannot
    overwrite top-level fields.  Blank lines inside a block scalar are
    dropped rather than kept as paragraph breaks.

    Args:
        text: Full contents of a SKILL.md file.

    Returns:
        A dict mapping each top-level key to a string or a list of strings.
        Keys whose value is empty are omitted.  ``None`` is returned when the
        text does not start with a ``---`` line or has no closing ``---`` line.
    """
    # A UTF-8 byte-order mark would otherwise hide the opening delimiter.
    if text.startswith("\ufeff"):
        text = text[1:]

    lines = text.split("\n")
    if lines[0].rstrip() != "---":
        return None
    # The closing delimiter must be a line of its own; a "---" that merely
    # appears inside a value must not end the block.
    end = next(
        (i for i, line in enumerate(lines[1:], start=1) if line.rstrip() == "---"),
        None,
    )
    if end is None:
        return None

    data: Dict[str, FrontmatterValue] = {}
    literal_keys = set()  # keys filled from block scalars: quotes are content
    current_key = None    # most recent top-level key
    nested = False        # inside a nested mapping below current_key
    in_block = False      # collecting lines of a block scalar
    joiner = " "          # separator between block scalar lines

    for raw in lines[1:end]:
        stripped = raw.strip()
        if not stripped:
            continue
        indented = raw[0] in " \t"

        if in_block:
            if indented:
                previous = data[current_key]
                data[current_key] = (
                    f"{previous}{joiner}{stripped}" if previous else stripped
                )
                continue
            in_block = False  # an unindented line ends the block scalar

        if stripped.startswith("#"):
            continue

        # List item belonging to the current top-level key.
        if stripped.startswith("- "):
            if current_key is not None and not nested:
                items = data.get(current_key)
                if not isinstance(items, list):
                    items = []
                    data[current_key] = items
                items.append(_unquote(stripped[2:]))
            continue

        if indented:
            value = data.get(current_key)
            if not nested and isinstance(value, str):
                # Continuation line of a multi-line plain or quoted scalar.
                data[current_key] = f"{value} {stripped}"
            else:
                # Content of a nested mapping: not part of the top level.
                nested = True
            continue

        match = _KEY_RE.match(stripped)
        if not match:
            continue
        current_key = match.group(1)
        value = match.group(2).strip()
        nested = False
        literal_keys.discard(current_key)

        list_match = _INLINE_LIST_RE.match(value)
        if list_match:
            data[current_key] = [
                _unquote(item)
                for item in list_match.group(1).split(",")
                if item.strip()
            ]
        elif _BLOCK_SCALAR_RE.match(value):
            data[current_key] = ""
            literal_keys.add(current_key)
            in_block = True
            joiner = "\n" if value[0] == "|" else " "
        elif value:
            data[current_key] = value
        # An empty value leaves the key unset until list items arrive.

    result: Dict[str, FrontmatterValue] = {}
    for key, value in data.items():
        if isinstance(value, str):
            if key not in literal_keys:
                value = _unquote(value)
            if not value:
                continue  # empty scalars count as missing fields
        result[key] = value
    return result


def validate_skill(skill_dir: str) -> List[str]:
    """Validate a single skill directory.

    Args:
        skill_dir: Path of the directory expected to contain ``SKILL.md``.

    Returns:
        A list of human-readable error messages; empty when the skill is
        valid.  Problems that prevent further checks (missing or unreadable
        file, no frontmatter) are returned as a single-item list.
    """
    skill_md = os.path.join(skill_dir, "SKILL.md")

    if not os.path.isfile(skill_md):
        return [f"SKILL.md not found in {skill_dir}"]

    try:
        with open(skill_md, encoding="utf-8") as f:
            content = f.read()
    except OSError as e:
        return [f"Could not read SKILL.md: {e}"]
    except UnicodeDecodeError as e:
        return [f"Encoding error in SKILL.md (not valid UTF-8): {e}"]

    fm = parse_frontmatter(content)
    if fm is None:
        return ["No valid YAML frontmatter found (must start with ---)"]

    errors = [
        f"Missing required field: {field}"
        for field in REQUIRED_FIELDS
        if field not in fm
    ]

    # A list where a string is expected would otherwise crash the checks
    # below (regex match / set membership on a list).
    for field in _SCALAR_FIELDS:
        if field in fm and not isinstance(fm[field], str):
            errors.append(f"Field '{field}' must be a single string value, not a list")

    # Validate name
    name = fm.get("name")
    if isinstance(name, str):
        if not KEBAB_RE.match(name):
            errors.append(f"Name '{name}' is not valid kebab-case (lowercase, hyphens only)")
        if len(name) > 64:
            errors.append(f"Name too long ({len(name)} chars, max 64)")

    # Validate description (a missing one is already reported above)
    desc = fm.get("description")
    if isinstance(desc, str):
        if len(desc) < 20:
            errors.append(f"Description too short ({len(desc)} chars, min 20)")
        if len(desc) > 500:
            errors.append(f"Description too long ({len(desc)} chars, max 500)")

    # Validate domain
    domain = fm.get("domain")
    if isinstance(domain, str) and domain != "cybersecurity":
        errors.append(f"Domain must be 'cybersecurity', got '{domain}'")

    # Validate subdomain
    subdomain = fm.get("subdomain")
    if isinstance(subdomain, str) and subdomain not in ALLOWED_SUBDOMAINS:
        replacement = _RENAMED_SUBDOMAINS.get(subdomain)
        if replacement:
            errors.append(f"Subdomain '{subdomain}' has been renamed; use '{replacement}'")
        else:
            errors.append(
                f"Unknown subdomain '{subdomain}'. "
                f"Allowed: {', '.join(sorted(ALLOWED_SUBDOMAINS))}"
            )

    # Validate tags (a missing field is already reported above)
    if "tags" in fm:
        tags = fm["tags"]
        if isinstance(tags, str):
            tags = [tags]
        if len(tags) < 2:
            errors.append(f"Need at least 2 tags, got {len(tags)}")

    return errors


def _find_all_skill_dirs() -> List[str]:
    """Return every ``skills/*/`` directory, sorted.

    The current working directory is searched first; if it has no skills, the
    repository root inferred from this script's location (the parent of the
    directory containing it) is tried, so ``--all`` also works when the tool
    is not started from the repository root.
    """
    found = sorted(glob.glob(os.path.join("skills", "*", "")))
    if found:
        return found
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    return sorted(glob.glob(os.path.join(repo_root, "skills", "*", "")))


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Command-line entry point: validate skills and exit with a status code.

    Args:
        argv: Arguments without the program name; defaults to
            ``sys.argv[1:]``.  Either ``--all`` or one or more skill
            directories.

    Raises:
        SystemExit: Always.  Code 0 when every skill passed; code 1 on a
            validation failure, a usage error, a path that is not a
            directory, or when ``--all`` finds no skills.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    if not args:
        print(f"Usage: {sys.argv[0]} <skill-dir> [<skill-dir> ...] | --all")
        sys.exit(1)

    if args[0] == "--all":
        skill_dirs = _find_all_skill_dirs()
        if not skill_dirs:
            # Reporting success with nothing checked would hide a wrong
            # working directory.
            print(f"{RED}ERROR{RESET} no skill directories found under skills/")
            sys.exit(1)
    else:
        skill_dirs = args

    passed = 0
    failed = 0

    for arg in skill_dirs:
        skill_dir = os.path.normpath(arg)
        if not os.path.isdir(skill_dir):
            # A mistyped path must not look like a successful validation.
            failed += 1
            print(f"{RED}FAIL{RESET} {arg} — not a directory")
            continue

        errors = validate_skill(skill_dir)
        name = os.path.basename(os.path.abspath(skill_dir))
        if errors:
            failed += 1
            print(f"{RED}FAIL{RESET} {name}")
            for e in errors:
                print(f"  {YELLOW}→ {e}{RESET}")
        else:
            passed += 1
            print(f"{GREEN}PASS{RESET} {name}")

    total = passed + failed
    print(f"\n{'=' * 50}")
    print(f"Total: {total}  {GREEN}Passed: {passed}{RESET}  {RED}Failed: {failed}{RESET}")

    sys.exit(0 if failed == 0 else 1)


if __name__ == "__main__":
    main()