fix: add error handling for IOError/UnicodeDecodeError + sync ALLOWED_SUBDOMAINS

- Wrap open() call in try/except for IOError and UnicodeDecodeError
  to report clean errors instead of crashing on encoding issues
- Add all subdomains actually used by existing skills in the repo:
  identity-access-management (33 skills), security-operations (28),
  identity-and-access-management, zero-trust, ot-security, purple-team,
  red-team, ai-security, social-engineering-defense, and others
- Remove identity-security as the canonical form is identity-access-management
This commit is contained in:
Julio César Suástegui
2026-04-03 01:09:58 -06:00
parent c15f73db46
commit b53f3d4991
2 changed files with 212 additions and 0 deletions
+30
View File
@@ -0,0 +1,30 @@
# Skill Validation Tools
## validate-skill.py
Validate SKILL.md metadata before submitting a PR.
### Usage
```bash
# Validate a single skill
python tools/validate-skill.py skills/my-new-skill/
# Validate all skills
python tools/validate-skill.py --all
```
### What it checks
- SKILL.md exists in the skill directory
- Valid YAML frontmatter (between `---` markers)
- Required fields present: `name`, `description`, `domain`, `subdomain`, `tags`
- Name is kebab-case, 164 characters
- Description is 20500 characters
- Domain is `cybersecurity`
- Subdomain is from the allowed list
- Tags is a list with at least 2 items
### Requirements
Python 3.8+ (stdlib only, no external dependencies)
+182
View File
@@ -0,0 +1,182 @@
#!/usr/bin/env python3
"""Validate SKILL.md metadata for the Anthropic-Cybersecurity-Skills repository.
Usage:
python tools/validate-skill.py skills/my-skill/
python tools/validate-skill.py --all
"""
import os
import re
import sys
import glob
REQUIRED_FIELDS = ["name", "description", "domain", "subdomain", "tags"]
ALLOWED_SUBDOMAINS = {
"web-application-security", "network-security", "penetration-testing",
"red-teaming", "digital-forensics", "malware-analysis", "threat-intelligence",
"cloud-security", "container-security", "identity-security",
"identity-access-management", "identity-and-access-management",
"cryptography", "vulnerability-management", "compliance-governance",
"zero-trust-architecture", "zero-trust", "ot-ics-security", "ot-security",
"devsecops", "threat-hunting", "soc-operations", "security-operations",
"incident-response", "endpoint-security", "phishing-defense",
"api-security", "mobile-security", "ransomware-defense", "threat-detection",
"application-security", "blockchain-security", "data-protection",
"deception-technology", "firmware-analysis", "firmware-security",
"governance-risk-compliance", "offensive-security", "privacy-compliance",
"purple-team", "red-team", "supply-chain-security", "wireless-security",
"ai-security", "social-engineering-defense",
}
KEBAB_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$")
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"
RESET = "\033[0m"
def parse_frontmatter(text):
"""Extract YAML frontmatter as a dict (simple parser, stdlib only)."""
if not text.startswith("---"):
return None
end = text.find("---", 3)
if end == -1:
return None
block = text[3:end].strip()
data = {}
current_key = None
list_values = []
for line in block.split("\n"):
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
# Handle list items
if stripped.startswith("- ") and current_key:
list_values.append(stripped[2:].strip().strip('"').strip("'"))
data[current_key] = list_values
continue
# Handle inline list: tags: [a, b, c]
match = re.match(r"^(\w[\w_-]*):\s*\[(.+)\]\s*$", stripped)
if match:
current_key = match.group(1)
items = [i.strip().strip('"').strip("'") for i in match.group(2).split(",")]
data[current_key] = items
list_values = items
continue
# Handle key: value
match = re.match(r'^(\w[\w_-]*):\s*(.*)$', stripped)
if match:
current_key = match.group(1)
val = match.group(2).strip().strip('"').strip("'")
if val:
data[current_key] = val
list_values = []
else:
list_values = []
continue
return data
def validate_skill(skill_dir):
"""Validate a single skill directory. Returns list of errors."""
errors = []
skill_md = os.path.join(skill_dir, "SKILL.md")
if not os.path.isfile(skill_md):
return [f"SKILL.md not found in {skill_dir}"]
try:
with open(skill_md, encoding="utf-8") as f:
content = f.read()
except IOError as e:
return [f"Could not read SKILL.md: {e}"]
except UnicodeDecodeError as e:
return [f"Encoding error in SKILL.md (not valid UTF-8): {e}"]
fm = parse_frontmatter(content)
if fm is None:
return [f"No valid YAML frontmatter found (must start with ---)"]
# Check required fields
for field in REQUIRED_FIELDS:
if field not in fm:
errors.append(f"Missing required field: {field}")
# Validate name
name = fm.get("name", "")
if name:
if not KEBAB_RE.match(name):
errors.append(f"Name '{name}' is not valid kebab-case (lowercase, hyphens only)")
if len(name) > 64:
errors.append(f"Name too long ({len(name)} chars, max 64)")
# Validate description
desc = fm.get("description", "")
if isinstance(desc, str):
if len(desc) < 20:
errors.append(f"Description too short ({len(desc)} chars, min 20)")
if len(desc) > 500:
errors.append(f"Description too long ({len(desc)} chars, max 500)")
# Validate domain
domain = fm.get("domain", "")
if domain and domain != "cybersecurity":
errors.append(f"Domain must be 'cybersecurity', got '{domain}'")
# Validate subdomain
subdomain = fm.get("subdomain", "")
if subdomain and subdomain not in ALLOWED_SUBDOMAINS:
errors.append(f"Unknown subdomain '{subdomain}'. Allowed: {', '.join(sorted(ALLOWED_SUBDOMAINS))}")
# Validate tags
tags = fm.get("tags", [])
if isinstance(tags, str):
tags = [tags]
if len(tags) < 2:
errors.append(f"Need at least 2 tags, got {len(tags)}")
return errors
def main():
if len(sys.argv) < 2:
print(f"Usage: {sys.argv[0]} <skill-dir> | --all")
sys.exit(1)
if sys.argv[1] == "--all":
skill_dirs = sorted(glob.glob("skills/*/"))
else:
skill_dirs = [sys.argv[1].rstrip("/") + "/"]
total = 0
passed = 0
failed = 0
for skill_dir in skill_dirs:
if not os.path.isdir(skill_dir.rstrip("/")):
print(f"{RED}SKIP{RESET} {skill_dir} — not a directory")
continue
total += 1
errors = validate_skill(skill_dir.rstrip("/"))
name = os.path.basename(skill_dir.rstrip("/"))
if errors:
failed += 1
print(f"{RED}FAIL{RESET} {name}")
for e in errors:
print(f" {YELLOW}{e}{RESET}")
else:
passed += 1
print(f"{GREEN}PASS{RESET} {name}")
print(f"\n{'='*50}")
print(f"Total: {total} {GREEN}Passed: {passed}{RESET} {RED}Failed: {failed}{RESET}")
sys.exit(0 if failed == 0 else 1)
if __name__ == "__main__":
main()