mirror of
https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git
synced 2026-06-10 13:14:55 +03:00
Merge PR #28: Add bulk skill metadata validation script
This commit is contained in:
@@ -0,0 +1,30 @@
|
|||||||
|
# Skill Validation Tools
|
||||||
|
|
||||||
|
## validate-skill.py
|
||||||
|
|
||||||
|
Validate SKILL.md metadata before submitting a PR.
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Validate a single skill
|
||||||
|
python tools/validate-skill.py skills/my-new-skill/
|
||||||
|
|
||||||
|
# Validate all skills
|
||||||
|
python tools/validate-skill.py --all
|
||||||
|
```
|
||||||
|
|
||||||
|
### What it checks
|
||||||
|
|
||||||
|
- SKILL.md exists in the skill directory
|
||||||
|
- Valid YAML frontmatter (between `---` markers)
|
||||||
|
- Required fields present: `name`, `description`, `domain`, `subdomain`, `tags`
|
||||||
|
- Name is kebab-case, 1–64 characters
|
||||||
|
- Description is at least 50 characters (no upper limit; multi-line folded scalars are valid)
|
||||||
|
- Domain is `cybersecurity`
|
||||||
|
- Subdomain is from the allowed list
|
||||||
|
- Tags is a list with at least 2 items
|
||||||
|
|
||||||
|
### Requirements
|
||||||
|
|
||||||
|
Python 3.8+ (stdlib only, no external dependencies)
|
||||||
Executable
+291
@@ -0,0 +1,291 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Validate SKILL.md metadata for the Anthropic-Cybersecurity-Skills repository.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python tools/validate-skill.py skills/my-skill/
|
||||||
|
python tools/validate-skill.py --all
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import glob
|
||||||
|
|
||||||
|
REQUIRED_FIELDS = ["name", "description", "domain", "subdomain", "tags"]
|
||||||
|
|
||||||
|
# Canonical subdomain → set of accepted aliases (including canonical itself).
|
||||||
|
# When a skill uses an alias, the validator accepts it but the canonical form
|
||||||
|
# is the first entry in each group below. New skills should use the canonical.
|
||||||
|
_SUBDOMAIN_ALIASES = {
|
||||||
|
# identity
|
||||||
|
"identity-access-management": {"identity-access-management", "identity-and-access-management", "identity-security"},
|
||||||
|
# zero-trust
|
||||||
|
"zero-trust-architecture": {"zero-trust-architecture", "zero-trust"},
|
||||||
|
# OT/ICS
|
||||||
|
"ot-ics-security": {"ot-ics-security", "ot-security"},
|
||||||
|
# SOC / security ops
|
||||||
|
"soc-operations": {"soc-operations", "security-operations"},
|
||||||
|
# red team
|
||||||
|
"red-teaming": {"red-teaming", "red-team"},
|
||||||
|
# standalone (no aliases)
|
||||||
|
"web-application-security": {"web-application-security", "application-security"},
|
||||||
|
"network-security": {"network-security"},
|
||||||
|
"penetration-testing": {"penetration-testing", "offensive-security"},
|
||||||
|
"digital-forensics": {"digital-forensics"},
|
||||||
|
"malware-analysis": {"malware-analysis"},
|
||||||
|
"threat-intelligence": {"threat-intelligence"},
|
||||||
|
"cloud-security": {"cloud-security"},
|
||||||
|
"container-security": {"container-security"},
|
||||||
|
"cryptography": {"cryptography"},
|
||||||
|
"vulnerability-management": {"vulnerability-management"},
|
||||||
|
"compliance-governance": {"compliance-governance", "governance-risk-compliance"},
|
||||||
|
"devsecops": {"devsecops"},
|
||||||
|
"threat-hunting": {"threat-hunting"},
|
||||||
|
"incident-response": {"incident-response"},
|
||||||
|
"endpoint-security": {"endpoint-security"},
|
||||||
|
"phishing-defense": {"phishing-defense", "social-engineering-defense"},
|
||||||
|
"api-security": {"api-security"},
|
||||||
|
"mobile-security": {"mobile-security"},
|
||||||
|
"ransomware-defense": {"ransomware-defense"},
|
||||||
|
"threat-detection": {"threat-detection"},
|
||||||
|
"blockchain-security": {"blockchain-security"},
|
||||||
|
"data-protection": {"data-protection"},
|
||||||
|
"deception-technology": {"deception-technology"},
|
||||||
|
"firmware-analysis": {"firmware-analysis", "firmware-security"},
|
||||||
|
"privacy-compliance": {"privacy-compliance"},
|
||||||
|
"purple-team": {"purple-team"},
|
||||||
|
"supply-chain-security": {"supply-chain-security"},
|
||||||
|
"wireless-security": {"wireless-security"},
|
||||||
|
"ai-security": {"ai-security"},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Flat set of all accepted subdomain values (canonical + aliases).
|
||||||
|
ALLOWED_SUBDOMAINS: set = {v for group in _SUBDOMAIN_ALIASES.values() for v in group}
|
||||||
|
|
||||||
|
# Reverse map: alias → canonical (for warning messages).
|
||||||
|
_ALIAS_TO_CANONICAL: dict = {}
|
||||||
|
for canonical, aliases in _SUBDOMAIN_ALIASES.items():
|
||||||
|
for alias in aliases:
|
||||||
|
_ALIAS_TO_CANONICAL[alias] = canonical
|
||||||
|
|
||||||
|
KEBAB_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$")
|
||||||
|
|
||||||
|
# Minimum description length. Other repo tooling uses 50 chars; align here.
|
||||||
|
DESCRIPTION_MIN_CHARS = 50
|
||||||
|
|
||||||
|
RED = "\033[91m"
|
||||||
|
GREEN = "\033[92m"
|
||||||
|
YELLOW = "\033[93m"
|
||||||
|
RESET = "\033[0m"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_frontmatter(text):
|
||||||
|
"""Extract YAML frontmatter as a dict (simple stdlib-only parser).
|
||||||
|
|
||||||
|
Handles the common SKILL.md patterns:
|
||||||
|
- key: scalar value
|
||||||
|
- key: [inline, list]
|
||||||
|
- key:\n - list\n - items
|
||||||
|
- key: >- (folded scalar — content on following indented lines)
|
||||||
|
|
||||||
|
Edge case note: ``list_values`` is reset to ``[]`` whenever a new key
|
||||||
|
with a scalar value is encountered, so a list from a prior block cannot
|
||||||
|
leak into an unrelated key. The only remaining theoretical edge case is
|
||||||
|
a key with *no* value that is immediately followed by non-list, non-empty
|
||||||
|
lines that look like scalars — those lines are currently ignored (the key
|
||||||
|
is treated as having no value). This is acceptable for well-formed SKILL.md
|
||||||
|
files and matches the behaviour contributors expect.
|
||||||
|
"""
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return None
|
||||||
|
end = text.find("---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return None
|
||||||
|
block = text[3:end].strip()
|
||||||
|
data = {}
|
||||||
|
current_key = None
|
||||||
|
list_values: list = []
|
||||||
|
in_folded = False # True when we are collecting a YAML >- / > folded scalar
|
||||||
|
folded_lines: list = []
|
||||||
|
|
||||||
|
for line in block.split("\n"):
|
||||||
|
stripped = line.strip()
|
||||||
|
|
||||||
|
# Flush a completed folded scalar when we hit the next top-level key.
|
||||||
|
if in_folded and stripped and not line.startswith(" ") and not line.startswith("\t"):
|
||||||
|
if current_key and folded_lines:
|
||||||
|
data[current_key] = " ".join(folded_lines)
|
||||||
|
in_folded = False
|
||||||
|
folded_lines = []
|
||||||
|
current_key = None
|
||||||
|
|
||||||
|
if in_folded:
|
||||||
|
if stripped:
|
||||||
|
folded_lines.append(stripped)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not stripped or stripped.startswith("#"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Handle list items (must come before key: value to avoid misparse).
|
||||||
|
if stripped.startswith("- ") and current_key:
|
||||||
|
list_values.append(stripped[2:].strip().strip('"').strip("'"))
|
||||||
|
data[current_key] = list(list_values) # copy so future mutations don't leak
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Handle inline list: tags: [a, b, c]
|
||||||
|
m = re.match(r"^(\w[\w_-]*):\s*\[(.+)\]\s*$", stripped)
|
||||||
|
if m:
|
||||||
|
current_key = m.group(1)
|
||||||
|
items = [i.strip().strip('"').strip("'") for i in m.group(2).split(",")]
|
||||||
|
data[current_key] = items
|
||||||
|
list_values = list(items)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Handle key: >- or key: > (folded scalar start)
|
||||||
|
m = re.match(r"^(\w[\w_-]*):\s*>[-|]?\s*$", stripped)
|
||||||
|
if m:
|
||||||
|
current_key = m.group(1)
|
||||||
|
list_values = []
|
||||||
|
in_folded = True
|
||||||
|
folded_lines = []
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Handle key: value (plain scalar)
|
||||||
|
m = re.match(r'^(\w[\w_-]*):\s*(.*)$', stripped)
|
||||||
|
if m:
|
||||||
|
current_key = m.group(1)
|
||||||
|
val = m.group(2).strip().strip('"').strip("'")
|
||||||
|
list_values = [] # reset; new scalar key cannot inherit a prior list
|
||||||
|
if val:
|
||||||
|
data[current_key] = val
|
||||||
|
# If val is empty the key is present but value-less (e.g. start of block list)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Flush any trailing folded scalar.
|
||||||
|
if in_folded and current_key and folded_lines:
|
||||||
|
data[current_key] = " ".join(folded_lines)
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def validate_skill(skill_dir):
|
||||||
|
"""Validate a single skill directory. Returns list of error strings."""
|
||||||
|
errors = []
|
||||||
|
skill_md = os.path.join(skill_dir, "SKILL.md")
|
||||||
|
|
||||||
|
if not os.path.isfile(skill_md):
|
||||||
|
return [f"SKILL.md not found in {skill_dir}"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(skill_md, encoding="utf-8") as f:
|
||||||
|
content = f.read()
|
||||||
|
except IOError as e:
|
||||||
|
return [f"Could not read SKILL.md: {e}"]
|
||||||
|
except UnicodeDecodeError as e:
|
||||||
|
return [f"Encoding error in SKILL.md (not valid UTF-8): {e}"]
|
||||||
|
|
||||||
|
fm = parse_frontmatter(content)
|
||||||
|
if fm is None:
|
||||||
|
return ["No valid YAML frontmatter found (must start with ---)"]
|
||||||
|
|
||||||
|
# Check required fields.
|
||||||
|
for field in REQUIRED_FIELDS:
|
||||||
|
if field not in fm:
|
||||||
|
errors.append(f"Missing required field: {field}")
|
||||||
|
|
||||||
|
# Validate name.
|
||||||
|
name = fm.get("name", "")
|
||||||
|
if name:
|
||||||
|
if not KEBAB_RE.match(name):
|
||||||
|
errors.append(
|
||||||
|
f"Name '{name}' is not valid kebab-case (lowercase letters, digits, hyphens only)"
|
||||||
|
)
|
||||||
|
if len(name) > 64:
|
||||||
|
errors.append(f"Name too long ({len(name)} chars, max 64)")
|
||||||
|
|
||||||
|
# Validate description.
|
||||||
|
desc = fm.get("description", "")
|
||||||
|
if isinstance(desc, list):
|
||||||
|
errors.append("Description must be a string value, not a list")
|
||||||
|
elif isinstance(desc, str):
|
||||||
|
if len(desc) < DESCRIPTION_MIN_CHARS:
|
||||||
|
errors.append(
|
||||||
|
f"Description too short ({len(desc)} chars, min {DESCRIPTION_MIN_CHARS})"
|
||||||
|
)
|
||||||
|
# No hard upper-limit enforced; multi-line folded scalars (>-) produce
|
||||||
|
# long strings that are valid and common in this repo.
|
||||||
|
|
||||||
|
# Validate domain.
|
||||||
|
domain = fm.get("domain", "")
|
||||||
|
if domain and domain != "cybersecurity":
|
||||||
|
errors.append(f"Domain must be 'cybersecurity', got '{domain}'")
|
||||||
|
|
||||||
|
# Validate subdomain.
|
||||||
|
subdomain = fm.get("subdomain", "")
|
||||||
|
if subdomain:
|
||||||
|
if subdomain not in ALLOWED_SUBDOMAINS:
|
||||||
|
errors.append(
|
||||||
|
f"Unknown subdomain '{subdomain}'. Allowed: {', '.join(sorted(ALLOWED_SUBDOMAINS))}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
canonical = _ALIAS_TO_CANONICAL.get(subdomain, subdomain)
|
||||||
|
if subdomain != canonical:
|
||||||
|
# Warn (non-blocking) — alias is accepted but canonical is preferred
|
||||||
|
print(
|
||||||
|
f"{YELLOW}WARN{RESET} subdomain '{subdomain}' is an alias;"
|
||||||
|
f" canonical form is '{canonical}'"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate tags.
|
||||||
|
tags = fm.get("tags", [])
|
||||||
|
if isinstance(tags, str):
|
||||||
|
tags = [tags]
|
||||||
|
if len(tags) < 2:
|
||||||
|
errors.append(f"Need at least 2 tags, got {len(tags)}")
|
||||||
|
|
||||||
|
return errors
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print(f"Usage: {sys.argv[0]} <skill-dir> | --all")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if sys.argv[1] == "--all":
|
||||||
|
skill_dirs = sorted(glob.glob("skills/*/"))
|
||||||
|
if not skill_dirs:
|
||||||
|
print("ERROR: No skill directories found. Run from the repository root.")
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
skill_dirs = [sys.argv[1].rstrip("/") + "/"]
|
||||||
|
|
||||||
|
total = 0
|
||||||
|
passed = 0
|
||||||
|
failed = 0
|
||||||
|
|
||||||
|
for skill_dir in skill_dirs:
|
||||||
|
if not os.path.isdir(skill_dir.rstrip("/")):
|
||||||
|
print(f"{RED}SKIP{RESET} {skill_dir} — not a directory")
|
||||||
|
continue
|
||||||
|
|
||||||
|
total += 1
|
||||||
|
errors = validate_skill(skill_dir.rstrip("/"))
|
||||||
|
|
||||||
|
name = os.path.basename(skill_dir.rstrip("/"))
|
||||||
|
if errors:
|
||||||
|
failed += 1
|
||||||
|
print(f"{RED}FAIL{RESET} {name}")
|
||||||
|
for e in errors:
|
||||||
|
print(f" {YELLOW}→ {e}{RESET}")
|
||||||
|
else:
|
||||||
|
passed += 1
|
||||||
|
print(f"{GREEN}PASS{RESET} {name}")
|
||||||
|
|
||||||
|
print(f"\n{'='*50}")
|
||||||
|
print(f"Total: {total} {GREEN}Passed: {passed}{RESET} {RED}Failed: {failed}{RESET}")
|
||||||
|
|
||||||
|
sys.exit(0 if failed == 0 else 1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user