add validation infrastructure, fix "6" key bug in v1.3

- add keylayout XML parser (parse_keylayout.py) that extracts all key
  mappings, dead key compositions, modifier layers from .keylayout files
- add validation script (validate_layouts.py) that compares layouts against
  v1.3 reference with per-version exception support
- fix action id="6" outputting "p" instead of "6" in v1.3
- generate reference JSON for all 4 layout versions
- document known intentional differences: v1.2 (no ¬ dead key, § vs ẞ),
  v1.4 (ẞ on §/` caps, extra ¬ composition)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-03 17:06:48 +01:00
parent e16b92051d
commit 7084817dab
8 changed files with 9802 additions and 1 deletions

View File

@@ -1123,7 +1123,7 @@
<when state="dead: Ω" output="⁵"/>
</action>
<action id="6">
<when state="none" output="p"/>
<when state="none" output="6"/>
<when state="dead: ¬" output="¾"/>
<when state="dead: Ω" output="⁶"/>
</action>

355
scripts/parse_keylayout.py Normal file
View File

@@ -0,0 +1,355 @@
#!/usr/bin/env python3
"""Parse Apple .keylayout XML files into a flat JSON representation.
Extracts all key mappings across modifier layers (base, Shift, Caps, Option,
Shift+Option, Caps+Option, Command+Option) and resolves dead key states to
their composed outputs.
Usage:
python3 scripts/parse_keylayout.py <file.keylayout> [--output file.json] [--summary]
Output JSON structure:
{
"name": "EurKEY v1.3",
"modifierMap": { ... },
"keyMaps": {
"0": { "label": "Base", "keys": { "0": {"output": "a", ...}, ... } },
...
},
"actions": {
"a": {
"none": "a",
"dead: ^": "â",
...
},
...
},
"deadKeys": {
"dead: ^": { "terminator": "^", "compositions": { "a": "â", "A": "Â", ... } },
...
}
}
"""
import json
import re
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
# macOS key code → physical key name (US ANSI/ISO layout)
# Used to fill the "keyName" field of every parsed key; codes that are not
# listed here are named "code<N>" by the callers.
KEY_CODE_NAMES = {
0: "A", 1: "S", 2: "D", 3: "F", 4: "H", 5: "G",
6: "Z", 7: "X", 8: "C", 9: "V", 10: "§/`",
11: "B", 12: "Q", 13: "W", 14: "E", 15: "R",
16: "Y", 17: "T", 18: "1", 19: "2", 20: "3",
21: "4", 22: "6", 23: "5", 24: "=", 25: "9",
26: "7", 27: "-", 28: "8", 29: "0", 30: "]",
31: "O", 32: "U", 33: "[", 34: "I", 35: "P",
36: "Return", 37: "L", 38: "J", 39: "'", 40: "K",
41: ";", 42: "\\", 43: ",", 44: "/", 45: "N",
46: "M", 47: ".", 48: "Tab", 49: "Space", 50: "`",
51: "Delete", 52: "Enter", 53: "Escape",
# numpad
65: "KP.", 67: "KP*", 69: "KP+", 75: "KP/",
76: "KPEnter", 78: "KP-", 81: "KP=",
82: "KP0", 83: "KP1", 84: "KP2", 85: "KP3",
86: "KP4", 87: "KP5", 88: "KP6", 89: "KP7",
91: "KP8", 92: "KP9",
# iso extra key
# NOTE(review): Apple's virtual key code list appears to assign 93-95 to the
# JIS Yen / Underscore / keypad-comma keys and 10 to the ISO section key —
# confirm these labels before relying on them.
93: "ISO§", 94: "ISO_backslash", 95: "ISO_comma",
# function/navigation keys
96: "F5", 97: "F6", 98: "F7", 99: "F3",
100: "F8", 101: "F9", 103: "F11", 105: "F13",
107: "F14", 109: "F10", 111: "F12", 113: "F15",
114: "Help/Insert", 115: "Home", 116: "PageUp",
117: "ForwardDelete", 118: "F4", 119: "End",
120: "F2", 121: "PageDown", 122: "F1",
123: "Left", 124: "Right", 125: "Down", 126: "Up",
}
# modifier map index → human-readable label
# Keyed by the integer value of a keyMap "index"; indices without an entry
# are labelled "Index <N>" by the code that looks them up.
MODIFIER_LABELS = {
0: "Base",
1: "Shift",
2: "Caps",
3: "Option",
4: "Shift+Option",
5: "Caps+Option",
6: "Command+Option",
7: "Control",
}
# key codes that are "typing" keys (not function/navigation/control)
# Codes 0-49 are taken wholesale, so per KEY_CODE_NAMES this also includes
# Return (36), Tab (48) and Space (49); 50 and 93-95 are added explicitly.
TYPING_KEY_CODES = set(range(0, 50)) | {50, 93, 94, 95}
def _read_keylayout_xml(filepath):
"""Read a .keylayout file, working around XML 1.1 control character references.
Apple .keylayout files declare XML 1.1 and use numeric character references
for control characters (&#x0001; through &#x001F;) that are invalid in XML 1.0.
Python's ElementTree only supports XML 1.0, so we convert control character
references to placeholder tokens, parse, then restore them.
"""
with open(filepath, "r", encoding="utf-8") as f:
content = f.read()
# downgrade XML declaration from 1.1 to 1.0
content = content.replace('version="1.1"', 'version="1.0"')
# strip the DOCTYPE (references local DTD that may not exist)
content = re.sub(r'<!DOCTYPE[^>]*>', '', content)
# replace control character references with placeholder strings
# &#x0001; through &#x001F; and &#x007F; are problematic in XML 1.0
def replace_control_ref(m):
code_point = int(m.group(1), 16)
return f"__CTRL_U{code_point:04X}__"
content = re.sub(
r'&#x(000[0-9A-Fa-f]|001[0-9A-Fa-f]|007[Ff]);',
replace_control_ref,
content,
)
return content
def _restore_control_chars(text):
"""Restore placeholder tokens back to actual characters."""
if text is None:
return None
def restore(m):
code_point = int(m.group(1), 16)
return chr(code_point)
return re.sub(r'__CTRL_U([0-9A-F]{4})__', restore, text)
def parse_keylayout(filepath):
    """Parse a .keylayout XML file into a plain, JSON-serialisable dict.

    The returned dict holds, in this order: ``name``, ``group`` and ``id``
    (attributes of the root element), ``modifierMap``, ``actions``,
    ``terminators``, ``keyMaps`` and ``deadKeys``.
    """
    root = ET.fromstring(_read_keylayout_xml(filepath))

    # keyMapSets are parsed in document order; each one may inherit keys from
    # sets that were parsed before it (baseMapSet/baseIndex)
    key_map_sets = {}
    for kms_elem in root.findall(".//keyMapSet"):
        key_map_sets[kms_elem.get("id")] = parse_key_map_set(kms_elem, key_map_sets)

    actions = parse_actions(root)

    # dead key state → character emitted when the state is terminated
    terminators = {
        when.get("state", ""): _restore_control_chars(when.get("output", ""))
        for when in root.findall(".//terminators/when")
    }

    # merge the key codes of every <layout> entry, keeping only the codes
    # inside that entry's [first, last] range
    resolved = {}
    for layout in root.findall(".//layout"):
        map_set_id = layout.get("mapSet")
        lowest = int(layout.get("first", "0"))
        highest = int(layout.get("last", "0"))
        for idx_str, keys in key_map_sets.get(map_set_id, {}).items():
            bucket = resolved.setdefault(idx_str, {})
            for code_str, entry in keys.items():
                if lowest <= int(code_str) <= highest:
                    bucket[code_str] = entry

    # final keyMaps: modifier layers and key codes in numeric order
    key_maps = {}
    for idx_str in sorted(resolved, key=int):
        layer = resolved[idx_str]
        keys = {}
        for code_str in sorted(layer, key=int):
            code = int(code_str)
            entry = layer[code_str]
            key_info = {
                "code": code,
                "keyName": KEY_CODE_NAMES.get(code, f"code{code}"),
            }
            if "output" in entry:
                key_info["output"] = entry["output"]
            if "action" in entry:
                action_id = entry["action"]
                key_info["action"] = action_id
                action_data = actions.get(action_id)
                if action_data is not None:
                    # an action either produces a base output or enters a
                    # dead key state; the output wins when both exist
                    if "none" in action_data:
                        key_info["output"] = action_data["none"]
                    elif "next" in action_data:
                        key_info["deadKey"] = action_data["next"]
            keys[code_str] = key_info
        key_maps[idx_str] = {
            "label": MODIFIER_LABELS.get(int(idx_str), f"Index {int(idx_str)}"),
            "keys": keys,
        }

    # dead key summary: for every state that has a terminator, collect what
    # each action produces in that state
    dead_keys = {
        state_name: {
            "terminator": terminator,
            "compositions": {
                action_id: action_data[state_name]
                for action_id, action_data in actions.items()
                if state_name in action_data
            },
        }
        for state_name, terminator in terminators.items()
    }

    return {
        "name": root.get("name", ""),
        "group": root.get("group", ""),
        "id": root.get("id", ""),
        "modifierMap": parse_modifier_map(root),
        "actions": actions,
        "terminators": terminators,
        "keyMaps": key_maps,
        "deadKeys": dead_keys,
    }
def parse_modifier_map(root):
    """Collect every ``modifierMap`` element below *root*.

    Returns a dict keyed by the map's ``id``. Each value holds the map's
    ``defaultIndex`` and a ``selects`` list with one item per
    ``keyMapSelect`` child: its ``mapIndex`` plus the ``keys`` attribute of
    each ``modifier`` child. Missing attributes become empty strings.
    """
    return {
        mm.get("id"): {
            "defaultIndex": mm.get("defaultIndex", ""),
            "selects": [
                {
                    "mapIndex": select.get("mapIndex", ""),
                    "modifiers": [
                        mod.get("keys", "") for mod in select.findall("modifier")
                    ],
                }
                for select in mm.findall("keyMapSelect")
            ],
        }
        for mm in root.findall(".//modifierMap")
    }
def parse_key_map_set(kms_element, all_key_map_sets):
    """Parse one ``keyMapSet`` element into ``{index: {code: entry}}``.

    A ``keyMap`` that names both ``baseMapSet`` and ``baseIndex`` starts from
    a copy of that already-parsed map (looked up in *all_key_map_sets*); its
    own ``key`` children then override entries with the same code. Each entry
    carries ``output`` and/or ``action`` when the key has those attributes.
    """
    parsed = {}
    for key_map in kms_element.findall("keyMap"):
        base_set_id = key_map.get("baseMapSet")
        base_index = key_map.get("baseIndex")
        if base_set_id and base_index:
            # inherit from the base map; unknown references yield nothing
            merged = dict(all_key_map_sets.get(base_set_id, {}).get(base_index, {}))
        else:
            merged = {}

        for key_elem in key_map.findall("key"):
            output = key_elem.get("output")
            action = key_elem.get("action")
            entry = {}
            if output is not None:
                entry["output"] = _restore_control_chars(output)
            if action is not None:
                entry["action"] = action
            merged[key_elem.get("code")] = entry

        parsed[key_map.get("index")] = merged
    return parsed
def parse_actions(root):
    """Parse all action elements into a dict of action_id → {state → value}.

    For each ``when`` child, an ``output`` attribute is stored under the
    state name (``"none"`` when the state attribute is absent). Otherwise a
    ``next`` attribute is stored under the key ``"next"`` for the ``none``
    state, or under the state name for any other state. A ``when`` with
    neither attribute contributes nothing.
    """
    actions = {}
    for action in root.findall(".//actions/action"):
        states = {}
        for when in action.findall("when"):
            state = when.get("state", "none")
            output = when.get("output")
            next_state = when.get("next")
            if output is not None:
                states[state] = _restore_control_chars(output)
            elif next_state is not None:
                slot = "next" if state == "none" else state
                states[slot] = next_state
        actions[action.get("id")] = states
    return actions
def format_char(c):
    """Return a printable form of *c* for the summary output.

    A single control character (below U+0020, or U+007F) is shown as
    ``U+XXXX`` and a no-break space as ``NBSP``. Everything else, including
    empty and multi-character strings, is returned unchanged.
    """
    if len(c) != 1:
        return c
    code_point = ord(c)
    if code_point == 0xA0:
        return "NBSP"
    if code_point == 0x7F or code_point < 0x20:
        return f"U+{code_point:04X}"
    return c
def print_summary(data):
    """Print a human-readable overview of a parsed layout to stdout.

    Prints the layout name and dead key state names, then one section per
    modifier layer (in numeric index order) listing every typing key with
    its output, dead key state and action id where present.
    """
    print(f"Layout: {data['name']}")
    print(f"Dead key states: {', '.join(data['deadKeys'].keys())}")
    print()

    key_maps = data["keyMaps"]
    for idx_str in sorted(key_maps, key=int):
        km = key_maps[idx_str]
        print(f"--- {km['label']} (index {idx_str}) ---")
        for code_str, ki in sorted(km["keys"].items(), key=lambda item: int(item[0])):
            code = int(code_str)
            if code in TYPING_KEY_CODES:
                key_name = ki["keyName"]
                output = ki.get("output", "")
                dead = ki.get("deadKey", "")
                formatted = "" if not output else format_char(output)
                extra = "" if not dead else f" [dead: {dead}]"
                action = "" if "action" not in ki else f" (action: {ki['action']})"
                print(f" {key_name:>12s} (code {code:>3d}): {formatted}{extra}{action}")
        print()
def main():
    """Command-line entry point: parse one .keylayout file and report it.

    With ``--summary`` a readable overview is printed; with ``--output`` the
    parsed data is written to that JSON file (parent directories are
    created). When neither option is given the JSON goes to stdout.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Parse Apple .keylayout XML files")
    parser.add_argument("keylayout", help="Path to .keylayout file")
    parser.add_argument("--output", "-o", help="Output JSON file path")
    parser.add_argument(
        "--summary",
        "-s",
        action="store_true",
        help="Print human-readable summary",
    )
    args = parser.parse_args()

    data = parse_keylayout(args.keylayout)
    if args.summary:
        print_summary(data)

    if not args.output:
        # JSON on stdout is the fallback only when no summary was requested
        if not args.summary:
            json.dump(data, sys.stdout, ensure_ascii=False, indent="\t")
            print()
        return

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent="\t")
    print(f"Written to {output_path}")
if __name__ == "__main__":
main()

8
scripts/validate-layouts.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Validate all EurKEY keylayout files against the v1.3 reference spec.
# Exit code 0 if all pass, 1 if any unexpected mismatches.
# Thin wrapper around validate_layouts.py in the same directory; any
# arguments are forwarded to it unchanged.
set -euo pipefail
# Resolve this script's own directory so it works from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# exec replaces the shell, so the Python process's exit status is the script's.
exec python3 "${SCRIPT_DIR}/validate_layouts.py" "$@"

306
scripts/validate_layouts.py Normal file
View File

@@ -0,0 +1,306 @@
#!/usr/bin/env python3
"""Validate .keylayout files against the EurKEY v1.3 reference spec.
Compares each layout version's key mappings and dead key compositions against
the reference, reporting mismatches. Supports per-version exception lists for
intentional differences.
Usage:
python3 scripts/validate_layouts.py
Exit code 0 if all layouts pass validation, 1 if any unexpected mismatches.
"""
import sys
from pathlib import Path
# import the parser
sys.path.insert(0, str(Path(__file__).parent))
from parse_keylayout import parse_keylayout, TYPING_KEY_CODES, MODIFIER_LABELS, KEY_CODE_NAMES
# directory that holds the "EurKEY <version>.keylayout" files, located
# relative to this script (one level up, inside the bundle's Resources)
BUNDLE_DIR = Path(__file__).parent.parent / "EurKey-macOS.bundle" / "Contents" / "Resources"
# modifier indices that contain meaningful typing output
# (exclude index 6 = Command+Option and 7 = Control — these are system shortcuts)
# Stored as strings because keyMaps in the parsed layout are keyed by string.
VALIDATED_MODIFIER_INDICES = {"0", "1", "2", "3", "4", "5"}
def load_layout(version):
    """Parse ``EurKEY <version>.keylayout`` from the bundle directory.

    Prints an error and exits the process with status 2 when the file does
    not exist; otherwise returns the parsed layout dict.
    """
    path = BUNDLE_DIR / f"EurKEY {version}.keylayout"
    if path.exists():
        return parse_keylayout(str(path))
    print(f"ERROR: {path} not found")
    sys.exit(2)
def compare_key_maps(reference, target, exceptions):
    """Compare key maps between reference and target layouts.

    Only the modifier layers in VALIDATED_MODIFIER_INDICES and the key codes
    in TYPING_KEY_CODES are checked, and only keys present in the reference
    are visited (extra keys in the target are not reported).

    Args:
        reference: parsed layout used as the expected result.
        target: parsed layout being validated.
        exceptions: dict that may contain ``"modifierIndex:keyCode"`` entries
            mapping to ``{"output": ...}`` or ``{"deadKey": ...}``; a key with
            such an entry passes when the target matches the listed value.

    Returns:
        A list of (modifier_label, key_name, code, ref_output, target_output)
        tuples, one per mismatch not covered by an exception. Layers are
        reported in numeric index order so the result is reproducible.
    """
    mismatches = []

    # state name → terminator, so dead keys can be compared across versions
    # that use different state names for the same dead key
    ref_state_to_term = {name: dk["terminator"] for name, dk in reference["deadKeys"].items()}
    tgt_state_to_term = {name: dk["terminator"] for name, dk in target["deadKeys"].items()}

    # iterate in numeric order: a set of strings has no stable iteration
    # order between interpreter runs
    for idx_str in sorted(VALIDATED_MODIFIER_INDICES, key=int):
        ref_km = reference["keyMaps"].get(idx_str, {}).get("keys", {})
        tgt_km = target["keyMaps"].get(idx_str, {}).get("keys", {})
        mod_label = MODIFIER_LABELS.get(int(idx_str), f"Index {idx_str}")

        for code_str in ref_km:
            code = int(code_str)
            if code not in TYPING_KEY_CODES:
                continue

            ref_key = ref_km[code_str]
            tgt_key = tgt_km.get(code_str, {})
            ref_output = ref_key.get("output", "")
            tgt_output = tgt_key.get("output", "")
            ref_dead = ref_key.get("deadKey", "")
            tgt_dead = tgt_key.get("deadKey", "")
            key_name = KEY_CODE_NAMES.get(code, f"code{code}")

            # check for exception
            exc_key = f"{idx_str}:{code_str}"
            if exc_key in exceptions:
                expected = exceptions[exc_key]
                if expected.get("output") == tgt_output or expected.get("deadKey") == tgt_dead:
                    continue
                # exception exists but value doesn't match → still a mismatch
                mismatches.append((
                    mod_label, key_name, code,
                    f"{ref_output or ref_dead} (ref)",
                    f"{tgt_output or tgt_dead} (got, expected exception: {expected})",
                ))
                continue

            # compare dead keys by terminator (state names may differ)
            if ref_dead or tgt_dead:
                ref_term = ref_state_to_term.get(ref_dead, ref_dead)
                tgt_term = tgt_state_to_term.get(tgt_dead, tgt_dead)
                # both sides must be dead keys; otherwise an empty terminator
                # would compare equal to "not a dead key"
                if ref_dead and tgt_dead and ref_term == tgt_term:
                    continue  # same dead key, different name
                # dead keys differ; a side that is not a dead key is shown
                # with its plain output instead of an empty "[dead: ]"
                if ref_dead:
                    ref_display = f"[dead: {ref_dead}{ref_term}]"
                else:
                    ref_display = ref_output or "[missing]"
                if tgt_dead:
                    tgt_display = f"[dead: {tgt_dead}{tgt_term}]"
                else:
                    tgt_display = tgt_output or "[missing]"
                mismatches.append((mod_label, key_name, code, ref_display, tgt_display))
                continue

            # compare regular outputs (neither side is a dead key here)
            if ref_output != tgt_output:
                tgt_display = tgt_output if tgt_output else "[missing]"
                mismatches.append((mod_label, key_name, code, ref_output, tgt_display))

    return mismatches
def _build_terminator_map(data):
"""Build a mapping from terminator character → dead key data.
Different layout versions may use different state names (e.g., "dead: ^" vs "7")
but the same terminator character. This allows matching by terminator.
"""
return {dk["terminator"]: (name, dk) for name, dk in data["deadKeys"].items()}
def _composition_output_set(compositions):
"""Extract the set of output characters from a dead key's compositions.
Since action IDs differ between versions (e.g., "a" vs "a61"), we compare
by the set of output characters produced, not by action ID.
"""
return set(compositions.values())
def compare_dead_keys(reference, target, exceptions):
    """Compare dead key compositions between reference and target.

    Dead key states are paired by terminator character because state names
    may differ between versions, and each pair is compared by the set of
    outputs it can produce. Reference states listed under
    ``exceptions["_dead_key_skip"]`` are ignored; single outputs are excused
    by ``"dead:<state>:output:<char>"`` (missing in target) or
    ``"dead:<state>:extra:<char>"`` (only in target) keys.

    Returns a list of (dead_key_state, detail, ref_value, target_value) tuples.
    """
    mismatches = []
    ref_by_term = _build_terminator_map(reference)
    tgt_by_term = _build_terminator_map(target)

    for terminator, (ref_name, ref_dk) in ref_by_term.items():
        if ref_name in exceptions.get("_dead_key_skip", []):
            continue

        tgt_entry = tgt_by_term.get(terminator)
        if tgt_entry is None:
            # the whole dead key is absent from the target
            mismatches.append((ref_name, "*", "present", "missing"))
            continue

        ref_outputs = _composition_output_set(ref_dk["compositions"])
        tgt_outputs = _composition_output_set(tgt_entry[1]["compositions"])
        lost = sorted(ref_outputs.difference(tgt_outputs))
        added = sorted(tgt_outputs.difference(ref_outputs))

        mismatches.extend(
            (ref_name, f"output {out}", "present", "missing")
            for out in lost
            if f"dead:{ref_name}:output:{out}" not in exceptions
        )
        mismatches.extend(
            (ref_name, f"output {out}", "missing", "present")
            for out in added
            if f"dead:{ref_name}:extra:{out}" not in exceptions
        )

    return mismatches
def format_char_display(c):
    """Return *c* in a form that is safe to print in the report.

    Only a single control character (below U+0020, or U+007F) is rewritten,
    as ``U+XXXX``. Empty values, the report markers "[missing]", "missing"
    and "present", and all other text are returned unchanged.
    """
    markers = ("[missing]", "missing", "present")
    if c and c not in markers and len(c) == 1:
        code_point = ord(c)
        if code_point == 0x7F or code_point < 0x20:
            return f"U+{code_point:04X}"
    return c
# --- per-version exception definitions ---
# format: {"modifierIndex:keyCode": {"output": "expected_value"}}
# or {"_dead_key_skip": ["state_name", ...]} to skip entire dead key states
# Further key forms read by the validator:
#   "dead:<state>:output:<char>"  — <char> may be missing from the target's dead key
#   "dead:<state>:extra:<char>"   — <char> may be present only in the target's dead key
#   "_skip_validation": True      — the version is reported as skipped, not compared
# v1.2 predates v1.3 — known differences documented here
V1_2_EXCEPTIONS = {
# Shift+Option S: v1.2 has § where v1.3 has ẞ (capital sharp s)
"4:1": {"output": "§"},
# v1.2 does not have the ¬ (negation) dead key — added in v1.3
"_dead_key_skip": ["dead: ¬"],
}
# v1.4 differences from v1.3:
# - §/` key (code 10) in Caps/Caps+Option outputs ẞ instead of §
# - ¬ dead key has an extra ¬ composition (self-referencing)
# NOTE(review): the two "…:10" entries below expect an empty output, while
# their comments say the key outputs ẞ — confirm which one is intended.
V1_4_EXCEPTIONS = {
"2:10": {"output": ""}, # Caps: §/` → ẞ (capital sharp s)
"5:10": {"output": ""}, # Caps+Option: §/` → ẞ
"dead:dead: ¬:extra:¬": True, # extra ¬ composition in negation dead key
}
# v2.0 is a custom edition — skip validation for now, just document diffs
V2_0_EXCEPTIONS = {
"_skip_validation": True,
}
# version key → layout file version ("file"), its exception table and the
# label printed in the report
VERSIONS = {
"v1.2": {"file": "v1.2", "exceptions": V1_2_EXCEPTIONS, "label": "EurKEY v1.2"},
"v1.3": {"file": "v1.3", "exceptions": {}, "label": "EurKEY v1.3 (reference)"},
"v1.4": {"file": "v1.4", "exceptions": V1_4_EXCEPTIONS, "label": "EurKEY v1.4"},
"v2.0": {"file": "v2.0", "exceptions": V2_0_EXCEPTIONS, "label": "EurKEY v2.0 (custom)"},
}
def validate_version(version_key, reference):
    """Validate one configured version against the reference and print a report.

    A version whose exceptions contain ``_skip_validation`` is announced as
    skipped and counts as passing. Otherwise its layout is loaded, key maps
    and dead keys are compared, and every mismatch is printed.

    Returns a ``(passed, mismatch_count)`` tuple.
    """
    config = VERSIONS[version_key]
    exceptions = config["exceptions"]

    if exceptions.get("_skip_validation"):
        print(f"\n{'='*60}")
        print(f" {config['label']} — SKIPPED (custom edition)")
        print(f"{'='*60}")
        return True, 0

    target = load_layout(config["file"])
    print(f"\n{'='*60}")
    print(f" Validating {config['label']} against v1.3 reference")
    print(f"{'='*60}")

    key_mismatches = compare_key_maps(reference, target, exceptions)
    dk_mismatches = compare_dead_keys(reference, target, exceptions)
    total = len(key_mismatches) + len(dk_mismatches)
    passed = total == 0

    if key_mismatches:
        print(f"\n Key mapping mismatches ({len(key_mismatches)}):")
        for mod_label, key_name, code, ref_out, tgt_out in key_mismatches:
            print(f" {mod_label:>14s} | {key_name:>12s} (code {code:>3d}): "
                  f"ref={format_char_display(ref_out)} got={format_char_display(tgt_out)}")

    if dk_mismatches:
        print(f"\n Dead key mismatches ({len(dk_mismatches)}):")
        # the second tuple field is a detail string ("*" or "output <char>")
        for state, action_id, ref_out, tgt_out in dk_mismatches:
            print(f" {state:>12s} + {action_id}: "
                  f"ref={format_char_display(ref_out)} got={format_char_display(tgt_out)}")

    if passed:
        print(f"\n PASS — no unexpected mismatches")
    else:
        print(f"\n FAIL — {total} unexpected mismatch(es)")
    return passed, total
def self_validate(reference):
    """Sanity check: a freshly loaded v1.3 must compare equal to *reference*.

    Runs both comparisons without exceptions. Returns True when nothing
    differs; otherwise prints every mismatch and returns False.
    """
    target = load_layout("v1.3")
    key_mismatches = compare_key_maps(reference, target, {})
    dk_mismatches = compare_dead_keys(reference, target, {})

    if not key_mismatches and not dk_mismatches:
        print(" Self-check: v1.3 matches itself ✓")
        return True

    print("INTERNAL ERROR: v1.3 does not match itself!")
    for m in key_mismatches:
        print(f" key: {m}")
    for m in dk_mismatches:
        print(f" dead: {m}")
    return False
def main():
    """Validate every configured layout version against EurKEY v1.3.

    Exits with status 2 when the reference fails its self-check, 1 when any
    version has unexpected mismatches, and 0 otherwise.
    """
    print("EurKEY-macOS Layout Validation")
    print("Reference: EurKEY v1.3")

    reference = load_layout("v1.3")
    if not self_validate(reference):
        sys.exit(2)

    # v1.3 is the reference itself, so it is not compared again
    results = [
        validate_version(version_key, reference)
        for version_key in VERSIONS
        if version_key != "v1.3"
    ]
    all_pass = all(passed for passed, _ in results)
    total_mismatches = sum(count for _, count in results)

    print(f"\n{'='*60}")
    if all_pass:
        print(" ALL LAYOUTS PASS ✓")
    else:
        print(f" VALIDATION FAILED — {total_mismatches} total mismatch(es)")
    print(f"{'='*60}")
    sys.exit(0 if all_pass else 1)
if __name__ == "__main__":
main()

2282
spec/eurkey-v1.2-parsed.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

2276
spec/eurkey-v1.4-parsed.json Normal file

File diff suppressed because it is too large Load Diff

2281
spec/eurkey-v2.0-parsed.json Normal file

File diff suppressed because it is too large Load Diff