mirror of
https://github.com/felixfoertsch/EurKEY-macOS.git
synced 2026-04-16 06:28:28 +02:00
add validation infrastructure, fix "6" key bug in v1.3
- add keylayout XML parser (parse_keylayout.py) that extracts all key mappings, dead key compositions, modifier layers from .keylayout files - add validation script (validate_layouts.py) that compares layouts against v1.3 reference with per-version exception support - fix action id="6" outputting "p" instead of "6" in v1.3 - generate reference JSON for all 4 layout versions - document known intentional differences: v1.2 (no ¬ dead key, § vs ẞ), v1.4 (ẞ on §/` caps, extra ¬ composition) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
355
scripts/parse_keylayout.py
Normal file
355
scripts/parse_keylayout.py
Normal file
@@ -0,0 +1,355 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Parse Apple .keylayout XML files into a flat JSON representation.
|
||||
|
||||
Extracts all key mappings across modifier layers (base, Shift, Caps, Option,
|
||||
Shift+Option, Caps+Option, Command+Option) and resolves dead key states to
|
||||
their composed outputs.
|
||||
|
||||
Usage:
|
||||
python3 scripts/parse_keylayout.py <file.keylayout> [--output file.json]
|
||||
|
||||
Output JSON structure:
|
||||
{
|
||||
"name": "EurKEY v1.3",
|
||||
"modifierMap": { ... },
|
||||
"keyMaps": {
|
||||
"0": { "label": "Base", "keys": { "0": {"output": "a", ...}, ... } },
|
||||
...
|
||||
},
|
||||
"actions": {
|
||||
"a": {
|
||||
"none": "a",
|
||||
"dead: ^": "â",
|
||||
...
|
||||
},
|
||||
...
|
||||
},
|
||||
"deadKeys": {
|
||||
"dead: ^": { "terminator": "^", "compositions": { "a": "â", "A": "Â", ... } },
|
||||
...
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
# macOS key code → physical key name (US ANSI/ISO layout)
|
||||
KEY_CODE_NAMES = {
|
||||
0: "A", 1: "S", 2: "D", 3: "F", 4: "H", 5: "G",
|
||||
6: "Z", 7: "X", 8: "C", 9: "V", 10: "§/`",
|
||||
11: "B", 12: "Q", 13: "W", 14: "E", 15: "R",
|
||||
16: "Y", 17: "T", 18: "1", 19: "2", 20: "3",
|
||||
21: "4", 22: "6", 23: "5", 24: "=", 25: "9",
|
||||
26: "7", 27: "-", 28: "8", 29: "0", 30: "]",
|
||||
31: "O", 32: "U", 33: "[", 34: "I", 35: "P",
|
||||
36: "Return", 37: "L", 38: "J", 39: "'", 40: "K",
|
||||
41: ";", 42: "\\", 43: ",", 44: "/", 45: "N",
|
||||
46: "M", 47: ".", 48: "Tab", 49: "Space", 50: "`",
|
||||
51: "Delete", 52: "Enter", 53: "Escape",
|
||||
# numpad
|
||||
65: "KP.", 67: "KP*", 69: "KP+", 75: "KP/",
|
||||
76: "KPEnter", 78: "KP-", 81: "KP=",
|
||||
82: "KP0", 83: "KP1", 84: "KP2", 85: "KP3",
|
||||
86: "KP4", 87: "KP5", 88: "KP6", 89: "KP7",
|
||||
91: "KP8", 92: "KP9",
|
||||
# iso extra key
|
||||
93: "ISO§", 94: "ISO_backslash", 95: "ISO_comma",
|
||||
# function/navigation keys
|
||||
96: "F5", 97: "F6", 98: "F7", 99: "F3",
|
||||
100: "F8", 101: "F9", 103: "F11", 105: "F13",
|
||||
107: "F14", 109: "F10", 111: "F12", 113: "F15",
|
||||
114: "Help/Insert", 115: "Home", 116: "PageUp",
|
||||
117: "ForwardDelete", 118: "F4", 119: "End",
|
||||
120: "F2", 121: "PageDown", 122: "F1",
|
||||
123: "Left", 124: "Right", 125: "Down", 126: "Up",
|
||||
}
|
||||
|
||||
# modifier map index → human-readable label
|
||||
MODIFIER_LABELS = {
|
||||
0: "Base",
|
||||
1: "Shift",
|
||||
2: "Caps",
|
||||
3: "Option",
|
||||
4: "Shift+Option",
|
||||
5: "Caps+Option",
|
||||
6: "Command+Option",
|
||||
7: "Control",
|
||||
}
|
||||
|
||||
# key codes that are "typing" keys (not function/navigation/control)
|
||||
TYPING_KEY_CODES = set(range(0, 50)) | {50, 93, 94, 95}
|
||||
|
||||
|
||||
def _read_keylayout_xml(filepath):
|
||||
"""Read a .keylayout file, working around XML 1.1 control character references.
|
||||
|
||||
Apple .keylayout files declare XML 1.1 and use numeric character references
|
||||
for control characters ( through ) that are invalid in XML 1.0.
|
||||
Python's ElementTree only supports XML 1.0, so we convert control character
|
||||
references to placeholder tokens, parse, then restore them.
|
||||
"""
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
# downgrade XML declaration from 1.1 to 1.0
|
||||
content = content.replace('version="1.1"', 'version="1.0"')
|
||||
|
||||
# strip the DOCTYPE (references local DTD that may not exist)
|
||||
content = re.sub(r'<!DOCTYPE[^>]*>', '', content)
|
||||
|
||||
# replace control character references with placeholder strings
|
||||
#  through  and  are problematic in XML 1.0
|
||||
def replace_control_ref(m):
|
||||
code_point = int(m.group(1), 16)
|
||||
return f"__CTRL_U{code_point:04X}__"
|
||||
|
||||
content = re.sub(
|
||||
r'&#x(000[0-9A-Fa-f]|001[0-9A-Fa-f]|007[Ff]);',
|
||||
replace_control_ref,
|
||||
content,
|
||||
)
|
||||
|
||||
return content
|
||||
|
||||
|
||||
def _restore_control_chars(text):
|
||||
"""Restore placeholder tokens back to actual characters."""
|
||||
if text is None:
|
||||
return None
|
||||
|
||||
def restore(m):
|
||||
code_point = int(m.group(1), 16)
|
||||
return chr(code_point)
|
||||
|
||||
return re.sub(r'__CTRL_U([0-9A-F]{4})__', restore, text)
|
||||
|
||||
|
||||
def parse_keylayout(filepath):
|
||||
"""Parse a .keylayout XML file and return a structured dict."""
|
||||
xml_content = _read_keylayout_xml(filepath)
|
||||
root = ET.fromstring(xml_content)
|
||||
|
||||
result = {
|
||||
"name": root.get("name", ""),
|
||||
"group": root.get("group", ""),
|
||||
"id": root.get("id", ""),
|
||||
}
|
||||
|
||||
# parse modifier map
|
||||
result["modifierMap"] = parse_modifier_map(root)
|
||||
|
||||
# parse all keyMapSets
|
||||
key_map_sets = {}
|
||||
for kms in root.findall(".//keyMapSet"):
|
||||
kms_id = kms.get("id")
|
||||
key_map_sets[kms_id] = parse_key_map_set(kms, key_map_sets)
|
||||
|
||||
# parse actions (dead key compositions)
|
||||
actions = parse_actions(root)
|
||||
result["actions"] = actions
|
||||
|
||||
# parse terminators
|
||||
terminators = {}
|
||||
for term in root.findall(".//terminators/when"):
|
||||
state = term.get("state", "")
|
||||
output = _restore_control_chars(term.get("output", ""))
|
||||
terminators[state] = output
|
||||
result["terminators"] = terminators
|
||||
|
||||
# resolve layouts
|
||||
layouts = root.findall(".//layout")
|
||||
|
||||
# build resolved key maps with all key codes from all layout entries
|
||||
resolved = {}
|
||||
for layout in layouts:
|
||||
map_set_id = layout.get("mapSet")
|
||||
first_code = int(layout.get("first", "0"))
|
||||
last_code = int(layout.get("last", "0"))
|
||||
kms = key_map_sets.get(map_set_id, {})
|
||||
|
||||
for idx_str, keys in kms.items():
|
||||
if idx_str not in resolved:
|
||||
resolved[idx_str] = {}
|
||||
for code_str, entry in keys.items():
|
||||
code = int(code_str)
|
||||
if first_code <= code <= last_code:
|
||||
resolved[idx_str][code_str] = entry
|
||||
|
||||
# build the final keyMaps output
|
||||
key_maps = {}
|
||||
for idx_str in sorted(resolved.keys(), key=int):
|
||||
idx = int(idx_str)
|
||||
label = MODIFIER_LABELS.get(idx, f"Index {idx}")
|
||||
keys = {}
|
||||
for code_str in sorted(resolved[idx_str].keys(), key=int):
|
||||
code = int(code_str)
|
||||
entry = resolved[idx_str][code_str]
|
||||
key_name = KEY_CODE_NAMES.get(code, f"code{code}")
|
||||
key_info = {"code": code, "keyName": key_name}
|
||||
|
||||
if "output" in entry:
|
||||
key_info["output"] = entry["output"]
|
||||
if "action" in entry:
|
||||
action_id = entry["action"]
|
||||
key_info["action"] = action_id
|
||||
# resolve the action's base output
|
||||
if action_id in actions:
|
||||
action_data = actions[action_id]
|
||||
if "none" in action_data:
|
||||
key_info["output"] = action_data["none"]
|
||||
elif "next" in action_data:
|
||||
key_info["deadKey"] = action_data["next"]
|
||||
|
||||
keys[code_str] = key_info
|
||||
key_maps[idx_str] = {"label": label, "keys": keys}
|
||||
|
||||
result["keyMaps"] = key_maps
|
||||
|
||||
# build dead key summary
|
||||
dead_keys = {}
|
||||
for state_name, terminator in terminators.items():
|
||||
compositions = {}
|
||||
for action_id, action_data in actions.items():
|
||||
if state_name in action_data:
|
||||
compositions[action_id] = action_data[state_name]
|
||||
dead_keys[state_name] = {
|
||||
"terminator": terminator,
|
||||
"compositions": compositions,
|
||||
}
|
||||
result["deadKeys"] = dead_keys
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def parse_modifier_map(root):
|
||||
"""Parse the modifierMap element."""
|
||||
mod_map = {}
|
||||
for mm in root.findall(".//modifierMap"):
|
||||
mm_id = mm.get("id")
|
||||
default_index = mm.get("defaultIndex", "")
|
||||
selects = []
|
||||
for kms in mm.findall("keyMapSelect"):
|
||||
map_index = kms.get("mapIndex", "")
|
||||
modifiers = []
|
||||
for mod in kms.findall("modifier"):
|
||||
modifiers.append(mod.get("keys", ""))
|
||||
selects.append({"mapIndex": map_index, "modifiers": modifiers})
|
||||
mod_map[mm_id] = {"defaultIndex": default_index, "selects": selects}
|
||||
return mod_map
|
||||
|
||||
|
||||
def parse_key_map_set(kms_element, all_key_map_sets):
|
||||
"""Parse a keyMapSet element, resolving baseMapSet/baseIndex references."""
|
||||
result = {}
|
||||
for km in kms_element.findall("keyMap"):
|
||||
index = km.get("index")
|
||||
keys = {}
|
||||
|
||||
# resolve base map set if specified
|
||||
base_map_set_id = km.get("baseMapSet")
|
||||
base_index = km.get("baseIndex")
|
||||
if base_map_set_id and base_index:
|
||||
base_kms = all_key_map_sets.get(base_map_set_id, {})
|
||||
base_keys = base_kms.get(base_index, {})
|
||||
keys.update(base_keys)
|
||||
|
||||
# parse keys in this keyMap (override base)
|
||||
for key in km.findall("key"):
|
||||
code = key.get("code")
|
||||
entry = {}
|
||||
if key.get("output") is not None:
|
||||
entry["output"] = _restore_control_chars(key.get("output"))
|
||||
if key.get("action") is not None:
|
||||
entry["action"] = key.get("action")
|
||||
keys[code] = entry
|
||||
|
||||
result[index] = keys
|
||||
return result
|
||||
|
||||
|
||||
def parse_actions(root):
|
||||
"""Parse all action elements into a dict of action_id → {state → output/next}."""
|
||||
actions = {}
|
||||
for action in root.findall(".//actions/action"):
|
||||
action_id = action.get("id")
|
||||
states = {}
|
||||
for when in action.findall("when"):
|
||||
state = when.get("state", "none")
|
||||
if when.get("output") is not None:
|
||||
states[state] = _restore_control_chars(when.get("output"))
|
||||
elif when.get("next") is not None:
|
||||
if state == "none":
|
||||
states["next"] = when.get("next")
|
||||
else:
|
||||
states[state] = f"→{when.get('next')}"
|
||||
actions[action_id] = states
|
||||
return actions
|
||||
|
||||
|
||||
def format_char(c):
|
||||
"""Format a character for display, showing control chars as hex."""
|
||||
if len(c) == 1:
|
||||
cp = ord(c)
|
||||
if cp < 0x20 or cp == 0x7F:
|
||||
return f"U+{cp:04X}"
|
||||
if cp == 0xA0:
|
||||
return "NBSP"
|
||||
return c
|
||||
|
||||
|
||||
def print_summary(data):
|
||||
"""Print a human-readable summary of the parsed layout."""
|
||||
print(f"Layout: {data['name']}")
|
||||
print(f"Dead key states: {', '.join(data['deadKeys'].keys())}")
|
||||
print()
|
||||
|
||||
for idx_str in sorted(data["keyMaps"].keys(), key=int):
|
||||
km = data["keyMaps"][idx_str]
|
||||
print(f"--- {km['label']} (index {idx_str}) ---")
|
||||
for code_str in sorted(km["keys"].keys(), key=int):
|
||||
code = int(code_str)
|
||||
if code not in TYPING_KEY_CODES:
|
||||
continue
|
||||
ki = km["keys"][code_str]
|
||||
key_name = ki["keyName"]
|
||||
output = ki.get("output", "")
|
||||
dead = ki.get("deadKey", "")
|
||||
formatted = format_char(output) if output else ""
|
||||
extra = f" [dead: {dead}]" if dead else ""
|
||||
action = f" (action: {ki['action']})" if "action" in ki else ""
|
||||
print(f" {key_name:>12s} (code {code:>3d}): {formatted}{extra}{action}")
|
||||
print()
|
||||
|
||||
|
||||
def main():
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Parse Apple .keylayout XML files")
|
||||
parser.add_argument("keylayout", help="Path to .keylayout file")
|
||||
parser.add_argument("--output", "-o", help="Output JSON file path")
|
||||
parser.add_argument("--summary", "-s", action="store_true",
|
||||
help="Print human-readable summary")
|
||||
args = parser.parse_args()
|
||||
|
||||
data = parse_keylayout(args.keylayout)
|
||||
|
||||
if args.summary:
|
||||
print_summary(data)
|
||||
|
||||
if args.output:
|
||||
output_path = Path(args.output)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, ensure_ascii=False, indent="\t")
|
||||
print(f"Written to {output_path}")
|
||||
elif not args.summary:
|
||||
json.dump(data, sys.stdout, ensure_ascii=False, indent="\t")
|
||||
print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user