Files
EurKEY-macOS/scripts/parse_keylayout.py
Felix Förtsch 92422e1bf1 fix modifier key order, add EU badge template icons, rename build scripts
- fix modifier key order to Apple canonical: Option+Shift (not Shift+Option)
  across parser, validator, PDF generator, website keyboard viewer, README
- add EU badge template icons for v1.2/v1.3/v1.4 matching Apple's built-in
  keyboard layout icon style (edge-to-edge rounded square, text knockout)
- add build-icons.sh to generate .icns from SVG source via rsvg-convert
- rename create-dmg.sh → build-dmg.sh, update CI workflows
- add website feature icons (install, pdf, versions)
- update website icon to star-on-key design

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-14 11:01:56 +01:00

364 lines
11 KiB
Python

#!/usr/bin/env python3
"""Parse Apple .keylayout XML files into a flat JSON representation.
Extracts all key mappings across modifier layers (Base, Shift, Caps, Option,
Option+Shift, Caps+Option, Option+Command, Control) and resolves dead key
states to their composed outputs.
Usage:
python3 scripts/parse_keylayout.py <file.keylayout> [--output file.json]
Output JSON structure:
    {
        "name": "EurKEY v1.3",
        "group": "...",
        "id": "...",
        "modifierMap": { ... },
        "keyMaps": {
            "0": { "label": "Base", "keys": { "0": {"output": "a", ...}, ... } },
            ...
        },
        "actions": {
            "a": {
                "none": "a",
                "dead: ^": "â",
                ...
            },
            ...
        },
        "terminators": { "dead: ^": "^", ... },
        "deadKeys": {
            "dead: ^": { "terminator": "^", "compositions": { "a": "â", "A": "Â", ... } },
            ...
        }
    }
"""
import json
import re
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
# macOS virtual key code → physical key name (US ANSI/ISO layout).
# Codes missing from this table have no name here; lookups should supply
# their own fallback.
KEY_CODE_NAMES = {
    # main typing block
    0: "A", 1: "S", 2: "D", 3: "F",
    4: "H", 5: "G", 6: "Z", 7: "X",
    8: "C", 9: "V", 10: "§/`", 11: "B",
    12: "Q", 13: "W", 14: "E", 15: "R",
    16: "Y", 17: "T", 18: "1", 19: "2",
    20: "3", 21: "4", 22: "6", 23: "5",
    24: "=", 25: "9", 26: "7", 27: "-",
    28: "8", 29: "0", 30: "]", 31: "O",
    32: "U", 33: "[", 34: "I", 35: "P",
    36: "Return", 37: "L", 38: "J", 39: "'",
    40: "K", 41: ";", 42: "\\", 43: ",",
    44: "/", 45: "N", 46: "M", 47: ".",
    48: "Tab", 49: "Space", 50: "`", 51: "Delete",
    52: "Enter", 53: "Escape",
    # numeric keypad
    65: "KP.", 67: "KP*", 69: "KP+", 75: "KP/",
    76: "KPEnter", 78: "KP-", 81: "KP=", 82: "KP0",
    83: "KP1", 84: "KP2", 85: "KP3", 86: "KP4",
    87: "KP5", 88: "KP6", 89: "KP7", 91: "KP8",
    92: "KP9",
    # extra keys on non-ANSI keyboards
    # NOTE(review): Apple's Events.h names 93-95 as JIS keys (Yen, Underscore,
    # KeypadComma) — confirm that the ISO labels below are intentional.
    93: "ISO§", 94: "ISO_backslash", 95: "ISO_comma",
    # function keys and navigation cluster
    96: "F5", 97: "F6", 98: "F7", 99: "F3",
    100: "F8", 101: "F9", 103: "F11", 105: "F13",
    107: "F14", 109: "F10", 111: "F12", 113: "F15",
    114: "Help/Insert", 115: "Home", 116: "PageUp", 117: "ForwardDelete",
    118: "F4", 119: "End", 120: "F2", 121: "PageDown",
    122: "F1",
    # arrow keys
    123: "Left", 124: "Right", 125: "Down", 126: "Up",
}
# keyMap index → human-readable label of the modifier layer; the position of
# each label in the tuple is its index
MODIFIER_LABELS = dict(enumerate((
    "Base",
    "Shift",
    "Caps",
    "Option",
    "Option+Shift",
    "Caps+Option",
    "Option+Command",
    "Control",
)))
# key codes treated as "typing" keys (as opposed to function/navigation keys):
# the contiguous block 0-50 plus the three extra keys 93-95
TYPING_KEY_CODES = {*range(51), 93, 94, 95}
def _read_keylayout_xml(filepath):
    """Read a .keylayout file and return text that an XML 1.0 parser accepts.

    Apple .keylayout files declare XML 1.1 and use numeric character
    references for control characters, which are invalid in XML 1.0.
    Python's ElementTree only supports XML 1.0, so the text is rewritten
    before parsing:

    * the XML declaration is downgraded from version 1.1 to 1.0,
    * the DOCTYPE is removed (it references a local DTD that may not exist),
    * every numeric reference to a control character (U+0000-U+001F and
      U+007F) becomes a ``__CTRL_Uxxxx__`` placeholder token.

    References are recognised in any spelling: zero-padded hex
    (``&#x001B;``), short hex (``&#x1B;``) and decimal (``&#27;``).
    References to any other character are left untouched.

    Args:
        filepath: path of the .keylayout file; it is read as UTF-8.

    Returns:
        The rewritten XML document as a string. Placeholders still have to
        be converted back to real characters after parsing.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # downgrade the XML declaration from 1.1 to 1.0; limited to the
    # declaration itself and tolerant of single quotes / spaces around "="
    content = re.sub(
        r'(<\?xml\b[^>]*?\bversion\s*=\s*["\'])1\.1(?=["\'])',
        r'\g<1>1.0',
        content,
        count=1,
    )

    # strip the DOCTYPE (references local DTD that may not exist)
    content = re.sub(r'<!DOCTYPE[^>]*>', '', content)

    def replace_control_ref(m):
        # group 1 holds hex digits, group 2 decimal digits; exactly one is set
        hex_digits = m.group(1)
        code_point = int(hex_digits, 16) if hex_digits is not None else int(m.group(2))
        if code_point < 0x20 or code_point == 0x7F:
            return f"__CTRL_U{code_point:04X}__"
        # not a control character: keep the reference exactly as written
        return m.group(0)

    # decide by code point rather than by spelling, so short hex and decimal
    # references are caught as well as the zero-padded form
    return re.sub(
        r'&#(?:x([0-9A-Fa-f]{1,8})|([0-9]{1,8}));',
        replace_control_ref,
        content,
    )
def _restore_control_chars(text):
    """Turn ``__CTRL_Uxxxx__`` placeholder tokens back into real characters.

    Each token is replaced by the character whose code point is given by its
    four uppercase hex digits. ``None`` is passed through unchanged; text
    without tokens is returned as-is.
    """
    if text is None:
        return None
    return re.sub(
        r'__CTRL_U([0-9A-F]{4})__',
        lambda token: chr(int(token.group(1), 16)),
        text,
    )
def parse_keylayout(filepath, keyboard_type=0):
    """Load a .keylayout XML file and flatten it into a structured dict.

    Args:
        filepath: path of the .keylayout file.
        keyboard_type: hardware keyboard type ID. The first <layout> element
            whose first..last range contains it decides which keyMapSet is
            used; if none matches, the first <layout> in the file is used.
            Default 0 = MacBook built-in.

    Returns:
        dict with the keys "name", "group", "id", "modifierMap", "actions",
        "terminators", "keyMaps" and "deadKeys".
    """
    root = ET.fromstring(_read_keylayout_xml(filepath))

    result = {attr: root.get(attr, "") for attr in ("name", "group", "id")}
    result["modifierMap"] = parse_modifier_map(root)

    # keyMapSets are parsed in document order; each call sees the sets parsed
    # so far, which is what baseMapSet references are resolved against
    map_sets = {}
    for map_set_el in root.findall(".//keyMapSet"):
        map_sets[map_set_el.get("id")] = parse_key_map_set(map_set_el, map_sets)

    # actions carry the dead key compositions
    actions = parse_actions(root)
    result["actions"] = actions

    # dead key state → output produced when the state is terminated
    terminators = {
        when_el.get("state", ""): _restore_control_chars(when_el.get("output", ""))
        for when_el in root.findall(".//terminators/when")
    }
    result["terminators"] = terminators

    # pick the mapSet whose <layout> range covers the requested keyboard type
    layouts = root.findall(".//layout")
    selected_id = None
    for layout_el in layouts:
        lowest = int(layout_el.get("first", "0"))
        highest = int(layout_el.get("last", "0"))
        if lowest <= keyboard_type <= highest:
            selected_id = layout_el.get("mapSet")
            break
    if selected_id is None and layouts:
        # nothing matched: fall back to the first layout entry
        selected_id = layouts[0].get("mapSet")

    selected_layers = map_sets.get(selected_id, {})

    # build the keyMaps output, layers and keys in ascending numeric order
    key_maps = {}
    for index_str in sorted(selected_layers, key=int):
        layer = selected_layers[index_str]
        index = int(index_str)
        keys = {}
        for code_str in sorted(layer, key=int):
            entry = layer[code_str]
            code = int(code_str)
            info = {"code": code, "keyName": KEY_CODE_NAMES.get(code, f"code{code}")}
            if "output" in entry:
                info["output"] = entry["output"]
            if "action" in entry:
                action_id = info["action"] = entry["action"]
                # an action either types something in the "none" state or
                # enters a dead key state; unknown action ids resolve to nothing
                states = actions.get(action_id, {})
                if "none" in states:
                    info["output"] = states["none"]
                elif "next" in states:
                    info["deadKey"] = states["next"]
            keys[code_str] = info
        key_maps[index_str] = {
            "label": MODIFIER_LABELS.get(index, f"Index {index}"),
            "keys": keys,
        }
    result["keyMaps"] = key_maps

    # dead key summary: for every terminated state, what each action yields in it
    result["deadKeys"] = {
        state_name: {
            "terminator": terminator,
            "compositions": {
                action_id: states[state_name]
                for action_id, states in actions.items()
                if state_name in states
            },
        }
        for state_name, terminator in terminators.items()
    }
    return result
def parse_modifier_map(root):
    """Collect every <modifierMap> below *root*, keyed by its ``id`` attribute.

    Each value holds the map's ``defaultIndex`` and a "selects" list with one
    item per <keyMapSelect> child: its ``mapIndex`` plus the ``keys``
    attribute of each <modifier> inside it. Missing attributes become "".
    """
    return {
        map_el.get("id"): {
            "defaultIndex": map_el.get("defaultIndex", ""),
            "selects": [
                {
                    "mapIndex": select_el.get("mapIndex", ""),
                    "modifiers": [
                        modifier_el.get("keys", "")
                        for modifier_el in select_el.findall("modifier")
                    ],
                }
                for select_el in map_el.findall("keyMapSelect")
            ],
        }
        for map_el in root.findall(".//modifierMap")
    }
def parse_key_map_set(kms_element, all_key_map_sets):
    """Parse one <keyMapSet> into ``{keyMap index: {key code: entry}}``.

    Args:
        kms_element: the <keyMapSet> element to parse.
        all_key_map_sets: already-parsed key map sets keyed by id, used to
            look up ``baseMapSet``/``baseIndex`` references. An unknown
            reference contributes no inherited keys.

    Returns:
        dict mapping each keyMap ``index`` attribute to its keys. Every entry
        holds an "output" and/or "action" item, depending on which attributes
        the <key> element carries.
    """
    parsed = {}
    for key_map_el in kms_element.findall("keyMap"):
        base_set_id = key_map_el.get("baseMapSet")
        base_idx = key_map_el.get("baseIndex")

        # start from a copy of the inherited layer, if one is referenced
        if base_set_id and base_idx:
            layer = dict(all_key_map_sets.get(base_set_id, {}).get(base_idx, {}))
        else:
            layer = {}

        # keys declared in this keyMap replace inherited ones
        for key_el in key_map_el.findall("key"):
            output = key_el.get("output")
            action = key_el.get("action")
            entry = {}
            if output is not None:
                entry["output"] = _restore_control_chars(output)
            if action is not None:
                entry["action"] = action
            layer[key_el.get("code")] = entry

        parsed[key_map_el.get("index")] = layer
    return parsed
def parse_actions(root):
    """Parse all <action> elements into ``{action id: {state: value}}``.

    For each <when> child (state defaults to "none"):

    * an ``output`` attribute is stored under the state name;
    * otherwise a ``next`` attribute is stored under the key "next" when the
      state is "none", and under the state name for any other state;
    * a <when> with neither attribute is ignored.
    """
    parsed = {}
    for action_el in root.findall(".//actions/action"):
        transitions = {}
        for when_el in action_el.findall("when"):
            state = when_el.get("state", "none")
            output = when_el.get("output")
            if output is not None:
                transitions[state] = _restore_control_chars(output)
                continue
            following = when_el.get("next")
            if following is None:
                continue
            transitions["next" if state == "none" else state] = following
        parsed[action_el.get("id")] = transitions
    return parsed
def format_char(c):
    """Return a printable form of *c* for display.

    A single control character (below U+0020, or U+007F) is shown as
    ``U+XXXX`` and a single no-break space as ``NBSP``. Anything else,
    including empty and multi-character strings, is returned unchanged.
    """
    if len(c) != 1:
        return c
    code_point = ord(c)
    if code_point == 0xA0:
        return "NBSP"
    if code_point < 0x20 or code_point == 0x7F:
        return f"U+{code_point:04X}"
    return c
def print_summary(data):
    """Print a human-readable overview of a parsed layout to stdout.

    Prints the layout name and its dead key states, then one section per
    key map in ascending index order. Within a section only keys whose code
    is in TYPING_KEY_CODES are listed, each with its formatted output and,
    where present, its dead key state and action id.
    """
    dead_states = ", ".join(data["deadKeys"])
    print(f"Layout: {data['name']}")
    print(f"Dead key states: {dead_states}")
    print()

    key_maps = data["keyMaps"]
    for index_str in sorted(key_maps, key=int):
        layer = key_maps[index_str]
        print(f"--- {layer['label']} (index {index_str}) ---")
        layer_keys = layer["keys"]
        for code_str in sorted(layer_keys, key=int):
            code = int(code_str)
            if code in TYPING_KEY_CODES:
                info = layer_keys[code_str]
                key_name = info["keyName"]
                output = info.get("output", "")
                dead = info.get("deadKey", "")
                formatted = format_char(output) if output else ""
                extra = f" [dead: {dead}]" if dead else ""
                action = f" (action: {info['action']})" if "action" in info else ""
                print(f" {key_name:>12s} (code {code:>3d}): {formatted}{extra}{action}")
        print()
def main():
    """Command-line entry point.

    Parses the given .keylayout file, then:

    * with --summary, prints the human-readable summary;
    * with --output, writes the JSON to that file (creating parent
      directories) and reports the path;
    * with neither option, dumps the JSON to stdout.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Parse Apple .keylayout XML files")
    cli.add_argument("keylayout", help="Path to .keylayout file")
    cli.add_argument("--output", "-o", help="Output JSON file path")
    cli.add_argument(
        "--summary", "-s",
        action="store_true",
        help="Print human-readable summary",
    )
    cli.add_argument(
        "--keyboard-type", "-k",
        type=int,
        default=0,
        help="Hardware keyboard type ID (default: 0 = MacBook built-in)",
    )
    options = cli.parse_args()

    layout = parse_keylayout(options.keylayout, keyboard_type=options.keyboard_type)

    if options.summary:
        print_summary(layout)

    if not options.output:
        # JSON goes to stdout only when no summary was requested
        if not options.summary:
            json.dump(layout, sys.stdout, ensure_ascii=False, indent="\t")
            print()
        return

    destination = Path(options.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as f:
        json.dump(layout, f, ensure_ascii=False, indent="\t")
    print(f"Written to {destination}")


if __name__ == "__main__":
    main()