Files
vorleser/docs/superpowers/plans/2026-03-13-vorleser-greenfield.md
2026-03-13 21:36:59 +01:00

64 KiB

Vorleser Greenfield Implementation Plan

For agentic workers: REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (- [ ]) syntax for tracking.

Goal: Build a macOS + iOS app that reads EPUB and plain text books aloud using on-device Kokoro TTS via MLX Swift, with tap-to-play-from-here and position memory.

Architecture: VorleserKit Swift Package contains all logic (parsing, synthesis, playback, storage). Thin SwiftUI app shells for iOS and macOS. KokoroSwift handles TTS inference, MisakiSwift handles phonemization — both pulled in as SPM dependencies.

Tech Stack: Swift 6.2, KokoroSwift (MLX), MisakiSwift, SwiftData, ZIPFoundation, SwiftSoup, XcodeGen

Spec: docs/superpowers/specs/2026-03-13-vorleser-greenfield-design.md


Chunk 1: Project Skeleton + BookParser

Task 1: Project scaffold

Files:

  • Create: VorleserKit/Package.swift

  • Create: VorleserKit/Sources/VorleserKit/VorleserKit.swift (shared types)

  • Create: VorleserKit/Sources/BookParser/BookParser.swift

  • Create: VorleserKit/Sources/BookParser/Book.swift

  • Create: VorleserKit/Sources/BookParser/Chapter.swift

  • Create: VorleserKit/Sources/BookParser/EPUBParser.swift

  • Create: VorleserKit/Sources/BookParser/PlainTextParser.swift

  • Create: VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift

  • Create: VorleserKit/Sources/VorleserKit/Sentence.swift

  • Create: VorleserKit/Tests/BookParserTests/BookParserTests.swift

  • Create: VorleserKit/Tests/VorleserKitTests/SentenceSegmenterTests.swift

  • Create: project.yml

  • Step 1: Create Package.swift

// swift-tools-version: 6.2
import PackageDescription

// Dependency direction: BookParser → VorleserKit.
// Book.swift (BookParser) imports VorleserKit for Sentence, SentenceSegmenter and
// CharacterOffset, so VorleserKit must not depend on BookParser in return —
// SwiftPM rejects cyclic target dependencies.
let package = Package(
	name: "VorleserKit",
	platforms: [
		.iOS(.v18),
		.macOS(.v15),
	],
	products: [
		.library(name: "VorleserKit", targets: ["VorleserKit"]),
		.library(name: "BookParser", targets: ["BookParser"]),
	],
	dependencies: [
		.package(url: "https://github.com/weichsel/ZIPFoundation.git", from: "0.9.0"),
		.package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.7.0"),
	],
	targets: [
		// Shared types (CharacterOffset, Sentence, SentenceSegmenter).
		.target(
			name: "VorleserKit",
			dependencies: []
		),
		// EPUB / plain-text parsing; builds on the shared types above.
		.target(
			name: "BookParser",
			dependencies: ["VorleserKit", "ZIPFoundation", "SwiftSoup"]
		),
		// BookTests.swift imports VorleserKit directly, so it is declared explicitly.
		.testTarget(
			name: "BookParserTests",
			dependencies: ["BookParser", "VorleserKit"],
			resources: [.copy("Fixtures")]
		),
		.testTarget(
			name: "VorleserKitTests",
			dependencies: ["VorleserKit"]
		),
	]
)

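Note: Synthesizer and AudioEngine targets are added in later tasks. Start lean — only what's needed now. SwiftPM validates every declared target on each build, so before the first `swift test` create the Tests/BookParserTests/Fixtures/ directory (a placeholder file such as .gitkeep is enough) and the BookParserTests.swift file listed above; otherwise the `.copy("Fixtures")` resource cannot be resolved and even the filtered SentenceSegmenter test run in Step 5 fails to build.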

  • Step 2: Create shared types

Create VorleserKit/Sources/VorleserKit/VorleserKit.swift:

/// A position in a book, measured in characters from the start of the first chapter.
///
/// Offsets are counted in Swift `Character`s: the code in this plan derives them from
/// `String.count` and `String.distance(from:to:)`, not from UTF-8 or UTF-16 code units.
public typealias CharacterOffset = Int

Create VorleserKit/Sources/VorleserKit/Sentence.swift:

/// One sentence of book text together with the span of offsets it occupies.
public struct Sentence: Sendable {
	/// The text of the sentence.
	public let text: String

	/// Half-open span of character offsets covered by the sentence.
	public let range: Range<CharacterOffset>

	/// Creates a sentence.
	/// - Parameters:
	///   - text: The text of the sentence.
	///   - range: The span of character offsets the sentence covers.
	public init(text: String, range: Range<CharacterOffset>) {
		self.range = range
		self.text = text
	}
}

Create VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift:

import NaturalLanguage

/// Splits text into sentences using `NLTokenizer`.
public struct SentenceSegmenter: Sendable {
	/// Segments `text` into sentences.
	///
	/// - Parameters:
	///   - text: The text to split.
	///   - globalOffset: Value added to every range bound, so that ranges can be
	///     expressed relative to a larger document. Defaults to `0`.
	/// - Returns: One `Sentence` per non-blank token. `text` is whitespace-trimmed,
	///   while `range` covers the untrimmed token.
	public static func segment(_ text: String, globalOffset: CharacterOffset = 0) -> [Sentence] {
		let tokenizer = NLTokenizer(unit: .sentence)
		tokenizer.string = text

		var sentences: [Sentence] = []
		// Running position: measuring each token from the previous token's end avoids
		// re-walking the string from `startIndex` for every sentence.
		var cursor = text.startIndex
		var cursorOffset = globalOffset

		tokenizer.enumerateTokens(in: text.startIndex..<text.endIndex) { tokenRange, _ in
			let start = cursorOffset + text.distance(from: cursor, to: tokenRange.lowerBound)
			let end = start + text.distance(from: tokenRange.lowerBound, to: tokenRange.upperBound)
			cursor = tokenRange.upperBound
			cursorOffset = end

			let sentenceText = String(text[tokenRange]).trimmingCharacters(in: .whitespacesAndNewlines)
			if !sentenceText.isEmpty {
				sentences.append(Sentence(text: sentenceText, range: start..<end))
			}
			return true
		}
		return sentences
	}
}
  • Step 3: Create Book and Chapter types

Create VorleserKit/Sources/BookParser/Chapter.swift:

/// A single chapter of a book: its index, its title and its plain text.
public struct Chapter: Sendable {
	/// Index assigned to the chapter by whoever created it.
	public let index: Int

	/// Title of the chapter.
	public let title: String

	/// Plain-text content of the chapter.
	public let text: String

	/// Creates a chapter.
	/// - Parameters:
	///   - index: Index of the chapter.
	///   - title: Title of the chapter.
	///   - text: Plain-text content of the chapter.
	public init(index: Int, title: String, text: String) {
		self.text = text
		self.title = title
		self.index = index
	}
}

Create VorleserKit/Sources/BookParser/Book.swift:

import Foundation
import VorleserKit

/// A parsed book: metadata plus an ordered list of chapters.
///
/// Global character offsets treat the chapter texts as if they were concatenated
/// without separators, in `chapters` order.
public struct Book: Sendable {
	public let id: UUID
	public let title: String
	public let author: String?
	public let chapters: [Chapter]

	public init(id: UUID = UUID(), title: String, author: String?, chapters: [Chapter]) {
		self.id = id
		self.title = title
		self.author = author
		self.chapters = chapters
	}

	/// All sentences across all chapters, with global character offsets.
	///
	/// Recomputed on every access; callers that need it repeatedly should keep the result.
	public var sentences: [Sentence] {
		var result: [Sentence] = []
		var offset: CharacterOffset = 0
		for chapter in chapters {
			result.append(contentsOf: SentenceSegmenter.segment(chapter.text, globalOffset: offset))
			offset += chapter.text.count
		}
		return result
	}

	/// Returns the sentence index containing the given global character offset.
	///
	/// - Returns: Index into `sentences`, or `nil` when no sentence range contains `offset`.
	public func sentenceIndex(containing offset: CharacterOffset) -> Int? {
		sentences.firstIndex { $0.range.contains(offset) }
	}

	/// Maps a global character offset to (chapter index, local offset within chapter).
	///
	/// - Returns: `nil` when `globalOffset` is negative or at/after `totalCharacters`.
	public func chapterAndLocalOffset(for globalOffset: CharacterOffset) -> (chapterIndex: Int, localOffset: Int)? {
		// A negative offset would otherwise match the first chapter and yield a
		// negative local offset.
		guard globalOffset >= 0 else { return nil }

		var offset = 0
		for chapter in chapters {
			let chapterEnd = offset + chapter.text.count
			if globalOffset < chapterEnd {
				return (chapter.index, globalOffset - offset)
			}
			offset = chapterEnd
		}
		return nil
	}

	/// Total character count across all chapters.
	public var totalCharacters: Int {
		chapters.reduce(0) { $0 + $1.text.count }
	}
}

Note: sentences is a computed property (recomputed each call). This is fine for sentence navigation — AudioEngine will cache the result once per playback session. Avoids the lazy var on struct issue flagged in spec review.

  • Step 4: Write SentenceSegmenter tests

Create VorleserKit/Tests/VorleserKitTests/SentenceSegmenterTests.swift:

import Testing
@testable import VorleserKit

/// Behavioural tests for `SentenceSegmenter.segment(_:globalOffset:)`.
///
/// Counts are checked with `#require` before indexing: `#expect` records a failure
/// but keeps running, so a wrong count would otherwise trap on the subscript.
@Suite("SentenceSegmenter")
struct SentenceSegmenterTests {
	@Test func segmentsSimpleSentences() throws {
		let text = "Hello world. How are you? I am fine."
		let sentences = SentenceSegmenter.segment(text)
		try #require(sentences.count == 3)
		#expect(sentences[0].text == "Hello world.")
		#expect(sentences[1].text == "How are you?")
		#expect(sentences[2].text == "I am fine.")
	}

	@Test func handlesAbbreviations() {
		let text = "Dr. Smith went to Washington. He arrived at 3 p.m."
		let sentences = SentenceSegmenter.segment(text)
		// NLTokenizer should handle "Dr." without splitting
		#expect(sentences.count == 2)
	}

	@Test func appliesGlobalOffset() throws {
		let text = "First sentence. Second sentence."
		let sentences = SentenceSegmenter.segment(text, globalOffset: 100)
		let first = try #require(sentences.first)
		// The first sentence starts at the very beginning of `text`, so its lower
		// bound must be exactly the supplied offset.
		#expect(first.range.lowerBound == 100)
	}

	@Test func handlesEmptyText() {
		let sentences = SentenceSegmenter.segment("")
		#expect(sentences.isEmpty)
	}

	@Test func handlesSingleSentence() throws {
		let text = "Just one sentence."
		let sentences = SentenceSegmenter.segment(text)
		try #require(sentences.count == 1)
		#expect(sentences[0].text == "Just one sentence.")
	}
}
  • Step 5: Run tests to verify they pass

Run: cd VorleserKit && swift test --filter SentenceSegmenterTests Expected: All 5 tests pass. If handlesAbbreviations fails (NLTokenizer splits on "Dr."), adjust the expectation — document the actual NLTokenizer behavior rather than fighting it.

  • Step 6: Commit
git add VorleserKit/
git commit -m "scaffold VorleserKit package with shared types, sentence segmenter, tests"

Task 2: EPUB parser

Files:

  • Create: VorleserKit/Sources/BookParser/EPUBParser.swift

  • Create: VorleserKit/Tests/BookParserTests/EPUBParserTests.swift

  • Create: VorleserKit/Tests/BookParserTests/Fixtures/ (test EPUB)

  • Step 1: Create a minimal test EPUB fixture

Create a minimal valid EPUB for testing. An EPUB is a ZIP with specific structure:

cd VorleserKit/Tests/BookParserTests
mkdir -p Fixtures

Write a script that creates a minimal EPUB file programmatically. The EPUB must contain:

  • META-INF/container.xml pointing to content.opf
  • content.opf with manifest + spine (2 chapters)
  • chapter1.xhtml with <h1>Chapter One</h1><p>This is the first chapter.</p>
  • chapter2.xhtml with <h1>Chapter Two</h1><p>This is the second chapter.</p>

Save as Fixtures/test.epub.

  • Step 2: Write EPUBParser tests

Create VorleserKit/Tests/BookParserTests/EPUBParserTests.swift:

import Testing
import Foundation
@testable import BookParser

/// Tests for `EPUBParser` against the bundled two-chapter `Fixtures/test.epub`.
@Suite("EPUBParser")
struct EPUBParserTests {
	// Force-unwrap is intentional: a missing bundled fixture is a packaging error.
	let fixtureURL: URL = {
		Bundle.module.url(forResource: "test", withExtension: "epub", subdirectory: "Fixtures")!
	}()

	@Test func parsesTestEPUB() throws {
		let book = try EPUBParser.parse(url: fixtureURL)
		// `#require` stops the test here; indexing after a failed `#expect` would trap.
		try #require(book.chapters.count == 2)
		#expect(book.chapters[0].title == "Chapter One")
		#expect(book.chapters[0].text.contains("first chapter"))
		#expect(book.chapters[1].title == "Chapter Two")
	}

	@Test func extractsTitle() throws {
		let book = try EPUBParser.parse(url: fixtureURL)
		#expect(!book.title.isEmpty)
	}

	@Test func throwsOnInvalidFile() {
		let badURL = URL(fileURLWithPath: "/tmp/nonexistent.epub")
		#expect(throws: EPUBParserError.self) {
			try EPUBParser.parse(url: badURL)
		}
	}

	@Test func chaptersHaveSequentialIndices() throws {
		let book = try EPUBParser.parse(url: fixtureURL)
		for (i, chapter) in book.chapters.enumerated() {
			#expect(chapter.index == i)
		}
	}
}
  • Step 3: Run tests to verify they fail

Run: cd VorleserKit && swift test --filter EPUBParserTests Expected: FAIL — EPUBParser doesn't exist yet.

  • Step 4: Implement EPUBParser

Create VorleserKit/Sources/BookParser/EPUBParser.swift:

import Foundation
import ZIPFoundation
import SwiftSoup

/// Failures reported while reading an EPUB archive.
public enum EPUBParserError: Error, CustomStringConvertible {
	case cannotOpenArchive(URL)
	case missingContainerXML
	case missingOPF(String)
	case malformedOPF
	case noSpineItems

	/// Human-readable message for the error case.
	public var description: String {
		switch self {
		case .noSpineItems:
			return "EPUB has no spine items"
		case .malformedOPF:
			return "malformed OPF (package document)"
		case .missingOPF(let opfPath):
			return "missing OPF file at \(opfPath)"
		case .missingContainerXML:
			return "missing META-INF/container.xml"
		case .cannotOpenArchive(let archiveURL):
			return "cannot open EPUB archive at \(archiveURL.path)"
		}
	}
}

/// Reads an EPUB archive into a `Book`, producing one chapter per resolvable spine item.
public struct EPUBParser {
	/// Parses the EPUB at `url`.
	///
	/// - Parameter url: Location of the `.epub` file.
	/// - Returns: The parsed book. The title falls back to the file name when the
	///   package document has none.
	/// - Throws: `EPUBParserError` when the archive cannot be opened, when
	///   `container.xml` or the OPF is missing or malformed, or when the spine is empty.
	public static func parse(url: URL) throws -> Book {
		guard let archive = Archive(url: url, accessMode: .read) else {
			throw EPUBParserError.cannotOpenArchive(url)
		}

		// 1. Find OPF path from container.xml
		let opfPath = try findOPFPath(in: archive)

		// 2. Parse OPF to get manifest + spine
		let opfDir = (opfPath as NSString).deletingLastPathComponent
		let opfData = try extractData(from: archive, path: opfPath)
		let (title, author, manifest, spine) = try parseOPF(data: opfData)

		guard !spine.isEmpty else { throw EPUBParserError.noSpineItems }

		// 3. Extract chapters from spine
		var chapters: [Chapter] = []
		for itemRef in spine {
			guard let href = manifest[itemRef] else { continue }

			// Number chapters by their position in the result, so that skipping a
			// spine item without a manifest entry does not leave a gap in the indices.
			let index = chapters.count
			let fallbackTitle = "Chapter \(index + 1)"

			// Manifest hrefs are URL references, whereas ZIP entry names are stored
			// unencoded ("chapter%201.xhtml" vs "chapter 1.xhtml").
			let decodedHref = href.removingPercentEncoding ?? href
			let fullPath = opfDir.isEmpty ? decodedHref : "\(opfDir)/\(decodedHref)"

			// Assigned exactly once on each path: a `let` set inside `do` and again
			// in `catch` after a later throwing call does not compile.
			let content: (title: String, text: String)
			do {
				content = try loadChapter(from: archive, path: fullPath, fallbackTitle: fallbackTitle)
			} catch {
				// Broken chapter — include with empty text per spec
				content = ("\(fallbackTitle) (parse error)", "")
			}

			chapters.append(Chapter(index: index, title: content.title, text: content.text))
		}

		return Book(
			title: title ?? url.deletingPathExtension().lastPathComponent,
			author: author,
			chapters: chapters
		)
	}

	// MARK: - Private

	/// Extracts one chapter document and converts it to a title plus plain text.
	private static func loadChapter(
		from archive: Archive,
		path: String,
		fallbackTitle: String
	) throws -> (title: String, text: String) {
		let htmlData = try extractData(from: archive, path: path)
		// Lossy decoding keeps the readable part of a chapter containing stray
		// invalid bytes instead of discarding the whole chapter.
		let html = String(decoding: htmlData, as: UTF8.self)
		let doc = try SwiftSoup.parse(html)

		// Prefer a visible heading. A combined "h1, h2, h3, title" selector returns
		// matches in document order, which would pick <title> in <head> first.
		let heading = try doc.select("h1, h2, h3").first()?.text()
		let documentTitle = try doc.select("title").first()?.text()
		let title = [heading, documentTitle]
			.compactMap { $0?.trimmingCharacters(in: .whitespacesAndNewlines) }
			.first { !$0.isEmpty } ?? fallbackTitle

		let body = try doc.body()?.text() ?? ""
		return (title, normalizeWhitespace(body))
	}

	/// Reads `META-INF/container.xml` and returns the path of the package document.
	private static func findOPFPath(in archive: Archive) throws -> String {
		let containerPath = "META-INF/container.xml"
		// Checked here so that a missing container is reported as such rather than
		// as a missing OPF by `extractData`.
		guard archive[containerPath] != nil else {
			throw EPUBParserError.missingContainerXML
		}
		let containerData = try extractData(from: archive, path: containerPath)
		let parser = ContainerXMLParser(data: containerData)
		guard let opfPath = parser.parse() else {
			throw EPUBParserError.missingContainerXML
		}
		return opfPath
	}

	/// Parses the package document; throws `malformedOPF` when the parser yields nothing.
	private static func parseOPF(data: Data) throws -> (title: String?, author: String?, manifest: [String: String], spine: [String]) {
		let parser = OPFParser(data: data)
		guard let result = parser.parse() else {
			throw EPUBParserError.malformedOPF
		}
		return result
	}

	/// Returns the uncompressed bytes of the archive entry at `path`.
	/// - Throws: `EPUBParserError.missingOPF(path)` when no such entry exists.
	private static func extractData(from archive: Archive, path: String) throws -> Data {
		guard let entry = archive[path] else {
			throw EPUBParserError.missingOPF(path)
		}
		var data = Data()
		_ = try archive.extract(entry) { chunk in
			data.append(chunk)
		}
		return data
	}

	/// Collapses every run of whitespace/newlines into a single space and trims the ends.
	private static func normalizeWhitespace(_ text: String) -> String {
		text.components(separatedBy: .whitespacesAndNewlines)
			.filter { !$0.isEmpty }
			.joined(separator: " ")
	}
}

// MARK: - XML Parsers

/// Parses META-INF/container.xml to find the OPF path.
///
/// When several `rootfile` elements are present, the first one is used.
private class ContainerXMLParser: NSObject, XMLParserDelegate {
	private let data: Data
	private var opfPath: String?

	init(data: Data) {
		self.data = data
	}

	/// Runs the parser and returns the `full-path` of the first `rootfile`, or `nil`
	/// when none was found.
	func parse() -> String? {
		let parser = XMLParser(data: data)
		parser.delegate = self
		// The return value is deliberately ignored: parsing is aborted as soon as
		// the path is known, which makes `parse()` report `false`.
		parser.parse()
		return opfPath
	}

	func parser(
		_ parser: XMLParser,
		didStartElement element: String,
		namespaceURI: String?,
		qualifiedName: String?,
		attributes: [String: String]
	) {
		// Keep the first rootfile; later ones must not overwrite it.
		guard opfPath == nil, element == "rootfile",
		      let path = attributes["full-path"], !path.isEmpty else { return }
		opfPath = path
		parser.abortParsing()
	}
}

/// Parses the OPF (package document) for title, author, manifest, and spine.
private class OPFParser: NSObject, XMLParserDelegate {
	private let data: Data
	private var title: String?
	private var author: String?
	private var manifest: [String: String] = [:]  // id → href
	private var spine: [String] = []              // ordered item refs
	private var currentText = ""
	private var inMetadata = false

	init(data: Data) {
		self.data = data
	}

	/// Runs the parser.
	/// - Returns: `(title, author, manifest, spine)`, or `nil` when the document is
	///   not well-formed XML.
	func parse() -> (String?, String?, [String: String], [String])? {
		let parser = XMLParser(data: data)
		parser.delegate = self
		// `XMLParser.parse()` reports failure through its return value; ignoring it
		// would pass a half-read manifest/spine off as a valid result.
		guard parser.parse() else { return nil }
		return (title, author, manifest, spine)
	}

	/// Strips a namespace prefix such as `dc:` from an element name.
	private static func localName(of element: String) -> String {
		element.components(separatedBy: ":").last ?? element
	}

	func parser(
		_ parser: XMLParser,
		didStartElement element: String,
		namespaceURI: String?,
		qualifiedName: String?,
		attributes: [String: String]
	) {
		// Text is collected per element, so start over at every opening tag.
		currentText = ""

		switch Self.localName(of: element) {
		case "metadata":
			inMetadata = true
		case "item":
			if let id = attributes["id"], let href = attributes["href"] {
				manifest[id] = href
			}
		case "itemref":
			if let idref = attributes["idref"] {
				spine.append(idref)
			}
		default:
			break
		}
	}

	func parser(_ parser: XMLParser, foundCharacters string: String) {
		currentText += string
	}

	func parser(
		_ parser: XMLParser,
		didEndElement element: String,
		namespaceURI: String?,
		qualifiedName: String?
	) {
		guard inMetadata else { return }
		let trimmed = currentText.trimmingCharacters(in: .whitespacesAndNewlines)

		// Only the first non-empty title/creator is kept.
		switch Self.localName(of: element) {
		case "title" where title == nil && !trimmed.isEmpty:
			title = trimmed
		case "creator" where author == nil && !trimmed.isEmpty:
			author = trimmed
		case "metadata":
			inMetadata = false
		default:
			break
		}
	}
}
  • Step 5: Run tests to verify they pass

Run: cd VorleserKit && swift test --filter EPUBParserTests Expected: All 4 tests pass.

  • Step 6: Commit
git add VorleserKit/Sources/BookParser/ VorleserKit/Tests/BookParserTests/
git commit -m "add EPUB parser with ZIP extraction, OPF/spine parsing, HTML-to-text"

Task 3: Plain text parser

Files:

  • Create: VorleserKit/Sources/BookParser/PlainTextParser.swift

  • Create: VorleserKit/Tests/BookParserTests/PlainTextParserTests.swift

  • Step 1: Write tests

Create VorleserKit/Tests/BookParserTests/PlainTextParserTests.swift:

import Testing
import Foundation
@testable import BookParser

/// Tests for `PlainTextParser`.
@Suite("PlainTextParser")
struct PlainTextParserTests {
	@Test func parsesMultipleChapters() throws {
		let text = "First chapter content.\n\nSecond chapter content.\n\nThird chapter."
		let book = PlainTextParser.parse(text: text, title: "Test Book")
		// `#require` stops the test here; indexing after a failed `#expect` would trap.
		try #require(book.chapters.count == 3)
		#expect(book.chapters[0].text == "First chapter content.")
		#expect(book.chapters[1].text == "Second chapter content.")
	}

	@Test func parsesSingleParagraphAsOneChapter() throws {
		let text = "Just a single paragraph with no double newlines."
		let book = PlainTextParser.parse(text: text, title: "Test")
		try #require(book.chapters.count == 1)
		#expect(book.chapters[0].text == text)
	}

	@Test func setsTitle() {
		let book = PlainTextParser.parse(text: "Hello", title: "My Book")
		#expect(book.title == "My Book")
	}

	@Test func parsesFromFile() throws {
		// Tests run in parallel and other suites also write "test.txt" to the shared
		// temporary directory, so the file lives in a directory unique to this test.
		// The file name itself must stay "test.txt" because the title derives from it.
		let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory())
			.appendingPathComponent(UUID().uuidString)
		try FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true)
		defer { try? FileManager.default.removeItem(at: tmpDir) }

		let tmpFile = tmpDir.appendingPathComponent("test.txt")
		try "Line one.\n\nLine two.".write(to: tmpFile, atomically: true, encoding: .utf8)

		let book = try PlainTextParser.parse(url: tmpFile)
		#expect(book.chapters.count == 2)
		#expect(book.title == "test")
	}
}
  • Step 2: Run to verify failure

Run: cd VorleserKit && swift test --filter PlainTextParserTests Expected: FAIL — PlainTextParser doesn't exist.

  • Step 3: Implement PlainTextParser

Create VorleserKit/Sources/BookParser/PlainTextParser.swift:

import Foundation

/// Turns plain text into a `Book` by treating every blank-line-separated paragraph
/// as its own chapter.
public struct PlainTextParser {
	/// Builds a book from in-memory text.
	///
	/// - Parameters:
	///   - text: The full text. Paragraphs are separated by an empty line.
	///   - title: Title of the resulting book.
	///   - author: Optional author. Defaults to `nil`.
	/// - Returns: A book with one chapter per non-blank paragraph, titled
	///   "Section 1", "Section 2", ….
	public static func parse(text: String, title: String, author: String? = nil) -> Book {
		// Normalise CRLF / CR line endings first: "\r\n\r\n" contains no "\n\n", so
		// files saved with Windows line endings would otherwise become one chapter.
		let normalized = text
			.replacingOccurrences(of: "\r\n", with: "\n")
			.replacingOccurrences(of: "\r", with: "\n")

		let paragraphs = normalized.components(separatedBy: "\n\n")
			.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
			.filter { !$0.isEmpty }

		let chapters = paragraphs.enumerated().map { index, paragraph in
			Chapter(index: index, title: "Section \(index + 1)", text: paragraph)
		}

		return Book(title: title, author: author, chapters: chapters)
	}

	/// Reads a UTF-8 text file and parses it; the title is the file name without extension.
	///
	/// - Throws: Any error raised while reading the file.
	public static func parse(url: URL) throws -> Book {
		let text = try String(contentsOf: url, encoding: .utf8)
		let title = url.deletingPathExtension().lastPathComponent
		return parse(text: text, title: title)
	}
}
  • Step 4: Run tests

Run: cd VorleserKit && swift test --filter PlainTextParserTests Expected: All 4 tests pass.

  • Step 5: Commit
git add VorleserKit/Sources/BookParser/PlainTextParser.swift VorleserKit/Tests/BookParserTests/PlainTextParserTests.swift
git commit -m "add plain text parser with paragraph-based chapter splitting"

Task 4: BookParser facade + Book integration tests

Files:

  • Create: VorleserKit/Sources/BookParser/BookParser.swift

  • Create: VorleserKit/Tests/BookParserTests/BookTests.swift

  • Step 1: Write BookParser facade tests

Create VorleserKit/Tests/BookParserTests/BookTests.swift:

import Testing
import Foundation
@testable import BookParser
@testable import VorleserKit

/// Tests that `BookParser.parse(url:)` dispatches on the file extension.
@Suite("BookParser")
struct BookParserFacadeTests {
	@Test func detectsEPUBByExtension() throws {
		// A missing fixture becomes a recorded test failure rather than a trap.
		let url = try #require(
			Bundle.module.url(forResource: "test", withExtension: "epub", subdirectory: "Fixtures")
		)
		let book = try BookParser.parse(url: url)
		#expect(book.chapters.count == 2)
	}

	@Test func detectsTextByExtension() throws {
		// Unique name: tests run in parallel and another suite writes "test.txt"
		// to the same temporary directory and deletes it when it finishes.
		let tmpFile = URL(fileURLWithPath: NSTemporaryDirectory())
			.appendingPathComponent("\(UUID().uuidString).txt")
		try "Paragraph one.\n\nParagraph two.".write(to: tmpFile, atomically: true, encoding: .utf8)
		defer { try? FileManager.default.removeItem(at: tmpFile) }

		let book = try BookParser.parse(url: tmpFile)
		#expect(book.chapters.count == 2)
	}
}

/// Checks the mapping between global character offsets and chapters/sentences.
@Suite("Book character addressing")
struct BookCharacterAddressingTests {
	// Two chapters of 12 and 15 characters, i.e. global offsets 0..<12 and 12..<27.
	let book = Book(
		title: "Test",
		author: nil,
		chapters: [
			Chapter(index: 0, title: "Ch 1", text: "Hello world."),
			Chapter(index: 1, title: "Ch 2", text: "Second chapter."),
		]
	)

	@Test func totalCharacters() {
		let total = book.totalCharacters
		#expect(total == 27)
	}

	@Test func chapterAndLocalOffset() {
		// The first character of each chapter maps to local offset 0 of that chapter.
		let atStartOfBook = book.chapterAndLocalOffset(for: 0)
		let atStartOfSecondChapter = book.chapterAndLocalOffset(for: 12)

		#expect(atStartOfBook?.chapterIndex == 0)
		#expect(atStartOfBook?.localOffset == 0)
		#expect(atStartOfSecondChapter?.chapterIndex == 1)
		#expect(atStartOfSecondChapter?.localOffset == 0)
	}

	@Test func sentenceIndex() {
		#expect(book.sentenceIndex(containing: 0) != nil)
	}

	@Test func outOfRangeReturnsNil() {
		let beyondEnd = book.chapterAndLocalOffset(for: 9999)
		#expect(beyondEnd == nil)
	}
}
  • Step 2: Implement BookParser facade

Create VorleserKit/Sources/BookParser/BookParser.swift:

import Foundation

/// Failures reported by the `BookParser` facade.
public enum BookParserError: Error, CustomStringConvertible {
	/// The file extension is not one the facade knows how to parse.
	case unsupportedFormat(String)

	/// Human-readable message for the error case.
	public var description: String {
		guard case .unsupportedFormat(let fileExtension) = self else {
			fatalError("unreachable: BookParserError has a single case")
		}
		return "unsupported file format: \(fileExtension)"
	}
}

/// Entry point that chooses a concrete parser from the file extension.
public struct BookParser {
	/// Parses the book at `url`.
	///
	/// The extension is compared case-insensitively: `epub` goes to `EPUBParser`,
	/// `txt` and `text` go to `PlainTextParser`.
	///
	/// - Throws: `BookParserError.unsupportedFormat` for any other extension, or
	///   whatever the selected parser throws.
	public static func parse(url: URL) throws -> Book {
		let fileExtension = url.pathExtension
		let normalized = fileExtension.lowercased()

		if normalized == "epub" {
			return try EPUBParser.parse(url: url)
		}
		if normalized == "txt" || normalized == "text" {
			return try PlainTextParser.parse(url: url)
		}
		throw BookParserError.unsupportedFormat(fileExtension)
	}
}
  • Step 3: Run all BookParser tests

Run: cd VorleserKit && swift test --filter "BookParser|Book" Expected: All tests pass.

  • Step 4: Commit
git add VorleserKit/Sources/BookParser/BookParser.swift VorleserKit/Tests/BookParserTests/BookTests.swift
git commit -m "add BookParser facade with format detection, Book character addressing tests"

Chunk 2: Storage

Task 5: SwiftData storage

Files:

  • Create: VorleserKit/Sources/Storage/StoredBook.swift

  • Create: VorleserKit/Sources/Storage/BookStore.swift

  • Create: VorleserKit/Tests/StorageTests/BookStoreTests.swift

  • Modify: VorleserKit/Package.swift — add Storage target

  • Step 1: Add Storage target to Package.swift

Add to the targets array in Package.swift:

// Persistence layer. The Storage sources in this plan import only Foundation and
// SwiftData, so the target lists no package dependencies.
.target(
	name: "Storage",
	dependencies: []
),
// Tests for the Storage target.
.testTarget(
	name: "StorageTests",
	dependencies: ["Storage"]
),

Add "Storage" to the VorleserKit target's dependencies.

  • Step 2: Write StoredBook model

Create VorleserKit/Sources/Storage/StoredBook.swift:

import Foundation
import SwiftData

/// SwiftData record for a book in the library.
@Model
public class StoredBook {
	/// Identifier of the book.
	public var bookID: UUID
	/// Title of the book.
	public var title: String
	/// Author of the book, if known.
	public var author: String?
	/// Name of the book's source file.
	public var sourceFileName: String
	/// When the record was created.
	public var dateAdded: Date
	/// Last stored reading position.
	public var lastPosition: Int
	/// When the position was last stored; `nil` until then.
	public var lastRead: Date?
	/// Name of the voice chosen for this book, if any.
	public var voiceName: String?

	/// Creates a record. Only `title` and `sourceFileName` are required; the other
	/// values default to a fresh identifier, the current date, position `0` and `nil`.
	public init(
		bookID: UUID = UUID(),
		title: String,
		author: String? = nil,
		sourceFileName: String,
		dateAdded: Date = .now,
		lastPosition: Int = 0,
		lastRead: Date? = nil,
		voiceName: String? = nil
	) {
		// Identity and file reference
		self.bookID = bookID
		self.sourceFileName = sourceFileName
		self.dateAdded = dateAdded

		// Metadata
		self.title = title
		self.author = author
		self.voiceName = voiceName

		// Reading progress
		self.lastPosition = lastPosition
		self.lastRead = lastRead
	}
}
  • Step 3: Write BookStore

Create VorleserKit/Sources/Storage/BookStore.swift:

import Foundation
import SwiftData

/// Library management: keeps `StoredBook` records and the imported book files in step.
public struct BookStore {
	private let modelContainer: ModelContainer
	private let documentsDirectory: URL

	/// - Parameters:
	///   - modelContainer: Container whose main context holds the `StoredBook` records.
	///   - documentsDirectory: Directory into which imported files are copied.
	public init(modelContainer: ModelContainer, documentsDirectory: URL) {
		self.modelContainer = modelContainer
		self.documentsDirectory = documentsDirectory
	}

	/// Copies `sourceURL` into the documents directory and creates a record for it.
	///
	/// - Returns: The newly inserted record.
	/// - Throws: Errors from the file copy or from saving. When saving fails, the
	///   copied file and the pending record are removed again.
	@MainActor
	public func importBook(from sourceURL: URL, title: String, author: String?) throws -> StoredBook {
		// The UUID prefix keeps two imports with the same file name apart.
		let fileName = "\(UUID().uuidString)_\(sourceURL.lastPathComponent)"
		let destination = documentsDirectory.appendingPathComponent(fileName)
		try FileManager.default.copyItem(at: sourceURL, to: destination)

		let stored = StoredBook(title: title, author: author, sourceFileName: fileName)
		let context = modelContainer.mainContext
		context.insert(stored)
		do {
			try context.save()
		} catch {
			// Without a saved record nothing references the copy; do not leave it behind.
			context.delete(stored)
			try? FileManager.default.removeItem(at: destination)
			throw error
		}
		return stored
	}

	/// All records, most recently read first, then most recently added.
	@MainActor
	public func allBooks() throws -> [StoredBook] {
		let context = modelContainer.mainContext
		let descriptor = FetchDescriptor<StoredBook>(
			sortBy: [SortDescriptor(\.lastRead, order: .reverse), SortDescriptor(\.dateAdded, order: .reverse)]
		)
		return try context.fetch(descriptor)
	}

	/// Stores a new reading position, stamps `lastRead` with the current time and saves.
	@MainActor
	public func updatePosition(_ book: StoredBook, position: Int) throws {
		book.lastPosition = position
		book.lastRead = .now
		try modelContainer.mainContext.save()
	}

	/// Deletes the record and then, best-effort, the book's file.
	///
	/// - Throws: Errors from saving. The file is only removed after the deletion has
	///   been saved, so a failed save does not leave a record without its file.
	@MainActor
	public func deleteBook(_ book: StoredBook) throws {
		// Resolve the path before the model is deleted.
		let filePath = documentsDirectory.appendingPathComponent(book.sourceFileName)
		modelContainer.mainContext.delete(book)
		try modelContainer.mainContext.save()
		try? FileManager.default.removeItem(at: filePath)
	}

	/// Location of the book's file inside the documents directory.
	public func fileURL(for book: StoredBook) -> URL {
		documentsDirectory.appendingPathComponent(book.sourceFileName)
	}

	/// Whether the book's file is present on disk.
	public func fileExists(for book: StoredBook) -> Bool {
		FileManager.default.fileExists(atPath: fileURL(for: book).path)
	}
}
  • Step 4: Write tests

Create VorleserKit/Tests/StorageTests/BookStoreTests.swift:

import Testing
import Foundation
import SwiftData
@testable import Storage

/// Tests for `BookStore`, run against an in-memory model container and a
/// throw-away documents directory.
@Suite("BookStore")
struct BookStoreTests {
	/// Creates a store backed by an in-memory container and a fresh temp directory.
	/// The caller is responsible for removing the returned directory.
	@MainActor
	func makeStore() throws -> (BookStore, URL) {
		let config = ModelConfiguration(isStoredInMemoryOnly: true)
		let container = try ModelContainer(for: StoredBook.self, configurations: config)
		let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory())
			.appendingPathComponent(UUID().uuidString)
		try FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true)
		return (BookStore(modelContainer: container, documentsDirectory: tmpDir), tmpDir)
	}

	@Test @MainActor func importAndList() throws {
		let (store, tmpDir) = try makeStore()
		defer { try? FileManager.default.removeItem(at: tmpDir) }

		// Create a fake source file
		let sourceFile = tmpDir.appendingPathComponent("source.epub")
		try "fake epub".write(to: sourceFile, atomically: true, encoding: .utf8)

		let stored = try store.importBook(from: sourceFile, title: "Test Book", author: "Author")
		#expect(stored.title == "Test Book")
		#expect(stored.lastPosition == 0)

		let books = try store.allBooks()
		#expect(books.count == 1)
		// `#require` instead of `books[0]`: an empty result must fail, not trap.
		let first = try #require(books.first)
		#expect(store.fileExists(for: first))
	}

	@Test @MainActor func updatePosition() throws {
		let (store, tmpDir) = try makeStore()
		defer { try? FileManager.default.removeItem(at: tmpDir) }

		let sourceFile = tmpDir.appendingPathComponent("source.txt")
		try "text".write(to: sourceFile, atomically: true, encoding: .utf8)

		let stored = try store.importBook(from: sourceFile, title: "Book", author: nil)
		try store.updatePosition(stored, position: 500)
		#expect(stored.lastPosition == 500)
		#expect(stored.lastRead != nil)
	}

	@Test @MainActor func deleteBook() throws {
		let (store, tmpDir) = try makeStore()
		defer { try? FileManager.default.removeItem(at: tmpDir) }

		let sourceFile = tmpDir.appendingPathComponent("source.txt")
		try "text".write(to: sourceFile, atomically: true, encoding: .utf8)

		let stored = try store.importBook(from: sourceFile, title: "Book", author: nil)
		#expect(store.fileExists(for: stored))
		// Captured up front so the deleted model is not touched afterwards.
		let storedFile = store.fileURL(for: stored)

		try store.deleteBook(stored)
		let books = try store.allBooks()
		#expect(books.isEmpty)
		#expect(!FileManager.default.fileExists(atPath: storedFile.path))
	}
}
  • Step 5: Run tests

Run: cd VorleserKit && swift test --filter StorageTests Expected: All 3 tests pass.

  • Step 6: Commit
git add VorleserKit/Sources/Storage/ VorleserKit/Tests/StorageTests/ VorleserKit/Package.swift
git commit -m "add SwiftData storage with BookStore for library management, position tracking"

Chunk 3: Synthesizer

Task 6: Synthesizer wrapper around KokoroSwift

Files:

  • Create: VorleserKit/Sources/Synthesizer/Synthesizer.swift
  • Create: VorleserKit/Sources/Synthesizer/VoicePack.swift
  • Create: VorleserKit/Sources/Synthesizer/SynthesizerError.swift
  • Modify: VorleserKit/Package.swift — add Synthesizer target + KokoroSwift dependency

Note: The Synthesizer wraps KokoroSwift. Tests for this module require a real device with MLX Metal support — they cannot run in the iOS Simulator. Write integration tests that are clearly marked and can be skipped in CI.

  • Step 1: Add KokoroSwift dependency and Synthesizer target to Package.swift

Add to dependencies array:

.package(url: "https://github.com/mlalma/kokoro-ios.git", exact: "1.0.6"),

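Note: Pin an exact version so that KokoroSwift and the MisakiSwift release it expects stay in step. 1.0.6 is the version known when this plan was written; check the latest tag on the repo and pin that one instead if it is newer.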

Add to targets array:

// TTS wrapper target; its only declared dependency is the KokoroSwift product.
// NOTE(review): Synthesizer.swift also has `import MLX` — confirm that module is
// reachable through KokoroSwift, otherwise declare it here as its own product dependency.
.target(
	name: "Synthesizer",
	dependencies: [
		.product(name: "KokoroSwift", package: "kokoro-ios"),
	]
),

Add "Synthesizer" to VorleserKit target's dependencies.

  • Step 2: Create VoicePack

Create VorleserKit/Sources/Synthesizer/VoicePack.swift:

import Foundation

/// Describes one selectable voice.
public struct VoicePack: Sendable, Identifiable {
	/// Languages a voice can speak; the raw value is the language tag.
	public enum Language: String, Sendable {
		case enUS = "en-us"
		case enGB = "en-gb"
	}

	/// Internal name of the voice; also serves as its `id`.
	public let name: String
	/// Name shown to the user.
	public let displayName: String
	/// Language the voice speaks.
	public let language: Language

	public var id: String { name }

	/// Curated voices bundled with the app.
	/// The app shell must bundle `voices.npz` containing these voice embeddings.
	public static let curated: [VoicePack] = [
		("af_heart", "Heart"),
		("af_bella", "Bella"),
		("am_michael", "Michael"),
	].map { entry in
		VoicePack(name: entry.0, displayName: entry.1, language: .enUS)
	}
}
  • Step 3: Create SynthesizerError

Create VorleserKit/Sources/Synthesizer/SynthesizerError.swift:

/// Failures reported by the synthesizer.
public enum SynthesizerError: Error, CustomStringConvertible {
	case modelNotFound(String)
	case voicesNotFound(String)
	case voiceNotAvailable(String)
	case synthesisFailure(String, Error)

	/// Human-readable message for the error case.
	///
	/// For `synthesisFailure`, the text is cut to 50 characters and marked with
	/// "..." only when it was actually shortened.
	public var description: String {
		switch self {
		case .modelNotFound(let path):
			return "kokoro model not found at \(path)"
		case .voicesNotFound(let path):
			return "voices.npz not found at \(path)"
		case .voiceNotAvailable(let name):
			return "voice '\(name)' not found in voices.npz"
		case .synthesisFailure(let text, let error):
			let limit = 50
			// An unconditional "..." would suggest truncation even for short input.
			let excerpt = text.count > limit ? "\(text.prefix(limit))..." : text
			return "synthesis failed for '\(excerpt)': \(error)"
		}
	}
}
  • Step 4: Implement Synthesizer

Create VorleserKit/Sources/Synthesizer/Synthesizer.swift:

import Foundation
import KokoroSwift
import MLX

/// Thin wrapper around `KokoroTTS` that is bound to a single voice and turns text into PCM samples.
///
/// NOTE(review): the class declares `Sendable` while storing `KokoroTTS` and `MLXArray` values;
/// whether those types are `Sendable` is not visible here — confirm that this compiles in Swift 6
/// language mode, or switch to an audited `@unchecked Sendable`.
public final class Synthesizer: Sendable {
	private let tts: KokoroTTS
	private let voiceEmbedding: MLXArray
	private let language: KokoroSwift.Language
	private let voicePack: VoicePack

	/// Initialize the synthesizer with a specific voice.
	/// - Parameters:
	///   - voice: The voice pack to use.
	///   - modelURL: URL to `kokoro-v1_0.safetensors`.
	///   - voicesURL: URL to `voices.npz`.
	/// - Throws: `SynthesizerError.modelNotFound` or `.voicesNotFound` when a file is missing,
	///   `.voiceNotAvailable` when the archive cannot be read or lacks an entry for `voice`.
	public init(voice: VoicePack, modelURL: URL, voicesURL: URL) throws {
		// Fail early with a specific error before handing paths to the TTS engine.
		guard FileManager.default.fileExists(atPath: modelURL.path) else {
			throw SynthesizerError.modelNotFound(modelURL.path)
		}
		guard FileManager.default.fileExists(atPath: voicesURL.path) else {
			throw SynthesizerError.voicesNotFound(voicesURL.path)
		}

		self.tts = KokoroTTS(modelPath: modelURL, g2p: .misaki)

		// Load voice embedding from voices.npz
		// Entries are looked up under "<voice name>.npy". A failure to read the archive itself is
		// also reported as `voiceNotAvailable`.
		guard let voices = NpyzReader.read(fileFromPath: voicesURL),
		      let embedding = voices["\(voice.name).npy"] else {
			throw SynthesizerError.voiceNotAvailable(voice.name)
		}

		self.voiceEmbedding = embedding
		self.voicePack = voice
		// Map the app-level language enum onto KokoroSwift's language enum.
		self.language = voice.language == .enUS ? .enUS : .enGB
	}

	/// Synthesize a sentence to PCM audio.
	/// - Parameter text: Sentence-length text to synthesize.
	/// - Returns: PCM Float32 samples at 24,000 Hz, mono.
	/// - Throws: `SynthesizerError.synthesisFailure` wrapping the underlying error.
	public func synthesize(text: String) throws -> [Float] {
		do {
			// The second tuple element returned by `generateAudio` is not used here.
			let (samples, _) = try tts.generateAudio(
				voice: voiceEmbedding,
				language: language,
				text: text,
				speed: 1.0
			)
			return samples
		} catch {
			throw SynthesizerError.synthesisFailure(text, error)
		}
	}

	/// Sample rate of the generated audio.
	public static let sampleRate: Double = 24_000
}

Note: NpyzReader may require an additional `import MLXUtilsLibrary`, depending on whether KokoroSwift re-exports that module. Adjust the imports during implementation based on what compiles.

  • Step 5: Verify it compiles

Run: cd VorleserKit && swift build Expected: Compiles successfully. If KokoroSwift's API differs from what the research found, adjust the wrapper. The key integration points to verify:

  • KokoroTTS(modelPath:, g2p:) constructor

  • tts.generateAudio(voice:, language:, text:, speed:) method signature

  • NpyzReader import path

  • Step 6: Commit

git add VorleserKit/Sources/Synthesizer/ VorleserKit/Package.swift
git commit -m "add Synthesizer wrapper around KokoroSwift with voice pack support"

Chunk 4: AudioEngine

Task 7: AudioEngine playback + position tracking

Files:

  • Create: VorleserKit/Sources/AudioEngine/AudioEngine.swift

  • Create: VorleserKit/Sources/AudioEngine/PlaybackState.swift

  • Modify: VorleserKit/Package.swift — add AudioEngine target

  • Step 1: Add AudioEngine target to Package.swift

Add to targets:

.target(
	name: "AudioEngine",
	dependencies: ["VorleserKit", "Synthesizer", "BookParser"]
),

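Add "AudioEngine" to VorleserKit target's dependencies. Caution: the AudioEngine target above itself lists "VorleserKit" as a dependency, and SwiftPM rejects cyclic target dependencies. If `swift build` reports a cycle, do not add AudioEngine to VorleserKit; expose it as its own library product instead and link that product from the app targets.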

  • Step 2: Create PlaybackState

Create VorleserKit/Sources/AudioEngine/PlaybackState.swift:

/// Coarse playback phase reported by the audio engine.
public enum PlaybackState: Sendable {
	/// Nothing is loaded or playback has ended.
	case idle
	/// Audio for the current sentence is being generated.
	case synthesizing
	/// Audio is being played.
	case playing
	/// Playback was paused by the user.
	case paused
}
  • Step 3: Implement AudioEngine

Create VorleserKit/Sources/AudioEngine/AudioEngine.swift:

import Foundation
import AVFoundation
import Observation
import BookParser
import VorleserKit
import Synthesizer

/// Plays a book aloud sentence by sentence.
///
/// Each sentence is synthesized to PCM samples, scheduled on an `AVAudioPlayerNode`, and the
/// following sentence is synthesized in the background while the current one plays
/// (one-ahead buffering). All mutable state is confined to the main actor; synthesis itself
/// runs in detached tasks so the UI is never blocked.
@Observable
@MainActor
public final class AudioEngine {
	/// Global character offset of the sentence currently being synthesized or played.
	public private(set) var currentPosition: CharacterOffset = 0
	/// Current playback phase.
	public private(set) var state: PlaybackState = .idle

	private var avEngine: AVAudioEngine?
	private var playerNode: AVAudioPlayerNode?
	private var sentences: [Sentence] = []
	private var currentSentenceIndex: Int = 0
	private var synthesizer: Synthesizer?
	private var book: Book?
	/// Buffer for the sentence after the current one, if prefetching finished in time.
	private var nextBuffer: AVAudioPCMBuffer?
	private var playbackTask: Task<Void, Never>?

	public init() {}

	/// Starts playback of `book` at the sentence containing `offset`.
	///
	/// Any playback in progress is stopped first. If no sentence contains `offset`, playback
	/// starts at the first sentence; if the book has no sentences, this does nothing.
	/// - Parameters:
	///   - book: The book to read.
	///   - offset: Global character offset to start from.
	///   - synthesizer: Synthesizer used for all sentences of this playback session.
	/// - Throws: Errors from configuring the audio session (iOS) or starting the audio engine.
	public func play(book: Book, from offset: CharacterOffset, using synthesizer: Synthesizer) async throws {
		stop()

		self.book = book
		self.synthesizer = synthesizer
		self.sentences = book.sentences

		guard let startIndex = book.sentenceIndex(containing: offset) ?? sentences.indices.first else {
			return
		}

		self.currentSentenceIndex = startIndex
		self.currentPosition = sentences[startIndex].range.lowerBound

		#if os(iOS)
		try AVAudioSession.sharedInstance().setCategory(.playback, mode: .spokenAudio)
		try AVAudioSession.sharedInstance().setActive(true)
		#endif

		let engine = AVAudioEngine()
		let player = AVAudioPlayerNode()
		engine.attach(player)

		// A standard mono float format at a fixed, valid sample rate; failure here is a programmer error.
		let format = AVAudioFormat(standardFormatWithSampleRate: Synthesizer.sampleRate, channels: 1)!
		engine.connect(player, to: engine.mainMixerNode, format: format)
		try engine.start()
		player.play()

		self.avEngine = engine
		self.playerNode = player

		startPlaybackLoop()
	}

	/// Pauses playback. Does nothing when no playback session exists.
	public func pause() {
		guard let playerNode else { return }
		playerNode.pause()
		state = .paused
	}

	/// Resumes playback after `pause()`. Does nothing when no playback session exists.
	public func resume() {
		guard let playerNode else { return }
		playerNode.play()
		state = .playing
	}

	/// Stops playback and tears down the audio engine. `currentPosition` is left untouched.
	public func stop() {
		playbackTask?.cancel()
		playbackTask = nil
		playerNode?.stop()
		avEngine?.stop()
		avEngine = nil
		playerNode = nil
		nextBuffer = nil
		state = .idle
	}

	/// Jumps to the next sentence, if there is one.
	public func skipForward() {
		guard currentSentenceIndex + 1 < sentences.count else { return }
		jump(toSentenceAt: currentSentenceIndex + 1)
	}

	/// Jumps to the previous sentence, if there is one.
	public func skipBackward() {
		guard currentSentenceIndex > 0 else { return }
		jump(toSentenceAt: currentSentenceIndex - 1)
	}

	// MARK: - Private

	/// Cancels the running loop, repositions to `index`, and restarts playback there.
	private func jump(toSentenceAt index: Int) {
		playbackTask?.cancel()
		// stop() discards scheduled buffers and fires their completion handlers, which
		// unblocks the cancelled loop so it can exit.
		playerNode?.stop()
		currentSentenceIndex = index
		currentPosition = sentences[index].range.lowerBound
		nextBuffer = nil
		// A stopped node does not play newly scheduled buffers until play() is called again.
		playerNode?.play()
		startPlaybackLoop()
	}

	private func startPlaybackLoop() {
		playbackTask = Task { [weak self] in
			await self?.playbackLoop()
		}
	}

	/// Synthesizes and plays sentences from `currentSentenceIndex` until the end of the book
	/// or until the surrounding task is cancelled.
	private func playbackLoop() async {
		guard let synthesizer, let playerNode else { return }

		while currentSentenceIndex < sentences.count {
			if Task.isCancelled { return }

			let sentence = sentences[currentSentenceIndex]
			currentPosition = sentence.range.lowerBound
			state = .synthesizing

			let buffer: AVAudioPCMBuffer
			if let prefetched = nextBuffer {
				buffer = prefetched
				nextBuffer = nil
			} else {
				// Synthesize off the main actor so the UI stays responsive.
				let samples = await Self.synthesizeInBackground(sentence.text, using: synthesizer).value
				// State may have been repositioned by a skip/stop while we were suspended.
				if Task.isCancelled { return }
				guard let samples, let fresh = Self.makePCMBuffer(from: samples) else {
					// Skip failed or empty sentence
					currentSentenceIndex += 1
					continue
				}
				buffer = fresh
			}

			// Reflect a pause() issued while synthesis was in flight.
			state = playerNode.isPlaying ? .playing : .paused

			// Start prefetching next sentence
			let upcomingIndex = currentSentenceIndex + 1
			let prefetchTask: Task<[Float]?, Never>? = upcomingIndex < sentences.count
				? Self.synthesizeInBackground(sentences[upcomingIndex].text, using: synthesizer)
				: nil

			// Play current buffer and wait until it has actually been played back (or the
			// node was stopped), not merely consumed by the render thread.
			await withCheckedContinuation { (continuation: CheckedContinuation<Void, Never>) in
				playerNode.scheduleBuffer(buffer, completionCallbackType: .dataPlayedBack) { _ in
					continuation.resume()
				}
			}

			if Task.isCancelled { return }

			// Collect prefetched samples
			if let prefetchTask {
				let samples = await prefetchTask.value
				// A skip during this await already moved the index; do not overwrite it.
				if Task.isCancelled { return }
				if let samples {
					nextBuffer = Self.makePCMBuffer(from: samples)
				}
			}

			currentSentenceIndex += 1
		}

		state = .idle
	}

	/// Runs synthesis in a detached task and yields the raw samples, or `nil` on failure.
	///
	/// Only `[Float]` crosses the task boundary; audio buffers are built on the main actor.
	private nonisolated static func synthesizeInBackground(
		_ text: String,
		using synthesizer: Synthesizer
	) -> Task<[Float]?, Never> {
		Task.detached {
			try? synthesizer.synthesize(text: text)
		}
	}

	/// Copies `samples` into a mono Float32 buffer at the synthesizer's sample rate.
	/// - Returns: `nil` when `samples` is empty or the buffer cannot be allocated.
	private static func makePCMBuffer(from samples: [Float]) -> AVAudioPCMBuffer? {
		guard !samples.isEmpty,
		      let format = AVAudioFormat(standardFormatWithSampleRate: Synthesizer.sampleRate, channels: 1),
		      let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(samples.count)),
		      let channel = buffer.floatChannelData?[0] else {
			return nil
		}
		buffer.frameLength = AVAudioFrameCount(samples.count)
		samples.withUnsafeBufferPointer { source in
			guard let base = source.baseAddress else { return }
			channel.update(from: base, count: source.count)
		}
		return buffer
	}
}
  • Step 4: Verify it compiles

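Run: cd VorleserKit && swift build Expected: Compiles. The prefetch task closure may need adjustment based on Swift 6.2 concurrency rules — if there are Sendable issues, make Synthesizer conform to @unchecked Sendable and document why. The justification assumed by this plan is that MLX inference is thread-safe; verify that against the MLX Swift and KokoroSwift documentation before relying on it, and otherwise serialize calls to synthesize(text:).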

  • Step 5: Commit
git add VorleserKit/Sources/AudioEngine/ VorleserKit/Package.swift
git commit -m "add AudioEngine with AVAudioEngine playback, one-ahead buffering, skip controls"

Chunk 5: XcodeGen Project + App Shells

Task 8: XcodeGen project configuration

Files:

  • Create: project.yml

  • Step 1: Create project.yml

# XcodeGen project spec for the Vorleser iOS and macOS app shells.
name: Vorleser
options:
  bundleIdPrefix: de.felixfoertsch
  deploymentTarget:
    iOS: "18.0"
    macOS: "15.0"
  xcodeVersion: "16.0"
  createIntermediateGroups: true

packages:
  VorleserKit:
    path: VorleserKit

targets:
  Vorleser-iOS:
    type: application
    platform: iOS
    # XcodeGen has no target-level `resources` key; bundled files are listed as sources
    # with an explicit resources build phase.
    sources:
      - path: Vorleser-iOS
      - path: Resources/Models
        buildPhase: resources
      - path: Resources/Voices
        buildPhase: resources
    dependencies:
      - package: VorleserKit
        product: VorleserKit
    # Info.plist entries are declared via `info`; XcodeGen generates the plist at `path`.
    info:
      path: Vorleser-iOS/Info.plist
      properties:
        UIBackgroundModes: [audio]
        UILaunchScreen: {}
        UIFileSharingEnabled: true
        LSSupportsOpeningDocumentsInPlace: true
    settings:
      base:
        PRODUCT_BUNDLE_IDENTIFIER: de.felixfoertsch.vorleser

  Vorleser-macOS:
    type: application
    platform: macOS
    sources:
      - path: Vorleser-macOS
      - path: Resources/Models
        buildPhase: resources
      - path: Resources/Voices
        buildPhase: resources
    dependencies:
      - package: VorleserKit
        product: VorleserKit
    settings:
      base:
        PRODUCT_BUNDLE_IDENTIFIER: de.felixfoertsch.vorleser.mac
        # No custom plist keys are needed on macOS; let Xcode synthesize the Info.plist.
        GENERATE_INFOPLIST_FILE: YES
  • Step 2: Create shared Resources directories
mkdir -p Resources/Models Resources/Voices

Add .gitkeep files so the empty directories are tracked:

touch Resources/Models/.gitkeep Resources/Voices/.gitkeep
  • Step 3: Commit
git add project.yml Resources/
git commit -m "add XcodeGen project config with iOS and macOS targets, resource directories"

Task 9: iOS app shell

Files:

  • Create: Vorleser-iOS/VorleserApp.swift

  • Create: Vorleser-iOS/LibraryView.swift

  • Create: Vorleser-iOS/ReaderView.swift

  • Create: Vorleser-iOS/PlaybackControls.swift

  • Create: Vorleser-iOS/ReadingTextView.swift

  • Step 1: Create app entry point

Create Vorleser-iOS/VorleserApp.swift:

import SwiftUI
import SwiftData
import Storage

/// iOS app entry point: shows the library and installs the SwiftData container for `StoredBook`.
@main
struct VorleserApp: App {
	var body: some Scene {
		WindowGroup {
			LibraryView()
		}
		// Makes a model container for StoredBook available to @Query and modelContext in all views.
		.modelContainer(for: StoredBook.self)
	}
}
  • Step 2: Create LibraryView

Create Vorleser-iOS/LibraryView.swift:

import SwiftUI
import SwiftData
import Storage
import BookParser

/// Lists imported books, most recently read first, and lets the user import or delete books.
struct LibraryView: View {
	@Environment(\.modelContext) private var modelContext
	@Query(sort: \StoredBook.lastRead, order: .reverse) private var books: [StoredBook]
	@State private var showFileImporter = false

	/// Store bound to the environment's model container and the app's Documents directory.
	private var bookStore: BookStore {
		BookStore(
			modelContainer: modelContext.container,
			documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
		)
	}

	var body: some View {
		NavigationStack {
			List {
				ForEach(books) { book in
					NavigationLink(value: book) {
						VStack(alignment: .leading) {
							Text(book.title)
								.font(.headline)
							if let author = book.author {
								Text(author)
									.font(.subheadline)
									.foregroundStyle(.secondary)
							}
						}
					}
				}
				.onDelete(perform: deleteBooks)
			}
			.navigationTitle("Library")
			.navigationDestination(for: StoredBook.self) { storedBook in
				ReaderView(storedBook: storedBook)
			}
			.toolbar {
				Button("Import", systemImage: "plus") {
					showFileImporter = true
				}
			}
			.fileImporter(
				isPresented: $showFileImporter,
				allowedContentTypes: [.epub, .plainText],
				allowsMultipleSelection: false
			) { result in
				handleImport(result)
			}
		}
	}

	/// Parses the picked file for its metadata and copies it into the library.
	private func handleImport(_ result: Result<[URL], Error>) {
		let url: URL
		switch result {
		case .success(let urls):
			guard let first = urls.first else { return }
			url = first
		case .failure(let error):
			print("Import failed: \(error)")
			return
		}

		Task {
			// Security-scoped access must span the work itself. Acquiring it outside the
			// task would release it (via defer) before the task body ever runs.
			guard url.startAccessingSecurityScopedResource() else {
				print("Import failed: no access to \(url.lastPathComponent)")
				return
			}
			defer { url.stopAccessingSecurityScopedResource() }

			do {
				let parsed = try BookParser.parse(url: url)
				try bookStore.importBook(from: url, title: parsed.title, author: parsed.author)
			} catch {
				// Surface error to user (add alert state if needed)
				print("Import failed: \(error)")
			}
		}
	}

	/// Deletes the books at the given list offsets, logging failures instead of dropping them.
	private func deleteBooks(at offsets: IndexSet) {
		// Resolve the models first; deleting may change `books` while iterating.
		let doomed = offsets.map { books[$0] }
		for book in doomed {
			do {
				try bookStore.deleteBook(book)
			} catch {
				print("Delete failed: \(error)")
			}
		}
	}
}
  • Step 3: Create ReadingTextView (UITextView wrapper for tap-to-character)

Create Vorleser-iOS/ReadingTextView.swift:

import SwiftUI
import UIKit
import VorleserKit

/// Read-only `UITextView` wrapper that highlights one range and reports taps as character offsets.
///
/// `highlightedRange` and the offsets passed to `onTapCharacter` count Swift `Character`s
/// (the same unit as `text.count`). UIKit measures text in UTF-16 code units, so this view
/// converts between the two units in both directions.
struct ReadingTextView: UIViewRepresentable {
	let text: String
	let highlightedRange: Range<Int>?
	let onTapCharacter: (CharacterOffset) -> Void

	func makeUIView(context: Context) -> UITextView {
		let textView = UITextView()
		textView.isEditable = false
		textView.isSelectable = false
		textView.font = .preferredFont(forTextStyle: .body)
		textView.textContainerInset = UIEdgeInsets(top: 16, left: 16, bottom: 16, right: 16)

		let tap = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap(_:)))
		textView.addGestureRecognizer(tap)

		return textView
	}

	func updateUIView(_ textView: UITextView, context: Context) {
		// Keep the coordinator's callback in sync with the latest view value.
		context.coordinator.onTapCharacter = onTapCharacter

		let attributed = NSMutableAttributedString(
			string: text,
			attributes: [
				.font: UIFont.preferredFont(forTextStyle: .body),
				.foregroundColor: UIColor.label,
			]
		)

		if let range = highlightedRange,
		   let nsRange = Self.utf16Range(forCharacterRange: range, in: text) {
			attributed.addAttribute(.backgroundColor, value: UIColor.systemYellow.withAlphaComponent(0.3), range: nsRange)
		}

		textView.attributedText = attributed
	}

	func makeCoordinator() -> Coordinator {
		Coordinator(onTapCharacter: onTapCharacter)
	}

	/// Converts a range of `Character` offsets into the UTF-16 based `NSRange` UIKit expects.
	/// - Returns: `nil` when the range does not lie within `text`.
	static func utf16Range(forCharacterRange range: Range<Int>, in text: String) -> NSRange? {
		guard range.lowerBound >= 0,
		      let lower = text.index(text.startIndex, offsetBy: range.lowerBound, limitedBy: text.endIndex),
		      let upper = text.index(lower, offsetBy: range.count, limitedBy: text.endIndex) else {
			return nil
		}
		return NSRange(lower..<upper, in: text)
	}

	/// Gesture target that translates tap locations into character offsets.
	class Coordinator: NSObject {
		var onTapCharacter: (CharacterOffset) -> Void

		init(onTapCharacter: @escaping (CharacterOffset) -> Void) {
			self.onTapCharacter = onTapCharacter
		}

		@objc func handleTap(_ gesture: UITapGestureRecognizer) {
			guard let textView = gesture.view as? UITextView else { return }
			let point = gesture.location(in: textView)
			let content = textView.text ?? ""
			// Use TextKit2-compatible API (TextKit1's layoutManager is deprecated on iOS 16+)
			let utf16Offset = textView.offset(
				from: textView.beginningOfDocument,
				to: textView.closestPosition(to: point) ?? textView.beginningOfDocument
			)
			// UITextView offsets are UTF-16 code units; a tap past the last character is ignored.
			guard utf16Offset >= 0, utf16Offset < content.utf16.count else { return }
			let tapped = String.Index(utf16Offset: utf16Offset, in: content)
			onTapCharacter(content.distance(from: content.startIndex, to: tapped))
		}
	}
}
  • Step 4: Create PlaybackControls

Create Vorleser-iOS/PlaybackControls.swift:

import SwiftUI
import AudioEngine

/// Transport bar with skip-back, play/pause and skip-forward buttons driven by the engine state.
struct PlaybackControls: View {
	@Bindable var engine: AudioEngine

	var body: some View {
		HStack(spacing: 32) {
			Button {
				engine.skipBackward()
			} label: {
				Image(systemName: "backward.fill")
					.font(.title2)
			}
			.disabled(isIdle)

			Button {
				togglePlayback()
			} label: {
				Image(systemName: playButtonIcon)
					.font(.title)
			}

			Button {
				engine.skipForward()
			} label: {
				Image(systemName: "forward.fill")
					.font(.title2)
			}
			.disabled(isIdle)
		}
		.padding()
	}

	/// Skipping is unavailable while nothing is being played.
	private var isIdle: Bool {
		engine.state == .idle
	}

	/// SF Symbol name for the central button in the current state.
	private var playButtonIcon: String {
		switch engine.state {
		case .playing:
			return "pause.fill"
		case .synthesizing:
			return "hourglass"
		case .paused, .idle:
			return "play.fill"
		}
	}

	/// Pauses when playing and resumes when paused; other states are left alone.
	private func togglePlayback() {
		switch engine.state {
		case .playing:
			engine.pause()
		case .paused:
			engine.resume()
		case .idle, .synthesizing:
			break
		}
	}
}
  • Step 5: Create ReaderView

Create Vorleser-iOS/ReaderView.swift:

import SwiftUI
import SwiftData
import Storage
import BookParser
// Swift has no `import X as Y` alias syntax; the modules are imported directly and the
// `AudioEngine` / `Synthesizer` types are referenced by their unqualified names.
import AudioEngine
import Synthesizer
import VorleserKit

/// Shows one book chapter at a time, starts playback from a tapped character, and
/// stores the playback position when the view disappears.
struct ReaderView: View {
	let storedBook: StoredBook
	@State private var book: Book?
	@State private var error: String?
	@State private var engine = AudioEngine()
	@State private var synthesizer: Synthesizer?
	@State private var selectedChapterIndex: Int = 0
	@Environment(\.modelContext) private var modelContext

	/// Store bound to the environment's model container and the app's Documents directory.
	private var bookStore: BookStore {
		BookStore(
			modelContainer: modelContext.container,
			documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
		)
	}

	var body: some View {
		VStack {
			if let error {
				ContentUnavailableView("Error", systemImage: "exclamationmark.triangle", description: Text(error))
			} else if let book {
				chapterPicker(book: book)
				readingContent(book: book)
				PlaybackControls(engine: engine)
			} else {
				ProgressView("Loading…")
			}
		}
		.navigationTitle(storedBook.title)
		.task { await loadBook() }
		.onDisappear {
			engine.stop()
			try? bookStore.updatePosition(storedBook, position: engine.currentPosition)
		}
	}

	@ViewBuilder
	private func chapterPicker(book: Book) -> some View {
		if book.chapters.count > 1 {
			Picker("Chapter", selection: $selectedChapterIndex) {
				ForEach(book.chapters, id: \.index) { chapter in
					Text(chapter.title).tag(chapter.index)
				}
			}
			.pickerStyle(.menu)
			.padding(.horizontal)
		}
	}

	@ViewBuilder
	private func readingContent(book: Book) -> some View {
		// Guard the subscript: a book without chapters or a stale index must not crash the view.
		if book.chapters.indices.contains(selectedChapterIndex) {
			ReadingTextView(
				text: book.chapters[selectedChapterIndex].text,
				highlightedRange: currentSentenceRange(in: book),
				onTapCharacter: { localOffset in
					let target = globalOffset(forLocalOffset: localOffset, in: book)
					Task {
						do {
							try await startPlayback(from: target, book: book)
						} catch {
							// Errors thrown inside an unstructured Task would otherwise vanish.
							self.error = "Playback failed: \(error)"
						}
					}
				}
			)
		} else {
			ContentUnavailableView("No Content", systemImage: "book.closed", description: Text("This book has no readable chapters."))
		}
	}

	/// Parses the stored file, restores the last chapter, and prepares the synthesizer.
	private func loadBook() async {
		guard bookStore.fileExists(for: storedBook) else {
			error = "Book file is missing. Please re-import."
			return
		}
		do {
			let parsed = try BookParser.parse(url: bookStore.fileURL(for: storedBook))
			book = parsed
			// Restore position
			if storedBook.lastPosition > 0,
			   let (chapterIndex, _) = parsed.chapterAndLocalOffset(for: storedBook.lastPosition) {
				selectedChapterIndex = chapterIndex
			}
			// Load synthesizer
			guard let modelURL = Bundle.main.url(forResource: "kokoro-v1_0", withExtension: "safetensors"),
			      let voicesURL = Bundle.main.url(forResource: "voices", withExtension: "npz"),
			      let voice = VoicePack.curated.first else {
				self.error = "TTS model files not found in app bundle."
				return
			}
			synthesizer = try Synthesizer(voice: voice, modelURL: modelURL, voicesURL: voicesURL)
		} catch {
			self.error = "Failed to load book: \(error)"
		}
	}

	/// Starts playback at `offset`; does nothing while the synthesizer is not loaded.
	private func startPlayback(from offset: CharacterOffset, book: Book) async throws {
		guard let synthesizer else { return }
		try await engine.play(book: book, from: offset, using: synthesizer)
	}

	/// Sum of the text lengths of all chapters that precede the selected chapter.
	private func selectedChapterStart(in book: Book) -> Int {
		book.chapters.reduce(0) { total, chapter in
			chapter.index < selectedChapterIndex ? total + chapter.text.count : total
		}
	}

	/// Translates an offset within the selected chapter into a book-wide offset.
	private func globalOffset(forLocalOffset local: Int, in book: Book) -> CharacterOffset {
		selectedChapterStart(in: book) + local
	}

	/// Range of the sentence at the engine's position, relative to the selected chapter.
	/// - Returns: `nil` when nothing is playing or the sentence starts before this chapter.
	private func currentSentenceRange(in book: Book) -> Range<Int>? {
		guard engine.state == .playing || engine.state == .synthesizing,
		      let sentenceIndex = book.sentenceIndex(containing: engine.currentPosition) else {
			return nil
		}
		let sentence = book.sentences[sentenceIndex]
		// Convert global range to local range within current chapter
		let chapterStart = selectedChapterStart(in: book)
		let localStart = sentence.range.lowerBound - chapterStart
		let localEnd = sentence.range.upperBound - chapterStart
		guard localStart >= 0 else { return nil }
		return localStart..<localEnd
	}
}
  • Step 6: Generate Xcode project and verify build
cd /Users/felixfoertsch/Developer/vorleser
xcodegen generate
open Vorleser.xcodeproj

Build the iOS target in Xcode. Fix any import or module visibility issues. Common issues:

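  • Module names may conflict with type names (e.g. AudioEngine module vs AudioEngine class). Swift has no `import X as Y` alias syntax: import the module normally and refer to the type by its unqualified name (as AudioEngine.swift does with Synthesizer), or rename the module if qualification becomes necessary.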

  • SwiftData @Query requires the model container to be in the environment.

  • Step 7: Commit

git add Vorleser-iOS/
git commit -m "add iOS app shell with library, reader, tap-to-play, playback controls"

Task 10: macOS app shell

Files:

  • Create: Vorleser-macOS/VorleserMacApp.swift

  • Create: Vorleser-macOS/MacLibraryView.swift

  • Create: Vorleser-macOS/MacReaderView.swift

  • Create: Vorleser-macOS/MacReadingTextView.swift

  • Create: Vorleser-macOS/MacPlaybackControls.swift

  • Step 1: Create macOS app entry point

Create Vorleser-macOS/VorleserMacApp.swift:

import SwiftUI
import SwiftData
import Storage

/// macOS app entry point: shows the library window and installs the SwiftData container for `StoredBook`.
@main
struct VorleserMacApp: App {
	var body: some Scene {
		WindowGroup {
			MacLibraryView()
		}
		// Makes a model container for StoredBook available to @Query and modelContext in all views.
		.modelContainer(for: StoredBook.self)
	}
}
  • Step 2: Create MacLibraryView with sidebar layout

Create Vorleser-macOS/MacLibraryView.swift:

import SwiftUI
import SwiftData
import Storage
import BookParser

/// Sidebar/detail library for macOS: lists imported books and shows the selected one in a reader.
struct MacLibraryView: View {
	@Environment(\.modelContext) private var modelContext
	@Query(sort: \StoredBook.lastRead, order: .reverse) private var books: [StoredBook]
	@State private var selectedBook: StoredBook?
	@State private var showFileImporter = false

	/// Store bound to the environment's model container and the app's Documents directory.
	private var bookStore: BookStore {
		BookStore(
			modelContainer: modelContext.container,
			documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
		)
	}

	var body: some View {
		NavigationSplitView {
			List(books, selection: $selectedBook) { book in
				VStack(alignment: .leading) {
					Text(book.title).font(.headline)
					if let author = book.author {
						Text(author).font(.subheadline).foregroundStyle(.secondary)
					}
				}
				.tag(book)
				.contextMenu {
					Button("Delete", role: .destructive) {
						delete(book)
					}
				}
			}
			.navigationTitle("Library")
			.toolbar {
				Button("Import", systemImage: "plus") {
					showFileImporter = true
				}
			}
			.fileImporter(
				isPresented: $showFileImporter,
				allowedContentTypes: [.epub, .plainText],
				allowsMultipleSelection: false
			) { result in
				handleImport(result)
			}
		} detail: {
			if let selectedBook {
				MacReaderView(storedBook: selectedBook)
			} else {
				ContentUnavailableView("Select a Book", systemImage: "book", description: Text("Choose a book from the sidebar or import one."))
			}
		}
	}

	/// Deletes `book`, first clearing the selection so the detail pane never shows a deleted model.
	private func delete(_ book: StoredBook) {
		if selectedBook == book {
			selectedBook = nil
		}
		do {
			try bookStore.deleteBook(book)
		} catch {
			print("Delete failed: \(error)")
		}
	}

	/// Parses the picked file for its metadata and copies it into the library.
	private func handleImport(_ result: Result<[URL], Error>) {
		let url: URL
		switch result {
		case .success(let urls):
			guard let first = urls.first else { return }
			url = first
		case .failure(let error):
			print("Import failed: \(error)")
			return
		}

		Task {
			// Security-scoped access must span the work itself. Acquiring it outside the
			// task would release it (via defer) before the task body ever runs.
			guard url.startAccessingSecurityScopedResource() else {
				print("Import failed: no access to \(url.lastPathComponent)")
				return
			}
			defer { url.stopAccessingSecurityScopedResource() }

			do {
				let parsed = try BookParser.parse(url: url)
				try bookStore.importBook(from: url, title: parsed.title, author: parsed.author)
			} catch {
				print("Import failed: \(error)")
			}
		}
	}
}
  • Step 3: Create MacReadingTextView (NSTextView wrapper)

Create Vorleser-macOS/MacReadingTextView.swift:

import SwiftUI
import AppKit
import VorleserKit

/// Read-only `NSTextView` wrapper that highlights one range and reports clicks as character offsets.
///
/// `highlightedRange` and the offsets passed to `onClickCharacter` count Swift `Character`s
/// (the same unit as `text.count`). AppKit measures text in UTF-16 code units, so this view
/// converts between the two units in both directions.
struct MacReadingTextView: NSViewRepresentable {
	let text: String
	let highlightedRange: Range<Int>?
	let onClickCharacter: (CharacterOffset) -> Void

	func makeNSView(context: Context) -> NSScrollView {
		let scrollView = NSTextView.scrollableTextView()
		guard let textView = scrollView.documentView as? NSTextView else { return scrollView }
		textView.isEditable = false
		textView.isSelectable = false
		textView.font = .preferredFont(forTextStyle: .body)
		textView.textContainerInset = NSSize(width: 16, height: 16)

		let click = NSClickGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleClick(_:)))
		textView.addGestureRecognizer(click)
		context.coordinator.textView = textView

		return scrollView
	}

	func updateNSView(_ scrollView: NSScrollView, context: Context) {
		guard let textView = scrollView.documentView as? NSTextView else { return }
		// Keep the coordinator's callback in sync with the latest view value.
		context.coordinator.onClickCharacter = onClickCharacter

		let attributed = NSMutableAttributedString(
			string: text,
			attributes: [
				.font: NSFont.preferredFont(forTextStyle: .body),
				.foregroundColor: NSColor.textColor,
			]
		)

		if let range = highlightedRange,
		   let nsRange = Self.utf16Range(forCharacterRange: range, in: text) {
			attributed.addAttribute(.backgroundColor, value: NSColor.systemYellow.withAlphaComponent(0.3), range: nsRange)
		}

		textView.textStorage?.setAttributedString(attributed)
	}

	func makeCoordinator() -> Coordinator {
		Coordinator(onClickCharacter: onClickCharacter)
	}

	/// Converts a range of `Character` offsets into the UTF-16 based `NSRange` AppKit expects.
	/// - Returns: `nil` when the range does not lie within `text`.
	static func utf16Range(forCharacterRange range: Range<Int>, in text: String) -> NSRange? {
		guard range.lowerBound >= 0,
		      let lower = text.index(text.startIndex, offsetBy: range.lowerBound, limitedBy: text.endIndex),
		      let upper = text.index(lower, offsetBy: range.count, limitedBy: text.endIndex) else {
			return nil
		}
		return NSRange(lower..<upper, in: text)
	}

	/// Gesture target that translates click locations into character offsets.
	class Coordinator: NSObject {
		weak var textView: NSTextView?
		var onClickCharacter: (CharacterOffset) -> Void

		init(onClickCharacter: @escaping (CharacterOffset) -> Void) {
			self.onClickCharacter = onClickCharacter
		}

		@objc func handleClick(_ gesture: NSClickGestureRecognizer) {
			guard let textView else { return }
			let point = gesture.location(in: textView)
			let content = textView.string
			// NSTextView indexes are UTF-16 code units; a click past the last character is ignored.
			let utf16Offset = textView.characterIndexForInsertion(at: point)
			guard utf16Offset >= 0, utf16Offset < content.utf16.count else { return }
			let clicked = String.Index(utf16Offset: utf16Offset, in: content)
			onClickCharacter(content.distance(from: content.startIndex, to: clicked))
		}
	}
}
  • Step 4: Create MacPlaybackControls

Create Vorleser-macOS/MacPlaybackControls.swift:

import SwiftUI
import AudioEngine

/// Transport bar with skip-back, play/pause and skip-forward buttons driven by the engine state.
struct MacPlaybackControls: View {
	@Bindable var engine: AudioEngine

	var body: some View {
		HStack(spacing: 32) {
			Button {
				engine.skipBackward()
			} label: {
				Image(systemName: "backward.fill")
					.font(.title2)
			}
			.disabled(isIdle)

			Button {
				togglePlayback()
			} label: {
				Image(systemName: playButtonIcon)
					.font(.title)
			}

			Button {
				engine.skipForward()
			} label: {
				Image(systemName: "forward.fill")
					.font(.title2)
			}
			.disabled(isIdle)
		}
		.padding()
	}

	/// Skipping is unavailable while nothing is being played.
	private var isIdle: Bool {
		engine.state == .idle
	}

	/// SF Symbol name for the central button in the current state.
	private var playButtonIcon: String {
		switch engine.state {
		case .playing:
			return "pause.fill"
		case .synthesizing:
			return "hourglass"
		case .paused, .idle:
			return "play.fill"
		}
	}

	/// Pauses when playing and resumes when paused; other states are left alone.
	private func togglePlayback() {
		switch engine.state {
		case .playing:
			engine.pause()
		case .paused:
			engine.resume()
		case .idle, .synthesizing:
			break
		}
	}
}
  • Step 5: Create MacReaderView (full code — shares logic with iOS ReaderView but uses MacReadingTextView and MacPlaybackControls)

Create Vorleser-macOS/MacReaderView.swift:

import SwiftUI
import SwiftData
import Storage
import BookParser
// Swift has no `import X as Y` aliasing syntax. The modules are imported plainly and their
// same-named types (`AudioEngine`, `Synthesizer`) are referenced unqualified.
import AudioEngine
import Synthesizer
import VorleserKit

/// macOS reader screen: chapter picker, clickable text, and playback controls for one stored book.
struct MacReaderView: View {
	let storedBook: StoredBook
	@State private var book: Book?
	/// Fatal load error; when set it replaces the reader content.
	@State private var error: String?
	/// Non-fatal playback error; shown as an alert so the reader stays usable.
	@State private var playbackError: String?
	@State private var engine = AudioEngine()
	@State private var synthesizer: Synthesizer?
	@State private var selectedChapterIndex: Int = 0
	/// Becomes `true` once playback has been requested from this view.
	@State private var hasStartedPlayback = false
	@Environment(\.modelContext) private var modelContext

	private var bookStore: BookStore {
		BookStore(
			modelContainer: modelContext.container,
			documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
		)
	}

	/// Bridges the optional `playbackError` to the Boolean binding `alert` expects.
	private var isShowingPlaybackError: Binding<Bool> {
		Binding(
			get: { playbackError != nil },
			set: { isPresented in
				if !isPresented { playbackError = nil }
			}
		)
	}

	var body: some View {
		VStack {
			if let error {
				ContentUnavailableView("Error", systemImage: "exclamationmark.triangle", description: Text(error))
			} else if let book {
				chapterPicker(book: book)
				readingContent(book: book)
				MacPlaybackControls(engine: engine)
			} else {
				ProgressView("Loading…")
			}
		}
		.navigationTitle(storedBook.title)
		.task { await loadBook() }
		.alert("Playback Failed", isPresented: isShowingPlaybackError) {
			Button("OK", role: .cancel) {}
		} message: {
			Text(playbackError ?? "")
		}
		.onDisappear {
			// Capture the position before stopping.
			// NOTE(review): assumes `stop()` may reset `currentPosition` — confirm against AudioEngine.
			let position = engine.currentPosition
			engine.stop()
			// This view never hands the stored position to the engine, so if nothing was played
			// the engine's value is unrelated to the book and must not overwrite the saved one.
			guard hasStartedPlayback else { return }
			// Best effort: there is no UI left to report a failure to while disappearing.
			try? bookStore.updatePosition(storedBook, position: position)
		}
	}

	@ViewBuilder
	private func chapterPicker(book: Book) -> some View {
		if book.chapters.count > 1 {
			Picker("Chapter", selection: $selectedChapterIndex) {
				ForEach(book.chapters, id: \.index) { chapter in
					Text(chapter.title).tag(chapter.index)
				}
			}
			.pickerStyle(.menu)
			.padding(.horizontal)
		}
	}

	@ViewBuilder
	private func readingContent(book: Book) -> some View {
		// Guard the subscript: a book without chapters (or a stale index) must not trap.
		if book.chapters.indices.contains(selectedChapterIndex) {
			let chapter = book.chapters[selectedChapterIndex]
			MacReadingTextView(
				text: chapter.text,
				highlightedRange: currentSentenceRange(in: book),
				onClickCharacter: { localOffset in
					let startOffset = globalOffset(forLocalOffset: localOffset, in: book)
					Task {
						await play(from: startOffset, book: book)
					}
				}
			)
		} else {
			ContentUnavailableView(
				"No Content",
				systemImage: "book.closed",
				description: Text("This book has no readable chapters.")
			)
		}
	}

	/// Parses the book file, restores the last-read chapter, and creates the synthesizer.
	/// Any failure is surfaced through `error`.
	private func loadBook() async {
		guard bookStore.fileExists(for: storedBook) else {
			error = "Book file is missing. Please re-import."
			return
		}
		let fileURL = bookStore.fileURL(for: storedBook)

		do {
			let parsedBook = try BookParser.parse(url: fileURL)
			book = parsedBook
			if storedBook.lastPosition > 0,
				let (chapterIndex, _) = parsedBook.chapterAndLocalOffset(for: storedBook.lastPosition) {
				selectedChapterIndex = chapterIndex
			}
		} catch {
			self.error = "Failed to load book: \(error)"
			return
		}

		guard let modelURL = Bundle.main.url(forResource: "kokoro-v1_0", withExtension: "safetensors"),
			let voicesURL = Bundle.main.url(forResource: "voices", withExtension: "npz") else {
			error = "TTS model files not found in app bundle."
			return
		}
		guard let voice = VoicePack.curated.first else {
			error = "No TTS voice is available."
			return
		}
		do {
			synthesizer = try Synthesizer(voice: voice, modelURL: modelURL, voicesURL: voicesURL)
		} catch {
			self.error = "Failed to load TTS model: \(error)"
		}
	}

	/// Starts playback and reports failures instead of dropping them inside a detached `Task`.
	private func play(from offset: CharacterOffset, book: Book) async {
		do {
			try await startPlayback(from: offset, book: book)
		} catch is CancellationError {
			// A cancelled playback task is not an error worth reporting.
		} catch {
			playbackError = "\(error)"
		}
	}

	private func startPlayback(from offset: CharacterOffset, book: Book) async throws {
		guard let synthesizer else { return }
		// Set before awaiting so a view that disappears mid-playback still saves its position.
		hasStartedPlayback = true
		try await engine.play(book: book, from: offset, using: synthesizer)
	}

	/// Sum of the text lengths of all chapters that precede the selected one.
	private func chapterStartOffset(in book: Book) -> Int {
		book.chapters
			.filter { $0.index < selectedChapterIndex }
			.reduce(0) { $0 + $1.text.count }
	}

	/// Converts an offset within the selected chapter into a book-wide offset.
	private func globalOffset(forLocalOffset local: Int, in book: Book) -> CharacterOffset {
		chapterStartOffset(in: book) + local
	}

	/// Range of the sentence at the engine's current position, relative to the selected chapter.
	///
	/// Returns `nil` when nothing is playing or synthesizing, or when that sentence lies outside
	/// the chapter currently on screen.
	private func currentSentenceRange(in book: Book) -> Range<Int>? {
		guard engine.state == .playing || engine.state == .synthesizing else { return nil }
		guard book.chapters.indices.contains(selectedChapterIndex),
			let sentenceIndex = book.sentenceIndex(containing: engine.currentPosition) else { return nil }

		let sentence = book.sentences[sentenceIndex]
		let chapterStart = chapterStartOffset(in: book)
		let chapterLength = book.chapters[selectedChapterIndex].text.count
		let localStart = sentence.range.lowerBound - chapterStart
		let localEnd = sentence.range.upperBound - chapterStart

		// Reject sentences from earlier or later chapters, and clamp the end so the highlight
		// can never extend past the text that is actually displayed.
		guard localStart >= 0, localStart < chapterLength else { return nil }
		return localStart..<min(localEnd, chapterLength)
	}
}
  • Step 6: Generate and build
xcodegen generate

Build the macOS target in Xcode. Fix any compilation issues.

  • Step 7: Commit
git add Vorleser-macOS/
git commit -m "add macOS app shell with sidebar library, reader, click-to-play"

Chunk 6: Integration + Polish

Task 11: Download model weights and verify end-to-end

  • Step 1: Add .gitignore for model weights (before downloading)

Add to .gitignore:

Resources/Models/*.safetensors
Resources/Voices/*.npz

Commit immediately so the large files cannot be accidentally staged:

git add .gitignore
git commit -m "add gitignore for model weights and voice embeddings"
  • Step 2: Download Kokoro model weights
# Run from the repository root so the verification path below resolves.
huggingface-cli download hexgrad/Kokoro-82M kokoro-v1_0.safetensors --local-dir Resources/Models

If huggingface-cli is not installed: pip install huggingface-hub. Alternatively, download manually from https://huggingface.co/hexgrad/Kokoro-82M — look for kokoro-v1_0.safetensors (~600MB).

Verify: ls -lh Resources/Models/kokoro-v1_0.safetensors should show ~600MB.

  • Step 3: Download voice embeddings
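# Run from the repository root. --local-dir keeps the repo-relative path (voices/voices.npz),
# so move the file up to where the verification step below expects it.
huggingface-cli download hexgrad/Kokoro-82M voices/voices.npz --local-dir Resources/Voices
mv Resources/Voices/voices/voices.npz Resources/Voices/voices.npz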

Or download from the kokoro-ios test app resources. The file is voices.npz (~14MB), containing 28 voice embeddings as numpy arrays.

Verify: ls -lh Resources/Voices/voices.npz should show ~14MB.

  • Step 4: Regenerate project and run on iOS device
xcodegen generate

Open in Xcode, select a real iOS device (not Simulator — MLX requires Metal), build and run. Test:

  1. Import a plain text file
  2. Tap a word → verify synthesis starts and audio plays
  3. Tap play/pause → verify controls work
  4. Close and reopen the book → verify position is restored
  • Step 5: Test on macOS

Build and run macOS target. Same test sequence as iOS. Verify sidebar layout works.

  • Step 6: Fix any issues found during testing

Address compilation errors, runtime crashes, audio quality issues. This is the integration debugging step — budget time for it.

  • Step 7: Commit working state
git add Vorleser-iOS/ Vorleser-macOS/ VorleserKit/ project.yml
git commit -m "verify end-to-end playback on iOS and macOS"

Task 12: CalVer version + final cleanup

  • Step 1: Set CalVer version in project.yml

Add MARKETING_VERSION and CURRENT_PROJECT_VERSION to each target's settings.base in project.yml:

  Vorleser-iOS:
    settings:
      base:
        MARKETING_VERSION: "2026.03.13"
        CURRENT_PROJECT_VERSION: "1"
        # ... existing settings ...

  Vorleser-macOS:
    settings:
      base:
        MARKETING_VERSION: "2026.03.13"
        CURRENT_PROJECT_VERSION: "1"
        # ... existing settings ...
  • Step 2: Remove old project files

Delete the old codebase directories that are no longer used:

git rm -r Vorleser/ VorleserMac/ Vendor/

Keep tools/kokoro_coreml/ and autoaudiobook/ — they contain reference documentation and learnings.

  • Step 3: Final commit
git add project.yml
git commit -m "set CalVer 2026.03.13, remove legacy code"