64 KiB
Vorleser Greenfield Implementation Plan
For agentic workers: REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (- [ ]) syntax for tracking.
Goal: Build a macOS + iOS app that reads EPUB and plain text books aloud using on-device Kokoro TTS via MLX Swift, with tap-to-play-from-here and position memory.
Architecture: VorleserKit Swift Package contains all logic (parsing, synthesis, playback, storage). Thin SwiftUI app shells for iOS and macOS. KokoroSwift handles TTS inference, MisakiSwift handles phonemization — both pulled in as SPM dependencies.
Tech Stack: Swift 6.2, KokoroSwift (MLX), MisakiSwift, SwiftData, ZIPFoundation, SwiftSoup, XcodeGen
Spec: docs/superpowers/specs/2026-03-13-vorleser-greenfield-design.md
Chunk 1: Project Skeleton + BookParser
Task 1: Project scaffold
Files:
-
Create:
VorleserKit/Package.swift -
Create:
VorleserKit/Sources/VorleserKit/VorleserKit.swift(shared types) -
Create:
VorleserKit/Sources/BookParser/BookParser.swift -
Create:
VorleserKit/Sources/BookParser/Book.swift -
Create:
VorleserKit/Sources/BookParser/Chapter.swift -
Create:
VorleserKit/Sources/BookParser/EPUBParser.swift -
Create:
VorleserKit/Sources/BookParser/PlainTextParser.swift -
Create:
VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift -
Create:
VorleserKit/Sources/VorleserKit/Sentence.swift -
Create:
VorleserKit/Tests/BookParserTests/BookParserTests.swift -
Create:
VorleserKit/Tests/VorleserKitTests/SentenceSegmenterTests.swift -
Create:
project.yml -
Step 1: Create Package.swift
// swift-tools-version: 6.2
import PackageDescription
// Target graph (must stay acyclic): BookParser -> VorleserKit.
// Book.swift (in BookParser) imports VorleserKit for `Sentence` and
// `SentenceSegmenter`, so the dependency has to point from BookParser to
// VorleserKit. The reverse edge would create a cycle, which SwiftPM rejects.
let package = Package(
    name: "VorleserKit",
    platforms: [
        .iOS(.v18),
        .macOS(.v15),
    ],
    products: [
        .library(name: "VorleserKit", targets: ["VorleserKit"]),
        .library(name: "BookParser", targets: ["BookParser"]),
    ],
    dependencies: [
        .package(url: "https://github.com/weichsel/ZIPFoundation.git", from: "0.9.0"),
        .package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.7.0"),
    ],
    targets: [
        // Shared types (CharacterOffset, Sentence, SentenceSegmenter); no target dependencies.
        .target(
            name: "VorleserKit",
            dependencies: []
        ),
        // EPUB / plain-text parsing; uses the shared types from VorleserKit.
        .target(
            name: "BookParser",
            dependencies: ["VorleserKit", "ZIPFoundation", "SwiftSoup"]
        ),
        .testTarget(
            name: "BookParserTests",
            dependencies: ["BookParser"],
            // Copied verbatim so tests can look fixtures up under the "Fixtures" subdirectory.
            resources: [.copy("Fixtures")]
        ),
        .testTarget(
            name: "VorleserKitTests",
            dependencies: ["VorleserKit"]
        ),
    ]
)
Note: Synthesizer and AudioEngine targets are added in later tasks. Start lean — only what's needed now.
- Step 2: Create shared types
Create VorleserKit/Sources/VorleserKit/VorleserKit.swift:
/// A position in a book, measured in characters from the start of the first chapter.
///
/// Offsets are plain `Int` values; chapter-relative positions are derived from them
/// by subtracting the character counts of the preceding chapters.
public typealias CharacterOffset = Int
Create VorleserKit/Sources/VorleserKit/Sentence.swift:
/// One sentence of book text together with the character range it occupies.
public struct Sentence: Sendable {
    /// The sentence's text.
    public let text: String

    /// Half-open range of character offsets covered by this sentence.
    public let range: Range<CharacterOffset>

    /// Creates a sentence from its text and the range it spans.
    /// - Parameters:
    ///   - text: The sentence's text.
    ///   - range: The character offsets the sentence covers.
    public init(text: String, range: Range<CharacterOffset>) {
        (self.text, self.range) = (text, range)
    }
}
Create VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift:
import NaturalLanguage
/// Splits text into sentences using `NLTokenizer`.
public struct SentenceSegmenter: Sendable {
    /// Segments `text` into sentences.
    ///
    /// Each returned sentence carries its whitespace-trimmed text, while its range
    /// covers the full (untrimmed) token reported by the tokenizer. Tokens that are
    /// empty after trimming are dropped.
    ///
    /// - Parameters:
    ///   - text: The text to segment.
    ///   - globalOffset: Value added to every range bound, so ranges can be expressed
    ///     relative to a larger document. Defaults to `0`.
    /// - Returns: The sentences in the order the tokenizer reports them.
    public static func segment(_ text: String, globalOffset: CharacterOffset = 0) -> [Sentence] {
        let tokenizer = NLTokenizer(unit: .sentence)
        tokenizer.string = text

        var sentences: [Sentence] = []

        // Measure each token relative to the end of the previous one instead of from
        // `startIndex`. `String.distance` walks the characters in between, so always
        // measuring from the start made segmentation quadratic in the text length.
        var cursorIndex = text.startIndex
        var cursorOffset = globalOffset

        tokenizer.enumerateTokens(in: text.startIndex..<text.endIndex) { range, _ in
            let start = cursorOffset + text.distance(from: cursorIndex, to: range.lowerBound)
            let end = start + text.distance(from: range.lowerBound, to: range.upperBound)
            cursorIndex = range.upperBound
            cursorOffset = end

            let sentenceText = String(text[range]).trimmingCharacters(in: .whitespacesAndNewlines)
            guard !sentenceText.isEmpty else { return true }

            sentences.append(Sentence(text: sentenceText, range: start..<end))
            return true
        }
        return sentences
    }
}
- Step 3: Create Book and Chapter types
Create VorleserKit/Sources/BookParser/Chapter.swift:
/// A single chapter of a book: its position, its title and its plain text.
public struct Chapter: Sendable {
    /// Position of the chapter within its book.
    public let index: Int

    /// Title of the chapter.
    public let title: String

    /// The chapter's text content.
    public let text: String

    /// Creates a chapter.
    /// - Parameters:
    ///   - index: Position of the chapter within its book.
    ///   - title: Title of the chapter.
    ///   - text: The chapter's text content.
    public init(index: Int, title: String, text: String) {
        (self.index, self.title, self.text) = (index, title, text)
    }
}
Create VorleserKit/Sources/BookParser/Book.swift:
import Foundation
import VorleserKit
/// A parsed book: metadata plus an ordered list of chapters.
///
/// Positions inside the book are expressed as global character offsets: the
/// chapters' texts are treated as if laid end to end, with each chapter
/// contributing `text.count` characters.
public struct Book: Sendable {
    /// Identifier of this book value.
    public let id: UUID
    /// The book's title.
    public let title: String
    /// The book's author, if known.
    public let author: String?
    /// The chapters in reading order.
    public let chapters: [Chapter]

    /// Creates a book.
    /// - Parameters:
    ///   - id: Identifier; defaults to a freshly generated UUID.
    ///   - title: The book's title.
    ///   - author: The book's author, if known.
    ///   - chapters: The chapters in reading order.
    public init(id: UUID = UUID(), title: String, author: String?, chapters: [Chapter]) {
        self.id = id
        self.title = title
        self.author = author
        self.chapters = chapters
    }

    /// All sentences across all chapters, with global character offsets.
    ///
    /// Recomputed on every access (each chapter is segmented again), so callers
    /// that need it repeatedly should keep the result.
    public var sentences: [Sentence] {
        var result: [Sentence] = []
        var chapterStart: CharacterOffset = 0
        for chapter in chapters {
            result.append(contentsOf: SentenceSegmenter.segment(chapter.text, globalOffset: chapterStart))
            chapterStart += chapter.text.count
        }
        return result
    }

    /// Returns the sentence index containing the given global character offset.
    /// - Parameter offset: A global character offset.
    /// - Returns: The index into `sentences` of the first sentence whose range
    ///   contains `offset`, or `nil` if no sentence does.
    public func sentenceIndex(containing offset: CharacterOffset) -> Int? {
        sentences.firstIndex { $0.range.contains(offset) }
    }

    /// Maps a global character offset to (chapter index, local offset within chapter).
    /// - Parameter globalOffset: A global character offset.
    /// - Returns: The `index` of the chapter containing the offset and the offset
    ///   relative to that chapter's start, or `nil` when the offset is negative or
    ///   at/after the end of the book.
    public func chapterAndLocalOffset(for globalOffset: CharacterOffset) -> (chapterIndex: Int, localOffset: Int)? {
        // Without this guard a negative offset would be attributed to the first
        // chapter with a negative local offset.
        guard globalOffset >= 0 else { return nil }

        var chapterStart = 0
        for chapter in chapters {
            let chapterEnd = chapterStart + chapter.text.count
            if globalOffset < chapterEnd {
                return (chapter.index, globalOffset - chapterStart)
            }
            chapterStart = chapterEnd
        }
        return nil
    }

    /// Total character count across all chapters.
    public var totalCharacters: Int {
        chapters.reduce(0) { $0 + $1.text.count }
    }
}
Note: sentences is a computed property (recomputed each call). This is fine for sentence navigation — AudioEngine will cache the result once per playback session. Avoids the lazy var on struct issue flagged in spec review.
- Step 4: Write SentenceSegmenter tests
Create VorleserKit/Tests/VorleserKitTests/SentenceSegmenterTests.swift:
import Testing
@testable import VorleserKit
/// Behavioral tests for `SentenceSegmenter.segment(_:globalOffset:)`.
@Suite("SentenceSegmenter")
struct SentenceSegmenterTests {
    @Test func segmentsSimpleSentences() {
        let text = "Hello world. How are you? I am fine."
        let sentences = SentenceSegmenter.segment(text)
        #expect(sentences.map(\.text) == ["Hello world.", "How are you?", "I am fine."])
    }

    @Test func handlesAbbreviations() {
        let text = "Dr. Smith went to Washington. He arrived at 3 p.m."
        let sentences = SentenceSegmenter.segment(text)
        // NLTokenizer should handle "Dr." without splitting
        #expect(sentences.count == 2)
    }

    @Test func appliesGlobalOffset() throws {
        let text = "First sentence. Second sentence."
        let sentences = SentenceSegmenter.segment(text, globalOffset: 100)
        // `#require` fails the test cleanly instead of trapping on an empty result.
        let first = try #require(sentences.first)
        let last = try #require(sentences.last)
        // The text starts with a sentence, so the first range begins exactly at the offset.
        #expect(first.range.lowerBound == 100)
        // No range may extend past the shifted end of the text.
        #expect(last.range.upperBound <= 100 + text.count)
    }

    @Test func handlesEmptyText() {
        let sentences = SentenceSegmenter.segment("")
        #expect(sentences.isEmpty)
    }

    @Test func handlesSingleSentence() {
        let text = "Just one sentence."
        let sentences = SentenceSegmenter.segment(text)
        #expect(sentences.count == 1)
        #expect(sentences.first?.text == "Just one sentence.")
    }
}
- Step 5: Run tests to verify they pass
Run: cd VorleserKit && swift test --filter SentenceSegmenterTests
Expected: All 5 tests pass. If handlesAbbreviations fails (NLTokenizer splits on "Dr."), adjust the expectation — document the actual NLTokenizer behavior rather than fighting it.
- Step 6: Commit
git add VorleserKit/
git commit -m "scaffold VorleserKit package with shared types, sentence segmenter, tests"
Task 2: EPUB parser
Files:
-
Create:
VorleserKit/Sources/BookParser/EPUBParser.swift -
Create:
VorleserKit/Tests/BookParserTests/EPUBParserTests.swift -
Create:
VorleserKit/Tests/BookParserTests/Fixtures/(test EPUB) -
Step 1: Create a minimal test EPUB fixture
Create a minimal valid EPUB for testing. An EPUB is a ZIP with specific structure:
cd VorleserKit/Tests/BookParserTests
mkdir -p Fixtures
Write a script that creates a minimal EPUB file programmatically. The EPUB must contain:
- META-INF/container.xml pointing to content.opf
- content.opf with manifest + spine (2 chapters)
- chapter1.xhtml with <h1>Chapter One</h1><p>This is the first chapter.</p>
- chapter2.xhtml with <h1>Chapter Two</h1><p>This is the second chapter.</p>
Save as Fixtures/test.epub.
- Step 2: Write EPUBParser tests
Create VorleserKit/Tests/BookParserTests/EPUBParserTests.swift:
import Testing
import Foundation
@testable import BookParser
/// Tests for `EPUBParser.parse(url:)` against the bundled `Fixtures/test.epub`.
@Suite("EPUBParser")
struct EPUBParserTests {
    // Force-unwrapped on purpose: a missing bundled fixture is a test-setup error.
    let fixtureURL: URL = Bundle.module.url(
        forResource: "test",
        withExtension: "epub",
        subdirectory: "Fixtures"
    )!

    @Test func parsesTestEPUB() throws {
        let chapters = try EPUBParser.parse(url: fixtureURL).chapters
        #expect(chapters.count == 2)
        #expect(chapters[0].title == "Chapter One")
        #expect(chapters[0].text.contains("first chapter"))
        #expect(chapters[1].title == "Chapter Two")
    }

    @Test func extractsTitle() throws {
        let parsed = try EPUBParser.parse(url: fixtureURL)
        #expect(!parsed.title.isEmpty)
    }

    @Test func throwsOnInvalidFile() {
        let missing = URL(fileURLWithPath: "/tmp/nonexistent.epub")
        #expect(throws: EPUBParserError.self) {
            try EPUBParser.parse(url: missing)
        }
    }

    @Test func chaptersHaveSequentialIndices() throws {
        let chapters = try EPUBParser.parse(url: fixtureURL).chapters
        for position in chapters.indices {
            #expect(chapters[position].index == position)
        }
    }
}
- Step 3: Run tests to verify they fail
Run: cd VorleserKit && swift test --filter EPUBParserTests
Expected: FAIL — EPUBParser doesn't exist yet.
- Step 4: Implement EPUBParser
Create VorleserKit/Sources/BookParser/EPUBParser.swift:
import Foundation
import ZIPFoundation
import SwiftSoup
/// Failures that can occur while reading an EPUB archive.
public enum EPUBParserError: Error, CustomStringConvertible {
    /// The file at the given URL could not be opened as an archive.
    case cannotOpenArchive(URL)
    /// `META-INF/container.xml` is absent or names no package document.
    case missingContainerXML
    /// No archive entry exists at the given path.
    case missingOPF(String)
    /// The package document could not be parsed.
    case malformedOPF
    /// The package document lists no spine items.
    case noSpineItems

    /// Human-readable message for the error.
    public var description: String {
        switch self {
        case let .cannotOpenArchive(location):
            return "cannot open EPUB archive at \(location.path)"
        case .missingContainerXML:
            return "missing META-INF/container.xml"
        case let .missingOPF(entryPath):
            return "missing OPF file at \(entryPath)"
        case .malformedOPF:
            return "malformed OPF (package document)"
        case .noSpineItems:
            return "EPUB has no spine items"
        }
    }
}
/// Parses EPUB archives into `Book` values.
public struct EPUBParser {
    /// Parses the EPUB at `url`.
    ///
    /// Steps: locate the package document (OPF) via `META-INF/container.xml`, read
    /// title/author/manifest/spine from it, then extract one chapter per spine item
    /// whose idref resolves in the manifest.
    ///
    /// - Parameter url: Location of the `.epub` file.
    /// - Returns: The parsed book. The title falls back to the file name (without
    ///   extension) when the OPF has none.
    /// - Throws: `EPUBParserError` when the archive cannot be opened, the container
    ///   or OPF is missing or unusable, or the spine is empty.
    public static func parse(url: URL) throws -> Book {
        guard let archive = Archive(url: url, accessMode: .read) else {
            throw EPUBParserError.cannotOpenArchive(url)
        }

        // 1. Find OPF path from container.xml
        let opfPath = try findOPFPath(in: archive)

        // 2. Parse OPF to get manifest + spine
        let opfDir = (opfPath as NSString).deletingLastPathComponent
        let opfData = try extractData(from: archive, path: opfPath)
        let (title, author, manifest, spine) = try parseOPF(data: opfData)
        guard !spine.isEmpty else { throw EPUBParserError.noSpineItems }

        // 3. Extract chapters from spine
        var chapters: [Chapter] = []
        for itemRef in spine {
            guard let href = manifest[itemRef] else { continue }

            // Number chapters by their position in the result rather than in the
            // spine, so a skipped spine item cannot leave a gap in the indices.
            let index = chapters.count
            let fallbackTitle = "Chapter \(index + 1)"

            // Manifest hrefs are URL references and may be percent-encoded
            // (e.g. "chapter%201.xhtml"); archive entry paths are not.
            let entryName = href.removingPercentEncoding ?? href
            let fullPath = opfDir.isEmpty ? entryName : "\(opfDir)/\(entryName)"

            let chapterTitle: String
            let chapterText: String
            do {
                let htmlData = try extractData(from: archive, path: fullPath)
                let html = String(data: htmlData, encoding: .utf8) ?? ""
                let doc = try SwiftSoup.parse(html)
                chapterTitle = try extractTitle(from: doc) ?? fallbackTitle
                let body = try doc.body()?.text() ?? ""
                chapterText = normalizeWhitespace(body)
            } catch {
                // Broken chapter — include with empty text per spec
                chapterTitle = "\(fallbackTitle) (parse error)"
                chapterText = ""
            }
            chapters.append(Chapter(index: index, title: chapterTitle, text: chapterText))
        }

        return Book(
            title: title ?? url.deletingPathExtension().lastPathComponent,
            author: author,
            chapters: chapters
        )
    }

    // MARK: - Private

    /// Returns the first non-empty heading text, falling back to the document's `<title>`.
    ///
    /// Headings are queried separately from `<title>`: a combined selector yields
    /// matches in document order, and `<title>` lives in `<head>`, so it would
    /// otherwise always win over the chapter heading in `<body>`.
    private static func extractTitle(from doc: Document) throws -> String? {
        for selector in ["h1, h2, h3", "title"] {
            if let text = try doc.select(selector).first()?.text(), !text.isEmpty {
                return text
            }
        }
        return nil
    }

    /// Reads `META-INF/container.xml` and returns the OPF path it names.
    private static func findOPFPath(in archive: Archive) throws -> String {
        let containerPath = "META-INF/container.xml"
        // Report a missing container as such instead of as a missing OPF.
        guard archive[containerPath] != nil else {
            throw EPUBParserError.missingContainerXML
        }
        let containerData = try extractData(from: archive, path: containerPath)
        guard let opfPath = ContainerXMLParser(data: containerData).parse() else {
            throw EPUBParserError.missingContainerXML
        }
        return opfPath
    }

    /// Parses OPF data into title, author, manifest (id → href) and spine (ordered idrefs).
    private static func parseOPF(data: Data) throws -> (title: String?, author: String?, manifest: [String: String], spine: [String]) {
        guard let result = OPFParser(data: data).parse() else {
            throw EPUBParserError.malformedOPF
        }
        return result
    }

    /// Extracts the full contents of the archive entry at `path`.
    /// - Throws: `EPUBParserError.missingOPF` when no entry exists at `path`, or
    ///   whatever error the archive extraction raises.
    private static func extractData(from archive: Archive, path: String) throws -> Data {
        guard let entry = archive[path] else {
            throw EPUBParserError.missingOPF(path)
        }
        var data = Data()
        _ = try archive.extract(entry) { chunk in
            data.append(chunk)
        }
        return data
    }

    /// Collapses every run of whitespace/newlines into a single space and trims the ends.
    private static func normalizeWhitespace(_ text: String) -> String {
        text.components(separatedBy: .whitespacesAndNewlines)
            .filter { !$0.isEmpty }
            .joined(separator: " ")
    }
}
// MARK: - XML Parsers
/// Parses META-INF/container.xml to find the OPF path.
///
/// Records the `full-path` attribute of the first `rootfile` element. A container
/// may list several rootfiles; the first one is the default rendition, so later
/// entries must not overwrite it.
private final class ContainerXMLParser: NSObject, XMLParserDelegate {
    private let data: Data
    private var opfPath: String?

    /// - Parameter data: Raw bytes of `container.xml`.
    init(data: Data) {
        self.data = data
    }

    /// Runs the XML parser over the data.
    /// - Returns: The first rootfile's `full-path`, or `nil` if none was found.
    func parse() -> String? {
        let parser = XMLParser(data: data)
        parser.delegate = self
        // Best effort: the result of `parse()` is ignored, so a path found before
        // a later syntax error is still returned.
        parser.parse()
        return opfPath
    }

    func parser(_ parser: XMLParser, didStartElement element: String, namespaceURI: String?,
                qualifiedName: String?, attributes: [String: String]) {
        guard opfPath == nil, element == "rootfile", let path = attributes["full-path"] else {
            return
        }
        opfPath = path
    }
}
/// Parses the OPF (package document) for title, author, manifest, and spine.
///
/// Element names are compared by local name (any `prefix:` is stripped), so
/// `dc:title` and `title` are treated alike.
private final class OPFParser: NSObject, XMLParserDelegate {
    private let data: Data
    private var title: String?
    private var author: String?
    private var manifest: [String: String] = [:]  // id → href
    private var spine: [String] = []               // ordered item refs
    private var currentText = ""
    private var inMetadata = false

    /// - Parameter data: Raw bytes of the OPF file.
    init(data: Data) {
        self.data = data
    }

    /// Runs the XML parser over the data.
    /// - Returns: `(title, author, manifest, spine)`, or `nil` when the XML failed
    ///   to parse and no spine items were collected. A document that breaks only
    ///   after its spine was read is still returned (best effort).
    func parse() -> (String?, String?, [String: String], [String])? {
        let parser = XMLParser(data: data)
        parser.delegate = self
        let parsedCleanly = parser.parse()
        guard parsedCleanly || !spine.isEmpty else { return nil }
        return (title, author, manifest, spine)
    }

    func parser(_ parser: XMLParser, didStartElement element: String, namespaceURI: String?,
                qualifiedName: String?, attributes: [String: String]) {
        let localName = element.components(separatedBy: ":").last ?? element
        // Start collecting character data afresh for this element.
        currentText = ""
        switch localName {
        case "metadata":
            inMetadata = true
        case "item":
            if let id = attributes["id"], let href = attributes["href"] {
                manifest[id] = href
            }
        case "itemref":
            if let idref = attributes["idref"] {
                spine.append(idref)
            }
        default:
            break
        }
    }

    func parser(_ parser: XMLParser, foundCharacters string: String) {
        currentText += string
    }

    func parser(_ parser: XMLParser, didEndElement element: String, namespaceURI: String?,
                qualifiedName: String?) {
        guard inMetadata else { return }
        let localName = element.components(separatedBy: ":").last ?? element
        let trimmed = currentText.trimmingCharacters(in: .whitespacesAndNewlines)
        // Only the first non-empty title/creator is kept.
        if localName == "title" && title == nil && !trimmed.isEmpty {
            title = trimmed
        } else if localName == "creator" && author == nil && !trimmed.isEmpty {
            author = trimmed
        } else if localName == "metadata" {
            inMetadata = false
        }
    }
}
- Step 5: Run tests to verify they pass
Run: cd VorleserKit && swift test --filter EPUBParserTests
Expected: All 4 tests pass.
- Step 6: Commit
git add VorleserKit/Sources/BookParser/ VorleserKit/Tests/BookParserTests/
git commit -m "add EPUB parser with ZIP extraction, OPF/spine parsing, HTML-to-text"
Task 3: Plain text parser
Files:
-
Create:
VorleserKit/Sources/BookParser/PlainTextParser.swift -
Create:
VorleserKit/Tests/BookParserTests/PlainTextParserTests.swift -
Step 1: Write tests
Create VorleserKit/Tests/BookParserTests/PlainTextParserTests.swift:
import Testing
import Foundation
@testable import BookParser
/// Tests for `PlainTextParser`.
@Suite("PlainTextParser")
struct PlainTextParserTests {
    @Test func parsesMultipleChapters() throws {
        let text = "First chapter content.\n\nSecond chapter content.\n\nThird chapter."
        let book = PlainTextParser.parse(text: text, title: "Test Book")
        #expect(book.chapters.count == 3)
        #expect(book.chapters[0].text == "First chapter content.")
        #expect(book.chapters[1].text == "Second chapter content.")
    }

    @Test func parsesSingleParagraphAsOneChapter() {
        let text = "Just a single paragraph with no double newlines."
        let book = PlainTextParser.parse(text: text, title: "Test")
        #expect(book.chapters.count == 1)
        #expect(book.chapters[0].text == text)
    }

    @Test func setsTitle() {
        let book = PlainTextParser.parse(text: "Hello", title: "My Book")
        #expect(book.title == "My Book")
    }

    @Test func parsesFromFile() throws {
        // Use a private, uniquely named directory: tests run in parallel, and a
        // fixed path in the shared temp directory can be written or deleted by
        // another test at the same time. The file keeps the name "test.txt"
        // because the title is derived from it.
        let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory())
            .appendingPathComponent(UUID().uuidString)
        try FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true)
        defer { try? FileManager.default.removeItem(at: tmpDir) }

        let tmpFile = tmpDir.appendingPathComponent("test.txt")
        try "Line one.\n\nLine two.".write(to: tmpFile, atomically: true, encoding: .utf8)

        let book = try PlainTextParser.parse(url: tmpFile)
        #expect(book.chapters.count == 2)
        #expect(book.title == "test")
    }
}
- Step 2: Run to verify failure
Run: cd VorleserKit && swift test --filter PlainTextParserTests
Expected: FAIL — PlainTextParser doesn't exist.
- Step 3: Implement PlainTextParser
Create VorleserKit/Sources/BookParser/PlainTextParser.swift:
import Foundation
/// Turns plain text into a `Book`, one chapter per blank-line-separated paragraph.
public struct PlainTextParser {
    /// Parses `text` into a book.
    ///
    /// Line endings are normalized to `\n` first, so files with Windows (`\r\n`) or
    /// classic Mac (`\r`) line endings are split into paragraphs as well. Paragraphs
    /// are separated by a blank line, trimmed, and dropped when empty; each becomes
    /// a chapter titled "Section N".
    ///
    /// - Parameters:
    ///   - text: The full text of the book.
    ///   - title: Title to give the book.
    ///   - author: Optional author. Defaults to `nil`.
    /// - Returns: The parsed book.
    public static func parse(text: String, title: String, author: String? = nil) -> Book {
        // "\r\n\r\n" does not contain "\n\n", so without normalization a CRLF file
        // would never be split.
        let normalized = text
            .replacingOccurrences(of: "\r\n", with: "\n")
            .replacingOccurrences(of: "\r", with: "\n")

        let chapters = normalized.components(separatedBy: "\n\n")
            .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
            .filter { !$0.isEmpty }
            .enumerated()
            .map { index, paragraph in
                Chapter(index: index, title: "Section \(index + 1)", text: paragraph)
            }

        return Book(title: title, author: author, chapters: chapters)
    }

    /// Reads the UTF-8 text file at `url` and parses it.
    /// - Parameter url: Location of the text file; its name without extension becomes the title.
    /// - Returns: The parsed book.
    /// - Throws: Any error raised while reading the file as UTF-8.
    public static func parse(url: URL) throws -> Book {
        let text = try String(contentsOf: url, encoding: .utf8)
        let title = url.deletingPathExtension().lastPathComponent
        return parse(text: text, title: title)
    }
}
- Step 4: Run tests
Run: cd VorleserKit && swift test --filter PlainTextParserTests
Expected: All 4 tests pass.
- Step 5: Commit
git add VorleserKit/Sources/BookParser/PlainTextParser.swift VorleserKit/Tests/BookParserTests/PlainTextParserTests.swift
git commit -m "add plain text parser with paragraph-based chapter splitting"
Task 4: BookParser facade + Book integration tests
Files:
-
Create:
VorleserKit/Sources/BookParser/BookParser.swift -
Create:
VorleserKit/Tests/BookParserTests/BookTests.swift -
Step 1: Write BookParser facade tests
Create VorleserKit/Tests/BookParserTests/BookTests.swift:
import Testing
import Foundation
@testable import BookParser
@testable import VorleserKit
/// Tests that `BookParser.parse(url:)` picks the parser matching the file extension.
@Suite("BookParser")
struct BookParserFacadeTests {
    @Test func detectsEPUBByExtension() throws {
        // Force-unwrapped on purpose: a missing bundled fixture is a test-setup error.
        let url = Bundle.module.url(forResource: "test", withExtension: "epub", subdirectory: "Fixtures")!
        let book = try BookParser.parse(url: url)
        #expect(book.chapters.count == 2)
    }

    @Test func detectsTextByExtension() throws {
        // Use a private, uniquely named directory: tests run in parallel, and a
        // fixed path in the shared temp directory can be written or deleted by
        // another test at the same time.
        let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory())
            .appendingPathComponent(UUID().uuidString)
        try FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true)
        defer { try? FileManager.default.removeItem(at: tmpDir) }

        let tmpFile = tmpDir.appendingPathComponent("test.txt")
        try "Paragraph one.\n\nParagraph two.".write(to: tmpFile, atomically: true, encoding: .utf8)

        let book = try BookParser.parse(url: tmpFile)
        #expect(book.chapters.count == 2)
    }
}
/// Tests for the global character addressing helpers on `Book`.
@Suite("Book character addressing")
struct BookCharacterAddressingTests {
    // Two chapters of 12 and 15 characters: global offsets 0..<12 and 12..<27.
    let book = Book(
        title: "Test",
        author: nil,
        chapters: [
            Chapter(index: 0, title: "Ch 1", text: "Hello world."),
            Chapter(index: 1, title: "Ch 2", text: "Second chapter."),
        ]
    )

    @Test func totalCharacters() {
        #expect(book.totalCharacters == 27)
    }

    @Test func chapterAndLocalOffset() {
        // The very first character belongs to chapter 0.
        let atStart = book.chapterAndLocalOffset(for: 0)
        #expect(atStart?.chapterIndex == 0)
        #expect(atStart?.localOffset == 0)

        // Offset 12 is the first character of chapter 1.
        let atBoundary = book.chapterAndLocalOffset(for: 12)
        #expect(atBoundary?.chapterIndex == 1)
        #expect(atBoundary?.localOffset == 0)
    }

    @Test func sentenceIndex() {
        #expect(book.sentenceIndex(containing: 0) != nil)
    }

    @Test func outOfRangeReturnsNil() {
        let beyondEnd = book.chapterAndLocalOffset(for: 9999)
        #expect(beyondEnd == nil)
    }
}
- Step 2: Implement BookParser facade
Create VorleserKit/Sources/BookParser/BookParser.swift:
import Foundation
/// Errors raised by the `BookParser` facade.
public enum BookParserError: Error, CustomStringConvertible {
    /// The file extension (carried as the associated value) has no matching parser.
    case unsupportedFormat(String)

    /// Human-readable message for the error.
    public var description: String {
        switch self {
        case let .unsupportedFormat(fileExtension):
            return "unsupported file format: \(fileExtension)"
        }
    }
}
/// Facade that picks a concrete parser from the file extension.
public struct BookParser {
    /// Parses the book at `url`.
    ///
    /// The extension is matched case-insensitively: `epub` goes to `EPUBParser`,
    /// `txt` and `text` go to `PlainTextParser`.
    ///
    /// - Parameter url: Location of the book file.
    /// - Returns: The parsed book.
    /// - Throws: `BookParserError.unsupportedFormat` (carrying the extension as
    ///   written) for any other extension, or the chosen parser's error.
    public static func parse(url: URL) throws -> Book {
        let normalizedExtension = url.pathExtension.lowercased()

        if normalizedExtension == "epub" {
            return try EPUBParser.parse(url: url)
        }
        if normalizedExtension == "txt" || normalizedExtension == "text" {
            return try PlainTextParser.parse(url: url)
        }
        throw BookParserError.unsupportedFormat(url.pathExtension)
    }
}
- Step 3: Run all BookParser tests
Run: cd VorleserKit && swift test --filter "BookParser|Book"
Expected: All tests pass.
- Step 4: Commit
git add VorleserKit/Sources/BookParser/BookParser.swift VorleserKit/Tests/BookParserTests/BookTests.swift
git commit -m "add BookParser facade with format detection, Book character addressing tests"
Chunk 2: Storage
Task 5: SwiftData storage
Files:
-
Create:
VorleserKit/Sources/Storage/StoredBook.swift -
Create:
VorleserKit/Sources/Storage/BookStore.swift -
Create:
VorleserKit/Tests/StorageTests/BookStoreTests.swift -
Modify:
VorleserKit/Package.swift— add Storage target -
Step 1: Add Storage target to Package.swift
Add to the targets array in Package.swift:
.target(
name: "Storage",
dependencies: []
),
.testTarget(
name: "StorageTests",
dependencies: ["Storage"]
),
Add "Storage" to the VorleserKit target's dependencies.
- Step 2: Write StoredBook model
Create VorleserKit/Sources/Storage/StoredBook.swift:
import Foundation
import SwiftData
/// Persistent library record for an imported book.
///
/// Declared `final`: the class is a plain data model and is not designed to be subclassed.
@Model
public final class StoredBook {
    /// Identifier of the book.
    public var bookID: UUID
    /// The book's title.
    public var title: String
    /// The book's author, if known.
    public var author: String?
    /// Name of the stored book file.
    public var sourceFileName: String
    /// When the record was created.
    public var dateAdded: Date
    /// Last saved reading position.
    public var lastPosition: Int
    /// When the position was last updated; `nil` until then.
    public var lastRead: Date?
    /// Name of the voice chosen for this book, if any.
    public var voiceName: String?

    /// Creates a record.
    /// - Parameters:
    ///   - bookID: Identifier; defaults to a freshly generated UUID.
    ///   - title: The book's title.
    ///   - author: The book's author. Defaults to `nil`.
    ///   - sourceFileName: Name of the stored book file.
    ///   - dateAdded: Creation date. Defaults to now.
    ///   - lastPosition: Last reading position. Defaults to `0`.
    ///   - lastRead: Date of the last position update. Defaults to `nil`.
    ///   - voiceName: Name of the chosen voice. Defaults to `nil`.
    public init(
        bookID: UUID = UUID(),
        title: String,
        author: String? = nil,
        sourceFileName: String,
        dateAdded: Date = .now,
        lastPosition: Int = 0,
        lastRead: Date? = nil,
        voiceName: String? = nil
    ) {
        self.bookID = bookID
        self.title = title
        self.author = author
        self.sourceFileName = sourceFileName
        self.dateAdded = dateAdded
        self.lastPosition = lastPosition
        self.lastRead = lastRead
        self.voiceName = voiceName
    }
}
- Step 3: Write BookStore
Create VorleserKit/Sources/Storage/BookStore.swift:
import Foundation
import SwiftData
/// Manages the book library: `StoredBook` records in a SwiftData container plus
/// the copied book files in a documents directory.
public struct BookStore {
    private let modelContainer: ModelContainer
    private let documentsDirectory: URL

    /// - Parameters:
    ///   - modelContainer: Container holding the `StoredBook` records.
    ///   - documentsDirectory: Directory that imported book files are copied into.
    public init(modelContainer: ModelContainer, documentsDirectory: URL) {
        self.modelContainer = modelContainer
        self.documentsDirectory = documentsDirectory
    }

    /// Copies the file at `sourceURL` into the documents directory and records it.
    ///
    /// The copy is stored under a UUID-prefixed name so that importing two files
    /// with the same name cannot collide.
    ///
    /// - Parameters:
    ///   - sourceURL: The file to import.
    ///   - title: Title for the new record.
    ///   - author: Author for the new record, if known.
    /// - Returns: The newly inserted record.
    /// - Throws: The file-copy error or the save error. When saving fails, the
    ///   copied file and the pending record are removed again, so no orphaned file
    ///   is left behind.
    @MainActor
    public func importBook(from sourceURL: URL, title: String, author: String?) throws -> StoredBook {
        let fileName = "\(UUID().uuidString)_\(sourceURL.lastPathComponent)"
        let destination = documentsDirectory.appendingPathComponent(fileName)
        try FileManager.default.copyItem(at: sourceURL, to: destination)

        let stored = StoredBook(title: title, author: author, sourceFileName: fileName)
        let context = modelContainer.mainContext
        context.insert(stored)
        do {
            try context.save()
        } catch {
            // Undo both halves of the import before reporting the failure.
            context.delete(stored)
            try? FileManager.default.removeItem(at: destination)
            throw error
        }
        return stored
    }

    /// Fetches all records, most recently read first, then most recently added.
    @MainActor
    public func allBooks() throws -> [StoredBook] {
        let descriptor = FetchDescriptor<StoredBook>(
            sortBy: [SortDescriptor(\.lastRead, order: .reverse), SortDescriptor(\.dateAdded, order: .reverse)]
        )
        return try modelContainer.mainContext.fetch(descriptor)
    }

    /// Stores a new reading position and stamps `lastRead` with the current date.
    @MainActor
    public func updatePosition(_ book: StoredBook, position: Int) throws {
        book.lastPosition = position
        book.lastRead = .now
        try modelContainer.mainContext.save()
    }

    /// Deletes the record and its file.
    ///
    /// File removal is best effort (`try?`): a missing file does not prevent the
    /// record from being deleted.
    @MainActor
    public func deleteBook(_ book: StoredBook) throws {
        try? FileManager.default.removeItem(at: fileURL(for: book))
        let context = modelContainer.mainContext
        context.delete(book)
        try context.save()
    }

    /// Location of the stored file for `book` inside the documents directory.
    public func fileURL(for book: StoredBook) -> URL {
        documentsDirectory.appendingPathComponent(book.sourceFileName)
    }

    /// Whether the stored file for `book` currently exists on disk.
    public func fileExists(for book: StoredBook) -> Bool {
        FileManager.default.fileExists(atPath: fileURL(for: book).path)
    }
}
- Step 4: Write tests
Create VorleserKit/Tests/StorageTests/BookStoreTests.swift:
import Testing
import Foundation
import SwiftData
@testable import Storage
/// Tests for `BookStore`, using an in-memory model container and a throwaway directory.
@Suite("BookStore")
struct BookStoreTests {
    /// Builds a store backed by an in-memory container and a fresh temp directory.
    /// - Returns: The store and the directory, which the caller must remove.
    @MainActor
    func makeStore() throws -> (BookStore, URL) {
        let config = ModelConfiguration(isStoredInMemoryOnly: true)
        let container = try ModelContainer(for: StoredBook.self, configurations: config)
        let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory())
            .appendingPathComponent(UUID().uuidString)
        try FileManager.default.createDirectory(at: tmpDir, withIntermediateDirectories: true)
        return (BookStore(modelContainer: container, documentsDirectory: tmpDir), tmpDir)
    }

    @Test @MainActor func importAndList() throws {
        let (store, tmpDir) = try makeStore()
        defer { try? FileManager.default.removeItem(at: tmpDir) }

        // Create a fake source file
        let sourceFile = tmpDir.appendingPathComponent("source.epub")
        try "fake epub".write(to: sourceFile, atomically: true, encoding: .utf8)

        let stored = try store.importBook(from: sourceFile, title: "Test Book", author: "Author")
        #expect(stored.title == "Test Book")
        #expect(stored.lastPosition == 0)

        let books = try store.allBooks()
        #expect(books.count == 1)
        #expect(store.fileExists(for: books[0]))
    }

    @Test @MainActor func updatePosition() throws {
        let (store, tmpDir) = try makeStore()
        defer { try? FileManager.default.removeItem(at: tmpDir) }

        let sourceFile = tmpDir.appendingPathComponent("source.txt")
        try "text".write(to: sourceFile, atomically: true, encoding: .utf8)

        let stored = try store.importBook(from: sourceFile, title: "Book", author: nil)
        try store.updatePosition(stored, position: 500)
        #expect(stored.lastPosition == 500)
        #expect(stored.lastRead != nil)
    }

    @Test @MainActor func deleteBook() throws {
        let (store, tmpDir) = try makeStore()
        defer { try? FileManager.default.removeItem(at: tmpDir) }

        let sourceFile = tmpDir.appendingPathComponent("source.txt")
        try "text".write(to: sourceFile, atomically: true, encoding: .utf8)

        let stored = try store.importBook(from: sourceFile, title: "Book", author: nil)
        // Capture the location before deleting, so the check below does not have
        // to read properties of a deleted record.
        let storedFile = store.fileURL(for: stored)
        #expect(FileManager.default.fileExists(atPath: storedFile.path))

        try store.deleteBook(stored)

        let books = try store.allBooks()
        #expect(books.isEmpty)
        // Deleting must remove the copied file as well as the record.
        #expect(!FileManager.default.fileExists(atPath: storedFile.path))
    }
}
- Step 5: Run tests
Run: cd VorleserKit && swift test --filter StorageTests
Expected: All 3 tests pass.
- Step 6: Commit
git add VorleserKit/Sources/Storage/ VorleserKit/Tests/StorageTests/ VorleserKit/Package.swift
git commit -m "add SwiftData storage with BookStore for library management, position tracking"
Chunk 3: Synthesizer
Task 6: Synthesizer wrapper around KokoroSwift
Files:
- Create:
VorleserKit/Sources/Synthesizer/Synthesizer.swift - Create:
VorleserKit/Sources/Synthesizer/VoicePack.swift - Create:
VorleserKit/Sources/Synthesizer/SynthesizerError.swift - Modify:
VorleserKit/Package.swift— add Synthesizer target + KokoroSwift dependency
Note: The Synthesizer wraps KokoroSwift. Tests for this module require a real device with MLX Metal support — they cannot run in the iOS Simulator. Write integration tests that are clearly marked and can be skipped in CI.
- Step 1: Add KokoroSwift dependency and Synthesizer target to Package.swift
Add to dependencies array:
.package(url: "https://github.com/mlalma/kokoro-ios.git", exact: "1.0.6"),
Note: Pin the exact version to match MisakiSwift's expectations. Check the latest tag on the repo and use that.
Add to targets array:
.target(
name: "Synthesizer",
dependencies: [
.product(name: "KokoroSwift", package: "kokoro-ios"),
]
),
Add "Synthesizer" to VorleserKit target's dependencies.
- Step 2: Create VoicePack
Create VorleserKit/Sources/Synthesizer/VoicePack.swift:
import Foundation
/// Describes one selectable voice: its internal name, display name and language.
public struct VoicePack: Sendable, Identifiable {
    /// Languages a voice can be tagged with; the raw value is the language tag.
    public enum Language: String, Sendable {
        case enUS = "en-us"
        case enGB = "en-gb"
    }

    /// Internal voice name; also serves as the identity.
    public let name: String
    /// Name shown to the user.
    public let displayName: String
    /// Language of the voice.
    public let language: Language

    /// `Identifiable` conformance: a voice is identified by its `name`.
    public var id: String { name }

    /// Curated voices bundled with the app.
    /// The app shell must bundle `voices.npz` containing these voice embeddings.
    public static let curated: [VoicePack] = [
        ("af_heart", "Heart"),
        ("af_bella", "Bella"),
        ("am_michael", "Michael"),
    ].map { entry in
        VoicePack(name: entry.0, displayName: entry.1, language: .enUS)
    }
}
- Step 3: Create SynthesizerError
Create VorleserKit/Sources/Synthesizer/SynthesizerError.swift:
/// Failures raised while setting up or running speech synthesis.
public enum SynthesizerError: Error, CustomStringConvertible {
    /// No model file exists at the given path.
    case modelNotFound(String)
    /// No `voices.npz` exists at the given path.
    case voicesNotFound(String)
    /// The named voice is not contained in `voices.npz`.
    case voiceNotAvailable(String)
    /// Synthesis of the given text failed with the underlying error.
    case synthesisFailure(String, Error)

    /// Human-readable message for the error.
    ///
    /// For `synthesisFailure` the text is cut to its first 50 characters; the
    /// "..." marker is appended only when something was actually cut off, so a
    /// short sentence is not shown as if it had been truncated.
    public var description: String {
        switch self {
        case .modelNotFound(let path):
            return "kokoro model not found at \(path)"
        case .voicesNotFound(let path):
            return "voices.npz not found at \(path)"
        case .voiceNotAvailable(let name):
            return "voice '\(name)' not found in voices.npz"
        case .synthesisFailure(let text, let error):
            let limit = 50
            let excerpt = text.count > limit ? "\(text.prefix(limit))..." : text
            return "synthesis failed for '\(excerpt)': \(error)"
        }
    }
}
- Step 4: Implement Synthesizer
Create VorleserKit/Sources/Synthesizer/Synthesizer.swift:
import Foundation
import KokoroSwift
import MLX
/// Wraps a `KokoroTTS` instance bound to one voice.
///
/// NOTE(review): the `Sendable` conformance only compiles under strict concurrency
/// if `KokoroTTS` and `MLXArray` are themselves `Sendable` — confirm against the
/// KokoroSwift / MLX versions in use.
public final class Synthesizer: Sendable {
    /// Sample rate of the generated audio.
    public static let sampleRate: Double = 24_000

    private let tts: KokoroTTS
    private let voiceEmbedding: MLXArray
    private let language: KokoroSwift.Language
    private let voicePack: VoicePack

    /// Initialize the synthesizer with a specific voice.
    /// - Parameters:
    ///   - voice: The voice pack to use.
    ///   - modelURL: URL to `kokoro-v1_0.safetensors`.
    ///   - voicesURL: URL to `voices.npz`.
    /// - Throws: `SynthesizerError.modelNotFound` / `.voicesNotFound` when the
    ///   respective file does not exist, `.voiceNotAvailable` when the voices file
    ///   cannot be read or has no `"<voice.name>.npy"` entry.
    public init(voice: VoicePack, modelURL: URL, voicesURL: URL) throws {
        let fileManager = FileManager.default
        if !fileManager.fileExists(atPath: modelURL.path) {
            throw SynthesizerError.modelNotFound(modelURL.path)
        }
        if !fileManager.fileExists(atPath: voicesURL.path) {
            throw SynthesizerError.voicesNotFound(voicesURL.path)
        }

        tts = KokoroTTS(modelPath: modelURL, g2p: .misaki)

        // Load voice embedding from voices.npz
        let embeddingKey = "\(voice.name).npy"
        guard let archive = NpyzReader.read(fileFromPath: voicesURL),
              let embedding = archive[embeddingKey] else {
            throw SynthesizerError.voiceNotAvailable(voice.name)
        }
        voiceEmbedding = embedding
        voicePack = voice

        // Map the voice pack's language onto the KokoroSwift language.
        switch voice.language {
        case .enUS: language = .enUS
        case .enGB: language = .enGB
        }
    }

    /// Synthesize a sentence to PCM audio.
    /// - Parameter text: Sentence-length text to synthesize.
    /// - Returns: PCM Float32 samples at 24,000 Hz, mono.
    /// - Throws: `SynthesizerError.synthesisFailure` wrapping the underlying error.
    public func synthesize(text: String) throws -> [Float] {
        let generated: [Float]
        do {
            // Only the samples (first tuple element) are used; the second is discarded.
            (generated, _) = try tts.generateAudio(
                voice: voiceEmbedding,
                language: language,
                text: text,
                speed: 1.0
            )
        } catch {
            throw SynthesizerError.synthesisFailure(text, error)
        }
        return generated
    }
}
Note: The import MLXUtilsLibrary for NpyzReader may need to be added depending on how KokoroSwift re-exports it. Adjust imports during implementation based on what compiles.
- Step 5: Verify it compiles
Run: cd VorleserKit && swift build
Expected: Compiles successfully. If KokoroSwift's API differs from what the research found, adjust the wrapper. The key integration points to verify:
- KokoroTTS(modelPath:, g2p:) constructor
- tts.generateAudio(voice:, language:, text:, speed:) method signature
- NpyzReader import path
- Step 6: Commit
git add VorleserKit/Sources/Synthesizer/ VorleserKit/Package.swift
git commit -m "add Synthesizer wrapper around KokoroSwift with voice pack support"
Chunk 4: AudioEngine
Task 7: AudioEngine playback + position tracking
Files:
-
Create:
VorleserKit/Sources/AudioEngine/AudioEngine.swift -
Create:
VorleserKit/Sources/AudioEngine/PlaybackState.swift -
Modify:
VorleserKit/Package.swift— add AudioEngine target -
Step 1: Add AudioEngine target to Package.swift
Add to targets:
.target(
name: "AudioEngine",
dependencies: ["VorleserKit", "Synthesizer", "BookParser"]
),
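Do NOT add "AudioEngine" to the VorleserKit target's dependencies: AudioEngine already depends on "VorleserKit" (see the target above), so the reverse edge would create a cyclic target dependency, which SwiftPM rejects. Expose AudioEngine as its own library product instead: add .library(name: "AudioEngine", targets: ["AudioEngine"]) to the products array.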
- Step 2: Create PlaybackState
Create VorleserKit/Sources/AudioEngine/PlaybackState.swift:
/// The states playback can be in.
public enum PlaybackState: Sendable {
    case idle, synthesizing, playing, paused
}
- Step 3: Implement AudioEngine
Create VorleserKit/Sources/AudioEngine/AudioEngine.swift:
import Foundation
import AVFoundation
import Observation
import BookParser
import VorleserKit
import Synthesizer
/// Plays a book aloud sentence by sentence and publishes the playback position.
///
/// Each sentence is synthesized to PCM, scheduled on an `AVAudioPlayerNode`, and awaited.
/// While one sentence plays, the next one is synthesized ahead of time (one-ahead buffering).
/// All mutable state lives on the main actor; only the synthesis calls run on detached tasks,
/// which requires `Synthesizer` to be usable from a non-main thread.
@Observable
@MainActor
public final class AudioEngine {
    /// Start offset of the sentence that is currently being synthesized or played.
    public private(set) var currentPosition: CharacterOffset = 0
    /// Current playback phase, intended for driving UI.
    public private(set) var state: PlaybackState = .idle

    private var avEngine: AVAudioEngine?
    private var playerNode: AVAudioPlayerNode?
    private var sentences: [Sentence] = []
    private var currentSentenceIndex: Int = 0
    private var synthesizer: Synthesizer?
    private var book: Book?
    private var nextBuffer: AVAudioPCMBuffer?
    private var playbackTask: Task<Void, Never>?

    public init() {}

    /// Stops any running playback and starts reading `book` from the sentence containing `offset`.
    ///
    /// Falls back to the first sentence when no sentence contains `offset`, and returns
    /// without starting anything when the book has no sentences.
    /// - Throws: Errors from configuring the audio session (iOS) or starting the audio engine.
    public func play(book: Book, from offset: CharacterOffset, using synthesizer: Synthesizer) async throws {
        stop()
        self.book = book
        self.synthesizer = synthesizer
        sentences = book.sentences
        guard let startIndex = book.sentenceIndex(containing: offset) ?? sentences.indices.first else {
            return
        }
        currentSentenceIndex = startIndex
        currentPosition = sentences[startIndex].range.lowerBound

        #if os(iOS)
        try AVAudioSession.sharedInstance().setCategory(.playback, mode: .spokenAudio)
        try AVAudioSession.sharedInstance().setActive(true)
        #endif

        let engine = AVAudioEngine()
        let player = AVAudioPlayerNode()
        engine.attach(player)
        // A mono format at the synthesizer's fixed sample rate; must match the buffers scheduled later.
        let format = AVAudioFormat(standardFormatWithSampleRate: Synthesizer.sampleRate, channels: 1)!
        engine.connect(player, to: engine.mainMixerNode, format: format)
        try engine.start()
        player.play()
        avEngine = engine
        playerNode = player
        startPlaybackLoop()
    }

    /// Pauses output. Ignored unless playback is active, so an idle engine never reports `.paused`.
    public func pause() {
        guard state == .playing || state == .synthesizing else { return }
        playerNode?.pause()
        state = .paused
    }

    /// Resumes output after `pause()`. Ignored in every other state.
    public func resume() {
        guard state == .paused else { return }
        playerNode?.play()
        state = .playing
    }

    /// Cancels the playback loop, tears down the audio graph, and returns to `.idle`.
    /// `currentPosition` is left untouched so callers can persist it afterwards.
    public func stop() {
        playbackTask?.cancel()
        playbackTask = nil
        playerNode?.stop()
        avEngine?.stop()
        avEngine = nil
        playerNode = nil
        nextBuffer = nil
        state = .idle
    }

    /// Jumps to the next sentence; does nothing on the last sentence.
    public func skipForward() {
        let target = currentSentenceIndex + 1
        guard target < sentences.count else { return }
        restartPlayback(at: target)
    }

    /// Jumps to the previous sentence; does nothing on the first sentence.
    public func skipBackward() {
        guard currentSentenceIndex > 0 else { return }
        restartPlayback(at: currentSentenceIndex - 1)
    }

    // MARK: - Private

    /// Launches a fresh playback loop task, replacing the stored task handle.
    private func startPlaybackLoop() {
        playbackTask = Task { [weak self] in
            await self?.playbackLoop()
        }
    }

    /// Cancels the running loop and restarts playback at `index`.
    private func restartPlayback(at index: Int) {
        playbackTask?.cancel()
        nextBuffer = nil
        currentSentenceIndex = index
        currentPosition = sentences[index].range.lowerBound
        // stop() flushes the scheduled buffer, which releases the cancelled loop's continuation,
        // but it also halts the node: without play() the next scheduled buffer stays silent.
        playerNode?.stop()
        playerNode?.play()
        // A skip restarts output, so it also ends a pause.
        if state == .paused { state = .synthesizing }
        startPlaybackLoop()
    }

    /// Updates `state` unless the user has paused; a pause stays visible until `resume()`.
    private func enter(_ phase: PlaybackState) {
        if state != .paused { state = phase }
    }

    /// Synthesizes, schedules and awaits one sentence at a time until the book ends or the task is cancelled.
    ///
    /// Cancellation is re-checked after every suspension point before shared state is written,
    /// because `stop()` or a skip may have replaced that state while this task was suspended.
    private func playbackLoop() async {
        guard let synthesizer, let playerNode else { return }
        while currentSentenceIndex < sentences.count {
            if Task.isCancelled { return }
            let index = currentSentenceIndex
            let sentence = sentences[index]
            currentPosition = sentence.range.lowerBound
            enter(.synthesizing)

            let buffer: AVAudioPCMBuffer
            if let prefetched = nextBuffer {
                nextBuffer = nil
                buffer = prefetched
            } else {
                // Run synthesis off the main actor so the UI stays responsive.
                let samples = await Self.synthesisTask(for: sentence.text, using: synthesizer).value
                if Task.isCancelled { return }
                guard let samples, let fresh = Self.makePCMBuffer(from: samples) else {
                    // Skip sentences that fail to synthesize or produce no audio.
                    currentSentenceIndex = index + 1
                    continue
                }
                buffer = fresh
            }
            enter(.playing)

            // Start synthesizing the following sentence while this one plays.
            let nextIndex = index + 1
            let prefetch: Task<[Float]?, Never>? = nextIndex < sentences.count
                ? Self.synthesisTask(for: sentences[nextIndex].text, using: synthesizer)
                : nil

            // The completion handler runs off the main thread, so it must not inherit main-actor isolation.
            await withCheckedContinuation { (continuation: CheckedContinuation<Void, Never>) in
                playerNode.scheduleBuffer(buffer) { @Sendable in
                    continuation.resume()
                }
            }
            if Task.isCancelled { return }

            if let prefetch {
                let samples = await prefetch.value
                if Task.isCancelled { return }
                nextBuffer = samples.flatMap { Self.makePCMBuffer(from: $0) }
            }
            currentSentenceIndex = index + 1
        }
        state = .idle
    }

    /// Starts synthesis of `text` on a detached task and returns its handle.
    /// The task yields raw samples (a Sendable value) or `nil` on failure; the PCM buffer
    /// itself is built back on the main actor.
    private nonisolated static func synthesisTask(
        for text: String,
        using synthesizer: Synthesizer
    ) -> Task<[Float]?, Never> {
        Task.detached {
            try? synthesizer.synthesize(text: text)
        }
    }

    /// Copies `samples` into a mono PCM buffer at the synthesizer's sample rate.
    /// - Returns: `nil` for an empty sample array or when the buffer cannot be allocated.
    private static func makePCMBuffer(from samples: [Float]) -> AVAudioPCMBuffer? {
        guard !samples.isEmpty,
              let format = AVAudioFormat(standardFormatWithSampleRate: Synthesizer.sampleRate, channels: 1),
              let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(samples.count)),
              let channel = buffer.floatChannelData?[0]
        else { return nil }
        buffer.frameLength = AVAudioFrameCount(samples.count)
        samples.withUnsafeBufferPointer { source in
            guard let base = source.baseAddress else { return }
            channel.update(from: base, count: source.count)
        }
        return buffer
    }
}
- Step 4: Verify it compiles
Run: cd VorleserKit && swift build
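Expected: Compiles. The prefetch task closure may need adjustment based on Swift 6.2 concurrency rules. Two things to watch for: (1) AVAudioPCMBuffer is not Sendable, so a detached task should return the raw [Float] samples and the buffer should be built back on the main actor; (2) Synthesizer is captured by the detached task, so it must be Sendable. Only mark Synthesizer as @unchecked Sendable after confirming that KokoroSwift/MLX inference is safe to call from a background thread, and document that reasoning next to the conformance; if it is not safe, wrap the synthesizer in an actor instead.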
- Step 5: Commit
git add VorleserKit/Sources/AudioEngine/ VorleserKit/Package.swift
git commit -m "add AudioEngine with AVAudioEngine playback, one-ahead buffering, skip controls"
Chunk 5: XcodeGen Project + App Shells
Task 8: XcodeGen project configuration
Files:
-
Create:
project.yml -
Step 1: Create project.yml
name: Vorleser
options:
  bundleIdPrefix: de.felixfoertsch
  deploymentTarget:
    iOS: "18.0"
    macOS: "15.0"
  xcodeVersion: "16.0"
  createIntermediateGroups: true
packages:
  VorleserKit:
    path: VorleserKit
targets:
  Vorleser-iOS:
    type: application
    platform: iOS
    sources:
      - path: Vorleser-iOS
      # XcodeGen targets have no `resources` key: bundled files are listed under
      # `sources` with an explicit build phase. The .gitkeep placeholders are
      # excluded so the two copies do not end up in the same resources phase.
      - path: Resources/Models
        buildPhase: resources
        excludes:
          - ".gitkeep"
      - path: Resources/Voices
        buildPhase: resources
        excludes:
          - ".gitkeep"
    dependencies:
      - package: VorleserKit
        product: VorleserKit
    # XcodeGen writes this Info.plist and points INFOPLIST_FILE at it.
    info:
      path: Vorleser-iOS/Info.plist
      properties:
        UIBackgroundModes: [audio]
        UILaunchScreen: {}
        UIFileSharingEnabled: true
        LSSupportsOpeningDocumentsInPlace: true
    settings:
      base:
        PRODUCT_BUNDLE_IDENTIFIER: de.felixfoertsch.vorleser
  Vorleser-macOS:
    type: application
    platform: macOS
    sources:
      - path: Vorleser-macOS
      - path: Resources/Models
        buildPhase: resources
        excludes:
          - ".gitkeep"
      - path: Resources/Voices
        buildPhase: resources
        excludes:
          - ".gitkeep"
    dependencies:
      - package: VorleserKit
        product: VorleserKit
    info:
      path: Vorleser-macOS/Info.plist
    settings:
      base:
        PRODUCT_BUNDLE_IDENTIFIER: de.felixfoertsch.vorleser.mac
- Step 2: Create shared Resources directories
mkdir -p Resources/Models Resources/Voices
Add .gitkeep files so the empty directories are tracked:
touch Resources/Models/.gitkeep Resources/Voices/.gitkeep
- Step 3: Commit
git add project.yml Resources/
git commit -m "add XcodeGen project config with iOS and macOS targets, resource directories"
Task 9: iOS app shell
Files:
-
Create:
Vorleser-iOS/VorleserApp.swift -
Create:
Vorleser-iOS/LibraryView.swift -
Create:
Vorleser-iOS/ReaderView.swift -
Create:
Vorleser-iOS/PlaybackControls.swift -
Create:
Vorleser-iOS/ReadingTextView.swift -
Step 1: Create app entry point
Create Vorleser-iOS/VorleserApp.swift:
import SwiftUI
import SwiftData
import Storage
/// iOS application entry point.
/// Shows `LibraryView` as the root of the single window group and installs a SwiftData
/// model container for `StoredBook` on the scene.
@main
struct VorleserApp: App {
var body: some Scene {
WindowGroup {
LibraryView()
}
.modelContainer(for: StoredBook.self)
}
}
- Step 2: Create LibraryView
Create Vorleser-iOS/LibraryView.swift:
import SwiftUI
import SwiftData
import Storage
import BookParser
/// Library screen: lists stored books (sorted by `lastRead`, descending), imports EPUB or
/// plain-text files through the system file importer, and deletes books by swipe.
struct LibraryView: View {
    @Environment(\.modelContext) private var modelContext
    @Query(sort: \StoredBook.lastRead, order: .reverse) private var books: [StoredBook]
    @State private var showFileImporter = false
    /// Description of the most recent failed import or delete; a non-nil value presents the alert.
    @State private var failureMessage: String?

    private var bookStore: BookStore {
        BookStore(
            modelContainer: modelContext.container,
            documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
        )
    }

    var body: some View {
        NavigationStack {
            List {
                ForEach(books) { book in
                    NavigationLink(value: book) {
                        VStack(alignment: .leading) {
                            Text(book.title)
                                .font(.headline)
                            if let author = book.author {
                                Text(author)
                                    .font(.subheadline)
                                    .foregroundStyle(.secondary)
                            }
                        }
                    }
                }
                .onDelete(perform: deleteBooks)
            }
            .navigationTitle("Library")
            .navigationDestination(for: StoredBook.self) { storedBook in
                ReaderView(storedBook: storedBook)
            }
            .toolbar {
                Button("Import", systemImage: "plus") {
                    showFileImporter = true
                }
            }
            .fileImporter(
                isPresented: $showFileImporter,
                allowedContentTypes: [.epub, .plainText],
                allowsMultipleSelection: false
            ) { result in
                handleImport(result)
            }
            .alert(
                "Library Error",
                isPresented: Binding(
                    get: { failureMessage != nil },
                    set: { isPresented in
                        if !isPresented { failureMessage = nil }
                    }
                )
            ) {
                Button("OK", role: .cancel) {}
            } message: {
                Text(failureMessage ?? "")
            }
        }
    }

    /// Parses the picked file for its metadata and copies it into the book store.
    /// Failures are reported through the alert.
    private func handleImport(_ result: Result<[URL], Error>) {
        let url: URL
        switch result {
        case .success(let urls):
            guard let first = urls.first else { return }
            url = first
        case .failure(let error):
            failureMessage = "Import failed: \(error.localizedDescription)"
            return
        }
        // The security scope must stay open until parsing and copying have finished, so the
        // work runs synchronously inside this scope instead of in a Task that outlives it.
        // A `false` result is not treated as fatal: the URL may be readable without a scope.
        let isScoped = url.startAccessingSecurityScopedResource()
        defer {
            if isScoped { url.stopAccessingSecurityScopedResource() }
        }
        do {
            let parsed = try BookParser.parse(url: url)
            try bookStore.importBook(from: url, title: parsed.title, author: parsed.author)
        } catch {
            failureMessage = "Import failed: \(error.localizedDescription)"
        }
    }

    /// Deletes the books at the given list offsets and reports the first failure, if any.
    private func deleteBooks(at offsets: IndexSet) {
        // Resolve the offsets up front so later deletions cannot shift the indices.
        let doomed = offsets.map { books[$0] }
        for book in doomed {
            do {
                try bookStore.deleteBook(book)
            } catch {
                failureMessage = "Delete failed: \(error.localizedDescription)"
                return
            }
        }
    }
}
- Step 3: Create ReadingTextView (UITextView wrapper for tap-to-character)
Create Vorleser-iOS/ReadingTextView.swift:
import SwiftUI
import UIKit
import VorleserKit
/// Read-only `UITextView` wrapper that shows chapter text, highlights one range, and reports
/// the character that was tapped.
///
/// `highlightedRange` and the reported offsets count Swift `Character`s, while UIKit addresses
/// text in UTF-16 code units; both directions are converted explicitly so that text containing
/// multi-unit characters (emoji, "\r\n", decomposed accents) maps correctly.
struct ReadingTextView: UIViewRepresentable {
    let text: String
    let highlightedRange: Range<Int>?
    let onTapCharacter: (CharacterOffset) -> Void

    func makeUIView(context: Context) -> UITextView {
        let textView = UITextView()
        textView.isEditable = false
        textView.isSelectable = false
        textView.font = .preferredFont(forTextStyle: .body)
        textView.textContainerInset = UIEdgeInsets(top: 16, left: 16, bottom: 16, right: 16)
        let tap = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap(_:)))
        textView.addGestureRecognizer(tap)
        return textView
    }

    func updateUIView(_ textView: UITextView, context: Context) {
        // The coordinator outlives individual view values; hand it the current closure.
        context.coordinator.onTapCharacter = onTapCharacter

        let attributed = NSMutableAttributedString(
            string: text,
            attributes: [
                .font: UIFont.preferredFont(forTextStyle: .body),
                .foregroundColor: UIColor.label,
            ]
        )
        if let range = highlightedRange, let nsRange = Self.utf16Range(for: range, in: text) {
            attributed.addAttribute(.backgroundColor, value: UIColor.systemYellow.withAlphaComponent(0.3), range: nsRange)
        }
        textView.attributedText = attributed
    }

    func makeCoordinator() -> Coordinator {
        Coordinator(onTapCharacter: onTapCharacter)
    }

    /// Converts a range of `Character` offsets into the equivalent UTF-16 `NSRange`.
    /// - Returns: `nil` when the range starts below zero or extends past the end of `text`.
    private static func utf16Range(for characterRange: Range<Int>, in text: String) -> NSRange? {
        guard characterRange.lowerBound >= 0,
              let start = text.index(text.startIndex, offsetBy: characterRange.lowerBound, limitedBy: text.endIndex),
              let end = text.index(start, offsetBy: characterRange.count, limitedBy: text.endIndex)
        else { return nil }
        return NSRange(start..<end, in: text)
    }

    /// Converts a UTF-16 offset into the offset of the `Character` that contains it.
    /// - Returns: `nil` when the offset lies outside `text`.
    private static func characterOffset(forUTF16Offset utf16Offset: Int, in text: String) -> Int? {
        let units = text.utf16
        guard utf16Offset >= 0, utf16Offset < units.count else { return nil }
        let index = units.index(units.startIndex, offsetBy: utf16Offset)
        return text.distance(from: text.startIndex, to: index)
    }

    class Coordinator: NSObject {
        var onTapCharacter: (CharacterOffset) -> Void

        init(onTapCharacter: @escaping (CharacterOffset) -> Void) {
            self.onTapCharacter = onTapCharacter
        }

        @objc func handleTap(_ gesture: UITapGestureRecognizer) {
            guard let textView = gesture.view as? UITextView else { return }
            let point = gesture.location(in: textView)
            // Use TextKit2-compatible API (TextKit1's layoutManager is deprecated on iOS 16+)
            let utf16Offset = textView.offset(
                from: textView.beginningOfDocument,
                to: textView.closestPosition(to: point) ?? textView.beginningOfDocument
            )
            // Taps that resolve to the end of the document are ignored.
            guard let characterIndex = ReadingTextView.characterOffset(
                forUTF16Offset: utf16Offset,
                in: textView.text ?? ""
            ) else { return }
            onTapCharacter(characterIndex)
        }
    }
}
- Step 4: Create PlaybackControls
Create Vorleser-iOS/PlaybackControls.swift:
import SwiftUI
import AudioEngine
/// Transport bar with skip-backward, play/pause and skip-forward buttons for an `AudioEngine`.
///
/// The skip buttons are disabled while the engine is idle. The centre button toggles between
/// pause and resume, and is disabled in the states where a tap would have no effect.
struct PlaybackControls: View {
    @Bindable var engine: AudioEngine

    var body: some View {
        HStack(spacing: 32) {
            Button(action: { engine.skipBackward() }) {
                Image(systemName: "backward.fill")
                    .font(.title2)
            }
            .disabled(engine.state == .idle)
            .accessibilityLabel("Previous sentence")

            Button(action: togglePlayback) {
                Image(systemName: playButtonIcon)
                    .font(.title)
            }
            .disabled(!canTogglePlayback)
            .accessibilityLabel(engine.state == .playing ? "Pause" : "Play")

            Button(action: { engine.skipForward() }) {
                Image(systemName: "forward.fill")
                    .font(.title2)
            }
            .disabled(engine.state == .idle)
            .accessibilityLabel("Next sentence")
        }
        .padding()
    }

    /// SF Symbol name for the centre button in the current state.
    private var playButtonIcon: String {
        switch engine.state {
        case .playing: "pause.fill"
        case .synthesizing: "hourglass"
        case .paused: "play.fill"
        case .idle: "play.fill"
        }
    }

    /// Whether tapping the centre button does anything in the current state.
    private var canTogglePlayback: Bool {
        switch engine.state {
        case .playing, .paused: true
        case .idle, .synthesizing: false
        }
    }

    /// Pauses when playing and resumes when paused.
    /// The switch is exhaustive so that a new `PlaybackState` case becomes a compile error here.
    private func togglePlayback() {
        switch engine.state {
        case .playing: engine.pause()
        case .paused: engine.resume()
        case .idle, .synthesizing: break
        }
    }
}
- Step 5: Create ReaderView
Create Vorleser-iOS/ReaderView.swift:
import SwiftUI
import SwiftData
import Storage
import BookParser
// Swift has no `import X as Y` syntax. The AudioEngine and Synthesizer types are referred to
// unqualified, even though they share their names with the modules that declare them.
import AudioEngine
import Synthesizer
import VorleserKit

/// Reader screen for one stored book: loads and parses the file, shows one chapter at a time,
/// starts playback from a tapped character, and saves the playback position when it disappears.
struct ReaderView: View {
    let storedBook: StoredBook
    @State private var book: Book?
    @State private var error: String?
    @State private var engine = AudioEngine()
    @State private var synthesizer: Synthesizer?
    @State private var selectedChapterIndex: Int = 0
    /// True once playback has been started from this screen; gates saving the position.
    @State private var didStartPlayback = false
    @Environment(\.modelContext) private var modelContext

    private var bookStore: BookStore {
        BookStore(
            modelContainer: modelContext.container,
            documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
        )
    }

    var body: some View {
        VStack {
            if let error {
                ContentUnavailableView("Error", systemImage: "exclamationmark.triangle", description: Text(error))
            } else if let book {
                chapterPicker(book: book)
                readingContent(book: book)
                PlaybackControls(engine: engine)
            } else {
                ProgressView("Loading…")
            }
        }
        .navigationTitle(storedBook.title)
        .task { await loadBook() }
        .onDisappear {
            engine.stop()
            // Until playback has run, the engine still reports its initial offset of 0;
            // saving that would overwrite the position stored from an earlier session.
            guard didStartPlayback else { return }
            try? bookStore.updatePosition(storedBook, position: engine.currentPosition)
        }
    }

    /// Chapter menu, shown only for books with more than one chapter.
    @ViewBuilder
    private func chapterPicker(book: Book) -> some View {
        if book.chapters.count > 1 {
            Picker("Chapter", selection: $selectedChapterIndex) {
                ForEach(book.chapters, id: \.index) { chapter in
                    Text(chapter.title).tag(chapter.index)
                }
            }
            .pickerStyle(.menu)
            .padding(.horizontal)
        }
    }

    /// Text of the selected chapter with the current sentence highlighted.
    /// Renders nothing when the selected index does not address a chapter.
    @ViewBuilder
    private func readingContent(book: Book) -> some View {
        if book.chapters.indices.contains(selectedChapterIndex) {
            ReadingTextView(
                text: book.chapters[selectedChapterIndex].text,
                highlightedRange: currentSentenceRange(in: book),
                onTapCharacter: { localOffset in
                    let offset = globalOffset(forLocalOffset: localOffset, in: book)
                    Task {
                        do {
                            try await startPlayback(from: offset, book: book)
                        } catch {
                            self.error = "Playback failed: \(error)"
                        }
                    }
                }
            )
        }
    }

    /// Parses the stored file, selects the chapter containing the saved position, and loads
    /// the synthesizer from the model and voice files in the app bundle.
    private func loadBook() async {
        let fileURL = bookStore.fileURL(for: storedBook)
        guard bookStore.fileExists(for: storedBook) else {
            error = "Book file is missing. Please re-import."
            return
        }
        do {
            let parsed = try BookParser.parse(url: fileURL)
            book = parsed
            // Restore position
            if storedBook.lastPosition > 0 {
                if let (chapterIndex, _) = parsed.chapterAndLocalOffset(for: storedBook.lastPosition) {
                    selectedChapterIndex = chapterIndex
                }
            }
            // Load synthesizer
            guard let modelURL = Bundle.main.url(forResource: "kokoro-v1_0", withExtension: "safetensors"),
                  let voicesURL = Bundle.main.url(forResource: "voices", withExtension: "npz") else {
                error = "TTS model files not found in app bundle."
                return
            }
            guard let voice = VoicePack.curated.first else {
                error = "No voice is available."
                return
            }
            synthesizer = try Synthesizer(voice: voice, modelURL: modelURL, voicesURL: voicesURL)
        } catch {
            self.error = "Failed to load book: \(error)"
        }
    }

    /// Starts playback at `offset`; does nothing while the synthesizer has not been loaded.
    private func startPlayback(from offset: CharacterOffset, book: Book) async throws {
        guard let synthesizer else { return }
        try await engine.play(book: book, from: offset, using: synthesizer)
        didStartPlayback = true
    }

    /// Sum of the text lengths of all chapters that precede the selected chapter.
    private func selectedChapterStart(in book: Book) -> Int {
        book.chapters
            .filter { $0.index < selectedChapterIndex }
            .reduce(0) { $0 + $1.text.count }
    }

    /// Converts an offset within the selected chapter into a book-wide offset.
    private func globalOffset(forLocalOffset local: Int, in book: Book) -> CharacterOffset {
        selectedChapterStart(in: book) + local
    }

    /// Range of the sentence at the engine's current position, relative to the selected chapter.
    /// - Returns: `nil` when the engine is neither playing nor synthesizing, when no sentence
    ///   contains the position, or when the sentence starts before the selected chapter.
    private func currentSentenceRange(in book: Book) -> Range<Int>? {
        guard engine.state == .playing || engine.state == .synthesizing else { return nil }
        guard let idx = book.sentenceIndex(containing: engine.currentPosition) else { return nil }
        let sentence = book.sentences[idx]
        // Convert global range to local range within current chapter
        let chapterStart = selectedChapterStart(in: book)
        let localStart = sentence.range.lowerBound - chapterStart
        let localEnd = sentence.range.upperBound - chapterStart
        guard localStart >= 0 else { return nil }
        return localStart..<localEnd
    }
}
- Step 6: Generate Xcode project and verify build
cd /Users/felixfoertsch/Developer/vorleser
xcodegen generate
open Vorleser.xcodeproj
Build the iOS target in Xcode. Fix any import or module visibility issues. Common issues:
-
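Module names may conflict with type names (e.g. the
AudioEngine module vs. the AudioEngine class). Swift has no `import X as Y` alias syntax: import the module normally and refer to the type unqualified. If a clash still cannot be resolved, rename the module (the SwiftPM target) rather than the import. -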
SwiftData
@Queryrequires the model container to be in the environment. -
Step 7: Commit
git add Vorleser-iOS/
git commit -m "add iOS app shell with library, reader, tap-to-play, playback controls"
Task 10: macOS app shell
Files:
-
Create:
Vorleser-macOS/VorleserMacApp.swift -
Create:
Vorleser-macOS/MacLibraryView.swift -
Create:
Vorleser-macOS/MacReaderView.swift -
Create:
Vorleser-macOS/MacReadingTextView.swift -
Create:
Vorleser-macOS/MacPlaybackControls.swift -
Step 1: Create macOS app entry point
Create Vorleser-macOS/VorleserMacApp.swift:
import SwiftUI
import SwiftData
import Storage
/// macOS application entry point.
/// Shows `MacLibraryView` as the root of the window group and installs a SwiftData
/// model container for `StoredBook` on the scene.
@main
struct VorleserMacApp: App {
var body: some Scene {
WindowGroup {
MacLibraryView()
}
.modelContainer(for: StoredBook.self)
}
}
- Step 2: Create MacLibraryView with sidebar layout
Create Vorleser-macOS/MacLibraryView.swift:
import SwiftUI
import SwiftData
import Storage
import BookParser
/// macOS library window: a sidebar of stored books (sorted by `lastRead`, descending) with
/// import and delete actions, and the reader for the selected book in the detail column.
struct MacLibraryView: View {
    @Environment(\.modelContext) private var modelContext
    @Query(sort: \StoredBook.lastRead, order: .reverse) private var books: [StoredBook]
    @State private var selectedBook: StoredBook?
    @State private var showFileImporter = false
    /// Description of the most recent failed import or delete; a non-nil value presents the alert.
    @State private var failureMessage: String?

    private var bookStore: BookStore {
        BookStore(
            modelContainer: modelContext.container,
            documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
        )
    }

    var body: some View {
        NavigationSplitView {
            List(books, selection: $selectedBook) { book in
                VStack(alignment: .leading) {
                    Text(book.title).font(.headline)
                    if let author = book.author {
                        Text(author).font(.subheadline).foregroundStyle(.secondary)
                    }
                }
                .tag(book)
                .contextMenu {
                    Button("Delete", role: .destructive) {
                        delete(book)
                    }
                }
            }
            .navigationTitle("Library")
            .toolbar {
                Button("Import", systemImage: "plus") {
                    showFileImporter = true
                }
            }
            .fileImporter(
                isPresented: $showFileImporter,
                allowedContentTypes: [.epub, .plainText],
                allowsMultipleSelection: false
            ) { result in
                handleImport(result)
            }
            .alert(
                "Library Error",
                isPresented: Binding(
                    get: { failureMessage != nil },
                    set: { isPresented in
                        if !isPresented { failureMessage = nil }
                    }
                )
            ) {
                Button("OK", role: .cancel) {}
            } message: {
                Text(failureMessage ?? "")
            }
        } detail: {
            if let selectedBook {
                // Give each book its own view identity. Without it the detail view keeps the
                // @State (parsed book, engine, chapter) of the previously selected book and
                // its `.task` does not run again.
                MacReaderView(storedBook: selectedBook)
                    .id(selectedBook.persistentModelID)
            } else {
                ContentUnavailableView("Select a Book", systemImage: "book", description: Text("Choose a book from the sidebar or import one."))
            }
        }
    }

    /// Parses the picked file for its metadata and copies it into the book store.
    /// Failures are reported through the alert.
    private func handleImport(_ result: Result<[URL], Error>) {
        let url: URL
        switch result {
        case .success(let urls):
            guard let first = urls.first else { return }
            url = first
        case .failure(let error):
            failureMessage = "Import failed: \(error.localizedDescription)"
            return
        }
        // The security scope must stay open until parsing and copying have finished, so the
        // work runs synchronously inside this scope instead of in a Task that outlives it.
        // A `false` result is not treated as fatal: the URL may be readable without a scope.
        let isScoped = url.startAccessingSecurityScopedResource()
        defer {
            if isScoped { url.stopAccessingSecurityScopedResource() }
        }
        do {
            let parsed = try BookParser.parse(url: url)
            try bookStore.importBook(from: url, title: parsed.title, author: parsed.author)
        } catch {
            failureMessage = "Import failed: \(error.localizedDescription)"
        }
    }

    /// Deletes `book`, clearing the selection first when it is the book being shown.
    private func delete(_ book: StoredBook) {
        if selectedBook?.persistentModelID == book.persistentModelID {
            selectedBook = nil
        }
        do {
            try bookStore.deleteBook(book)
        } catch {
            failureMessage = "Delete failed: \(error.localizedDescription)"
        }
    }
}
- Step 3: Create MacReadingTextView (NSTextView wrapper)
Create Vorleser-macOS/MacReadingTextView.swift:
import SwiftUI
import AppKit
import VorleserKit
/// Read-only `NSTextView` (inside a scroll view) that shows chapter text, highlights one range,
/// and reports the character that was clicked.
///
/// `highlightedRange` and the reported offsets count Swift `Character`s, while AppKit addresses
/// text in UTF-16 code units; both directions are converted explicitly so that text containing
/// multi-unit characters (emoji, "\r\n", decomposed accents) maps correctly.
struct MacReadingTextView: NSViewRepresentable {
    let text: String
    let highlightedRange: Range<Int>?
    let onClickCharacter: (CharacterOffset) -> Void

    func makeNSView(context: Context) -> NSScrollView {
        let scrollView = NSTextView.scrollableTextView()
        guard let textView = scrollView.documentView as? NSTextView else { return scrollView }
        textView.isEditable = false
        textView.isSelectable = false
        textView.font = .preferredFont(forTextStyle: .body)
        textView.textContainerInset = NSSize(width: 16, height: 16)
        let click = NSClickGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleClick(_:)))
        textView.addGestureRecognizer(click)
        context.coordinator.textView = textView
        return scrollView
    }

    func updateNSView(_ scrollView: NSScrollView, context: Context) {
        // The coordinator outlives individual view values; hand it the current closure.
        context.coordinator.onClickCharacter = onClickCharacter
        guard let textView = scrollView.documentView as? NSTextView else { return }

        let attributed = NSMutableAttributedString(
            string: text,
            attributes: [
                .font: NSFont.preferredFont(forTextStyle: .body),
                .foregroundColor: NSColor.textColor,
            ]
        )
        if let range = highlightedRange, let nsRange = Self.utf16Range(for: range, in: text) {
            attributed.addAttribute(.backgroundColor, value: NSColor.systemYellow.withAlphaComponent(0.3), range: nsRange)
        }
        textView.textStorage?.setAttributedString(attributed)
    }

    func makeCoordinator() -> Coordinator {
        Coordinator(onClickCharacter: onClickCharacter)
    }

    /// Converts a range of `Character` offsets into the equivalent UTF-16 `NSRange`.
    /// - Returns: `nil` when the range starts below zero or extends past the end of `text`.
    private static func utf16Range(for characterRange: Range<Int>, in text: String) -> NSRange? {
        guard characterRange.lowerBound >= 0,
              let start = text.index(text.startIndex, offsetBy: characterRange.lowerBound, limitedBy: text.endIndex),
              let end = text.index(start, offsetBy: characterRange.count, limitedBy: text.endIndex)
        else { return nil }
        return NSRange(start..<end, in: text)
    }

    /// Converts a UTF-16 offset into the offset of the `Character` that contains it.
    /// - Returns: `nil` when the offset lies outside `text`.
    private static func characterOffset(forUTF16Offset utf16Offset: Int, in text: String) -> Int? {
        let units = text.utf16
        guard utf16Offset >= 0, utf16Offset < units.count else { return nil }
        let index = units.index(units.startIndex, offsetBy: utf16Offset)
        return text.distance(from: text.startIndex, to: index)
    }

    class Coordinator: NSObject {
        weak var textView: NSTextView?
        var onClickCharacter: (CharacterOffset) -> Void

        init(onClickCharacter: @escaping (CharacterOffset) -> Void) {
            self.onClickCharacter = onClickCharacter
        }

        @objc func handleClick(_ gesture: NSClickGestureRecognizer) {
            guard let textView else { return }
            let point = gesture.location(in: textView)
            let utf16Offset = textView.characterIndexForInsertion(at: point)
            // Clicks that resolve to the end of the text are ignored.
            guard let characterIndex = MacReadingTextView.characterOffset(
                forUTF16Offset: utf16Offset,
                in: textView.string
            ) else { return }
            onClickCharacter(characterIndex)
        }
    }
}
- Step 4: Create MacPlaybackControls
Create Vorleser-macOS/MacPlaybackControls.swift:
import SwiftUI
import AudioEngine
/// Transport bar with skip-backward, play/pause and skip-forward buttons for an `AudioEngine`.
///
/// The skip buttons are disabled while the engine is idle. The centre button toggles between
/// pause and resume, and is disabled in the states where a click would have no effect.
struct MacPlaybackControls: View {
    @Bindable var engine: AudioEngine

    var body: some View {
        HStack(spacing: 32) {
            Button(action: { engine.skipBackward() }) {
                Image(systemName: "backward.fill")
                    .font(.title2)
            }
            .disabled(engine.state == .idle)
            .accessibilityLabel("Previous sentence")

            Button(action: togglePlayback) {
                Image(systemName: playButtonIcon)
                    .font(.title)
            }
            .disabled(!canTogglePlayback)
            .accessibilityLabel(engine.state == .playing ? "Pause" : "Play")

            Button(action: { engine.skipForward() }) {
                Image(systemName: "forward.fill")
                    .font(.title2)
            }
            .disabled(engine.state == .idle)
            .accessibilityLabel("Next sentence")
        }
        .padding()
    }

    /// SF Symbol name for the centre button in the current state.
    private var playButtonIcon: String {
        switch engine.state {
        case .playing: "pause.fill"
        case .synthesizing: "hourglass"
        case .paused: "play.fill"
        case .idle: "play.fill"
        }
    }

    /// Whether clicking the centre button does anything in the current state.
    private var canTogglePlayback: Bool {
        switch engine.state {
        case .playing, .paused: true
        case .idle, .synthesizing: false
        }
    }

    /// Pauses when playing and resumes when paused.
    /// The switch is exhaustive so that a new `PlaybackState` case becomes a compile error here.
    private func togglePlayback() {
        switch engine.state {
        case .playing: engine.pause()
        case .paused: engine.resume()
        case .idle, .synthesizing: break
        }
    }
}
- Step 5: Create MacReaderView (full code — shares logic with iOS ReaderView but uses MacReadingTextView and MacPlaybackControls)
Create Vorleser-macOS/MacReaderView.swift:
import SwiftUI
import SwiftData
import Storage
import BookParser
// Swift has no `import X as Y` syntax. The AudioEngine and Synthesizer types are referred to
// unqualified, even though they share their names with the modules that declare them.
import AudioEngine
import Synthesizer
import VorleserKit

/// macOS reader for one stored book: loads and parses the file, shows one chapter at a time,
/// starts playback from a clicked character, and saves the playback position when it disappears.
struct MacReaderView: View {
    let storedBook: StoredBook
    @State private var book: Book?
    @State private var error: String?
    @State private var engine = AudioEngine()
    @State private var synthesizer: Synthesizer?
    @State private var selectedChapterIndex: Int = 0
    /// True once playback has been started from this view; gates saving the position.
    @State private var didStartPlayback = false
    @Environment(\.modelContext) private var modelContext

    private var bookStore: BookStore {
        BookStore(
            modelContainer: modelContext.container,
            documentsDirectory: FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
        )
    }

    var body: some View {
        VStack {
            if let error {
                ContentUnavailableView("Error", systemImage: "exclamationmark.triangle", description: Text(error))
            } else if let book {
                chapterPicker(book: book)
                readingContent(book: book)
                MacPlaybackControls(engine: engine)
            } else {
                ProgressView("Loading…")
            }
        }
        .navigationTitle(storedBook.title)
        .task { await loadBook() }
        .onDisappear {
            engine.stop()
            // Until playback has run, the engine still reports its initial offset of 0;
            // saving that would overwrite the position stored from an earlier session.
            guard didStartPlayback else { return }
            try? bookStore.updatePosition(storedBook, position: engine.currentPosition)
        }
    }

    /// Chapter menu, shown only for books with more than one chapter.
    @ViewBuilder
    private func chapterPicker(book: Book) -> some View {
        if book.chapters.count > 1 {
            Picker("Chapter", selection: $selectedChapterIndex) {
                ForEach(book.chapters, id: \.index) { chapter in
                    Text(chapter.title).tag(chapter.index)
                }
            }
            .pickerStyle(.menu)
            .padding(.horizontal)
        }
    }

    /// Text of the selected chapter with the current sentence highlighted.
    /// Renders nothing when the selected index does not address a chapter.
    @ViewBuilder
    private func readingContent(book: Book) -> some View {
        if book.chapters.indices.contains(selectedChapterIndex) {
            MacReadingTextView(
                text: book.chapters[selectedChapterIndex].text,
                highlightedRange: currentSentenceRange(in: book),
                onClickCharacter: { localOffset in
                    let offset = globalOffset(forLocalOffset: localOffset, in: book)
                    Task {
                        do {
                            try await startPlayback(from: offset, book: book)
                        } catch {
                            self.error = "Playback failed: \(error)"
                        }
                    }
                }
            )
        }
    }

    /// Parses the stored file, selects the chapter containing the saved position, and loads
    /// the synthesizer from the model and voice files in the app bundle.
    private func loadBook() async {
        let fileURL = bookStore.fileURL(for: storedBook)
        guard bookStore.fileExists(for: storedBook) else {
            error = "Book file is missing. Please re-import."
            return
        }
        do {
            let parsed = try BookParser.parse(url: fileURL)
            book = parsed
            if storedBook.lastPosition > 0 {
                if let (chapterIndex, _) = parsed.chapterAndLocalOffset(for: storedBook.lastPosition) {
                    selectedChapterIndex = chapterIndex
                }
            }
            guard let modelURL = Bundle.main.url(forResource: "kokoro-v1_0", withExtension: "safetensors"),
                  let voicesURL = Bundle.main.url(forResource: "voices", withExtension: "npz") else {
                error = "TTS model files not found in app bundle."
                return
            }
            guard let voice = VoicePack.curated.first else {
                error = "No voice is available."
                return
            }
            synthesizer = try Synthesizer(voice: voice, modelURL: modelURL, voicesURL: voicesURL)
        } catch {
            self.error = "Failed to load book: \(error)"
        }
    }

    /// Starts playback at `offset`; does nothing while the synthesizer has not been loaded.
    private func startPlayback(from offset: CharacterOffset, book: Book) async throws {
        guard let synthesizer else { return }
        try await engine.play(book: book, from: offset, using: synthesizer)
        didStartPlayback = true
    }

    /// Sum of the text lengths of all chapters that precede the selected chapter.
    private func selectedChapterStart(in book: Book) -> Int {
        book.chapters
            .filter { $0.index < selectedChapterIndex }
            .reduce(0) { $0 + $1.text.count }
    }

    /// Converts an offset within the selected chapter into a book-wide offset.
    private func globalOffset(forLocalOffset local: Int, in book: Book) -> CharacterOffset {
        selectedChapterStart(in: book) + local
    }

    /// Range of the sentence at the engine's current position, relative to the selected chapter.
    /// - Returns: `nil` when the engine is neither playing nor synthesizing, when no sentence
    ///   contains the position, or when the sentence starts before the selected chapter.
    private func currentSentenceRange(in book: Book) -> Range<Int>? {
        guard engine.state == .playing || engine.state == .synthesizing else { return nil }
        guard let idx = book.sentenceIndex(containing: engine.currentPosition) else { return nil }
        let sentence = book.sentences[idx]
        let chapterStart = selectedChapterStart(in: book)
        let localStart = sentence.range.lowerBound - chapterStart
        let localEnd = sentence.range.upperBound - chapterStart
        guard localStart >= 0 else { return nil }
        return localStart..<localEnd
    }
}
- Step 6: Generate and build
xcodegen generate
Build macOS target in Xcode. Fix any compilation issues.
- Step 7: Commit
git add Vorleser-macOS/
git commit -m "add macOS app shell with sidebar library, reader, click-to-play"
Chunk 6: Integration + Polish
Task 11: Download model weights and verify end-to-end
- Step 1: Add .gitignore for model weights (before downloading)
Add to .gitignore:
Resources/Models/*.safetensors
Resources/Voices/*.npz
Commit immediately so the large files cannot be accidentally staged:
git add .gitignore
git commit -m "add gitignore for model weights and voice embeddings"
- Step 2: Download Kokoro model weights
cd Resources/Models
huggingface-cli download hexgrad/Kokoro-82M kokoro-v1_0.safetensors --local-dir .
If huggingface-cli is not installed: pip install huggingface-hub. Alternatively, download manually from https://huggingface.co/hexgrad/Kokoro-82M — look for kokoro-v1_0.safetensors (~600MB).
Verify: ls -lh Resources/Models/kokoro-v1_0.safetensors should show ~600MB.
- Step 3: Download voice embeddings
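cd Resources/Voices   # run from the repository root (cd back first if you are still in Resources/Models)
huggingface-cli download hexgrad/Kokoro-82M voices/voices.npz --local-dir .
Note: --local-dir keeps the repository's folder layout, so this command writes the file to voices/voices.npz below the current directory. Move it up one level (mv voices/voices.npz . && rmdir voices) so that it ends up at Resources/Voices/voices.npz, where the app bundle lookup and the .gitignore pattern expect it. Or download from the kokoro-ios test app resources. The file is voices.npz (~14MB), containing 28 voice embeddings as numpy arrays.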
Verify: ls -lh Resources/Voices/voices.npz should show ~14MB.
- Step 4: Regenerate project and run on iOS device
xcodegen generate
Open in Xcode, select a real iOS device (not Simulator — MLX requires Metal), build and run. Test:
- Import a plain text file
- Tap a word → verify synthesis starts and audio plays
- Tap play/pause → verify controls work
- Close and reopen the book → verify position is restored
- Step 5: Test on macOS
Build and run macOS target. Same test sequence as iOS. Verify sidebar layout works.
- Step 6: Fix any issues found during testing
Address compilation errors, runtime crashes, audio quality issues. This is the integration debugging step — budget time for it.
- Step 7: Commit working state
git add Vorleser-iOS/ Vorleser-macOS/ VorleserKit/ project.yml
git commit -m "verify end-to-end playback on iOS and macOS"
Task 12: CalVer version + final cleanup
- Step 1: Set CalVer version in project.yml
Add MARKETING_VERSION and CURRENT_PROJECT_VERSION to each target's settings.base in project.yml:
Vorleser-iOS:
settings:
base:
MARKETING_VERSION: "2026.03.13"
CURRENT_PROJECT_VERSION: "1"
# ... existing settings ...
Vorleser-macOS:
settings:
base:
MARKETING_VERSION: "2026.03.13"
CURRENT_PROJECT_VERSION: "1"
# ... existing settings ...
- Step 2: Remove old project files
Delete the old codebase directories that are no longer used:
git rm -r Vorleser/ VorleserMac/ Vendor/
Keep tools/kokoro_coreml/ and autoaudiobook/ — they contain reference documentation and learnings.
- Step 3: Final commit
git add project.yml
git commit -m "set CalVer 2026.03.13, remove legacy code"