add Synthesizer wrapper around KokoroSwift with voice pack support

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-13 22:17:45 +01:00
parent c76ef0aec6
commit c81b78bea0
5 changed files with 142 additions and 2 deletions

View File

@@ -1,6 +1,51 @@
{
"originHash" : "1bf1d418d8d58ea936176af8e96313605ea72a6fbf437f877b8e5d9a5b0d822c",
"originHash" : "6130f8afd39b4763c878d68f7965a176b7136eb36f304eaf701123ffb55cbbf7",
"pins" : [
{
"identity" : "kokoro-ios",
"kind" : "remoteSourceControl",
"location" : "https://github.com/mlalma/kokoro-ios.git",
"state" : {
"revision" : "4d6d1d8ff8cd012014180c9cd4cf0151e7682354",
"version" : "1.0.11"
}
},
{
"identity" : "misakiswift",
"kind" : "remoteSourceControl",
"location" : "https://github.com/mlalma/MisakiSwift",
"state" : {
"revision" : "6835a1ce4a8854075c89f18ff75c74b13ef58e15",
"version" : "1.0.6"
}
},
{
"identity" : "mlx-swift",
"kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift",
"state" : {
"revision" : "f58bd2c2b3b84316da69182f436db4219aff30b9",
"version" : "0.30.2"
}
},
{
"identity" : "mlxutilslibrary",
"kind" : "remoteSourceControl",
"location" : "https://github.com/mlalma/MLXUtilsLibrary.git",
"state" : {
"revision" : "41f6cfd5d68b65aa3c65a34efe3b71c371ed915b",
"version" : "0.0.6"
}
},
{
"identity" : "swift-numerics",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-numerics",
"state" : {
"revision" : "0c0290ff6b24942dadb83a929ffaaa1481df04a2",
"version" : "1.1.1"
}
},
{
"identity" : "swiftsoup",
"kind" : "remoteSourceControl",

View File

@@ -11,15 +11,27 @@ let package = Package(
.library(name: "VorleserKit", targets: ["VorleserKit"]),
.library(name: "BookParser", targets: ["BookParser"]),
.library(name: "Storage", targets: ["Storage"]),
.library(name: "Synthesizer", targets: ["Synthesizer"]),
],
dependencies: [
.package(url: "https://github.com/weichsel/ZIPFoundation.git", from: "0.9.0"),
.package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.7.0"),
.package(url: "https://github.com/mlalma/kokoro-ios.git", exact: "1.0.11"),
.package(url: "https://github.com/mlalma/MLXUtilsLibrary.git", exact: "0.0.6"),
.package(url: "https://github.com/ml-explore/mlx-swift", exact: "0.30.2"),
],
targets: [
.target(
name: "VorleserKit",
dependencies: ["Storage"]
dependencies: ["Storage", "Synthesizer"]
),
.target(
name: "Synthesizer",
dependencies: [
.product(name: "KokoroSwift", package: "kokoro-ios"),
.product(name: "MLXUtilsLibrary", package: "MLXUtilsLibrary"),
.product(name: "MLX", package: "mlx-swift"),
]
),
.target(
name: "Storage",

View File

@@ -0,0 +1,47 @@
import Foundation
import KokoroSwift
import MLX
import MLXUtilsLibrary
/// Text-to-speech front end that pairs a `KokoroTTS` engine with a single voice embedding.
///
/// The voice embedding and the engine language are resolved once in `init` and reused
/// for every `synthesize(text:)` call.
///
/// NOTE(review): `@unchecked Sendable` is an unverified promise. Every stored property is
/// an immutable `let`, but whether `KokoroTTS` and `MLXArray` tolerate concurrent use is
/// not visible from this file — confirm before calling `synthesize(text:)` from several
/// tasks at once.
public final class Synthesizer: @unchecked Sendable {
    private let tts: KokoroTTS
    private let voiceEmbedding: MLXArray
    private let language: Language
    private let voicePack: VoicePack

    /// Creates a synthesizer for one voice.
    ///
    /// - Parameters:
    ///   - voice: The voice to speak with; its `name` selects the `"<name>.npy"` entry
    ///     in the voices archive.
    ///   - modelURL: Location of the model file handed to `KokoroTTS`.
    ///   - voicesURL: Location of the voices archive read with `NpyzReader`.
    /// - Throws: `SynthesizerError.modelNotFound` or `SynthesizerError.voicesNotFound`
    ///   when the respective file does not exist, and `SynthesizerError.voiceNotAvailable`
    ///   when the archive cannot be read or has no entry for `voice`.
    public init(voice: VoicePack, modelURL: URL, voicesURL: URL) throws {
        guard FileManager.default.fileExists(atPath: modelURL.path) else {
            throw SynthesizerError.modelNotFound(modelURL.path)
        }
        guard FileManager.default.fileExists(atPath: voicesURL.path) else {
            throw SynthesizerError.voicesNotFound(voicesURL.path)
        }

        // Resolve the voice before constructing the engine so that an unavailable
        // voice fails without building `KokoroTTS` first. The order in which the
        // three errors take precedence is unchanged.
        guard let voices = NpyzReader.read(fileFromPath: voicesURL),
              let embedding = voices["\(voice.name).npy"] else {
            throw SynthesizerError.voiceNotAvailable(voice.name)
        }

        tts = KokoroTTS(modelPath: modelURL, g2p: .misaki)
        voiceEmbedding = embedding
        voicePack = voice

        // Exhaustive on purpose: a new `VoicePack.Language` case must be mapped
        // explicitly here instead of silently falling back to British English.
        switch voice.language {
        case .enUS:
            language = .enUS
        case .enGB:
            language = .enGB
        }
    }

    /// Synthesizes `text` with the configured voice and language at speed `1.0`.
    ///
    /// - Parameter text: The text to speak.
    /// - Returns: The first element of the tuple produced by `KokoroTTS.generateAudio`;
    ///   the second element is discarded.
    /// - Throws: `SynthesizerError.synthesisFailure` wrapping `text` and the underlying
    ///   error whenever the engine throws.
    public func synthesize(text: String) throws -> [Float] {
        do {
            let (samples, _) = try tts.generateAudio(
                voice: voiceEmbedding,
                language: language,
                text: text,
                speed: 1.0
            )
            return samples
        } catch {
            throw SynthesizerError.synthesisFailure(text, error)
        }
    }

    /// NOTE(review): presumably the sample rate, in Hz, of the audio returned by
    /// `synthesize(text:)` — nothing in this file ties the constant to the engine; confirm.
    public static let sampleRate: Double = 24_000
}

View File

@@ -0,0 +1,15 @@
/// Failures reported by the synthesizer, each with a human-readable `description`.
public enum SynthesizerError: Error, CustomStringConvertible {
    /// The model was not found; the payload is the path that was checked.
    case modelNotFound(String)
    /// The voices archive was not found; the payload is the path that was checked.
    case voicesNotFound(String)
    /// The named voice is not present in the voices archive; the payload is the voice name.
    case voiceNotAvailable(String)
    /// Synthesis failed; the payload is the input text and the underlying error.
    case synthesisFailure(String, Error)

    /// A one-line message for the error.
    ///
    /// For `.synthesisFailure` only the first 50 characters of the text are quoted, and
    /// the trailing `...` is added only when the text was actually cut off.
    public var description: String {
        switch self {
        case .modelNotFound(let path):
            return "kokoro model not found at \(path)"
        case .voicesNotFound(let path):
            return "voices.npz not found at \(path)"
        case .voiceNotAvailable(let name):
            return "voice '\(name)' not found in voices.npz"
        case .synthesisFailure(let text, let error):
            let excerpt = text.prefix(50)
            // The excerpt shares indices with `text`, so comparing end indices tells
            // whether anything was dropped without counting characters again.
            let ellipsis = excerpt.endIndex < text.endIndex ? "..." : ""
            return "synthesis failed for '\(excerpt)\(ellipsis)': \(error)"
        }
    }
}

View File

@@ -0,0 +1,21 @@
import Foundation
/// Describes one selectable voice: an internal name, a label for display, and a language.
public struct VoicePack: Sendable, Identifiable {
    /// Language tag attached to a voice; the raw values are lowercase locale strings.
    public enum Language: String, Sendable {
        case enUS = "en-us"
        case enGB = "en-gb"
    }

    /// Internal voice name; also serves as the `Identifiable` identity.
    public let name: String
    /// Label intended for display.
    public let displayName: String
    /// Language this voice is tagged with.
    public let language: Language

    /// Stable identity, equal to `name`.
    public var id: String {
        return name
    }

    /// Module-internal initializer, spelled out with the same shape and access level
    /// as the memberwise initializer the compiler would otherwise synthesize.
    init(name: String, displayName: String, language: Language) {
        self.name = name
        self.displayName = displayName
        self.language = language
    }

    /// Curated voices bundled with the app.
    public static let curated: [VoicePack] = {
        // (name, displayName) pairs; every curated voice is tagged `.enUS`.
        let table: [(name: String, label: String)] = [
            (name: "af_heart", label: "Heart"),
            (name: "af_bella", label: "Bella"),
            (name: "am_michael", label: "Michael"),
        ]
        return table.map { entry in
            VoicePack(name: entry.name, displayName: entry.label, language: .enUS)
        }
    }()
}