add Synthesizer wrapper around KokoroSwift with voice pack support
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,51 @@
|
||||
{
|
||||
"originHash" : "1bf1d418d8d58ea936176af8e96313605ea72a6fbf437f877b8e5d9a5b0d822c",
|
||||
"originHash" : "6130f8afd39b4763c878d68f7965a176b7136eb36f304eaf701123ffb55cbbf7",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "kokoro-ios",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/mlalma/kokoro-ios.git",
|
||||
"state" : {
|
||||
"revision" : "4d6d1d8ff8cd012014180c9cd4cf0151e7682354",
|
||||
"version" : "1.0.11"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "misakiswift",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/mlalma/MisakiSwift",
|
||||
"state" : {
|
||||
"revision" : "6835a1ce4a8854075c89f18ff75c74b13ef58e15",
|
||||
"version" : "1.0.6"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlx-swift",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/ml-explore/mlx-swift",
|
||||
"state" : {
|
||||
"revision" : "f58bd2c2b3b84316da69182f436db4219aff30b9",
|
||||
"version" : "0.30.2"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlxutilslibrary",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/mlalma/MLXUtilsLibrary.git",
|
||||
"state" : {
|
||||
"revision" : "41f6cfd5d68b65aa3c65a34efe3b71c371ed915b",
|
||||
"version" : "0.0.6"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-numerics",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-numerics",
|
||||
"state" : {
|
||||
"revision" : "0c0290ff6b24942dadb83a929ffaaa1481df04a2",
|
||||
"version" : "1.1.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swiftsoup",
|
||||
"kind" : "remoteSourceControl",
|
||||
|
||||
@@ -11,15 +11,27 @@ let package = Package(
|
||||
.library(name: "VorleserKit", targets: ["VorleserKit"]),
|
||||
.library(name: "BookParser", targets: ["BookParser"]),
|
||||
.library(name: "Storage", targets: ["Storage"]),
|
||||
.library(name: "Synthesizer", targets: ["Synthesizer"]),
|
||||
],
|
||||
dependencies: [
|
||||
.package(url: "https://github.com/weichsel/ZIPFoundation.git", from: "0.9.0"),
|
||||
.package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.7.0"),
|
||||
.package(url: "https://github.com/mlalma/kokoro-ios.git", exact: "1.0.11"),
|
||||
.package(url: "https://github.com/mlalma/MLXUtilsLibrary.git", exact: "0.0.6"),
|
||||
.package(url: "https://github.com/ml-explore/mlx-swift", exact: "0.30.2"),
|
||||
],
|
||||
targets: [
|
||||
.target(
|
||||
name: "VorleserKit",
|
||||
dependencies: ["Storage"]
|
||||
dependencies: ["Storage", "Synthesizer"]
|
||||
),
|
||||
.target(
|
||||
name: "Synthesizer",
|
||||
dependencies: [
|
||||
.product(name: "KokoroSwift", package: "kokoro-ios"),
|
||||
.product(name: "MLXUtilsLibrary", package: "MLXUtilsLibrary"),
|
||||
.product(name: "MLX", package: "mlx-swift"),
|
||||
]
|
||||
),
|
||||
.target(
|
||||
name: "Storage",
|
||||
|
||||
47
VorleserKit/Sources/Synthesizer/Synthesizer.swift
Normal file
47
VorleserKit/Sources/Synthesizer/Synthesizer.swift
Normal file
@@ -0,0 +1,47 @@
|
||||
import Foundation
|
||||
import KokoroSwift
|
||||
import MLX
|
||||
import MLXUtilsLibrary
|
||||
|
||||
/// Wraps a `KokoroTTS` instance together with a single voice embedding read
/// from a voices archive, and exposes a one-call text-to-samples API.
///
/// NOTE(review): the type is declared `@unchecked Sendable`, so the compiler
/// does not verify thread safety here — confirm that sharing the wrapped
/// `KokoroTTS` / `MLXArray` values across threads is actually safe.
public final class Synthesizer: @unchecked Sendable {
    /// Sample-rate constant published by this type
    /// (presumably Hz of the samples returned by `synthesize(text:)` — confirm against KokoroSwift).
    public static let sampleRate: Double = 24_000

    private let tts: KokoroTTS
    private let voiceEmbedding: MLXArray
    private let language: Language
    private let voicePack: VoicePack

    /// Creates a synthesizer for one voice.
    ///
    /// - Parameters:
    ///   - voice: The voice pack whose embedding is looked up in the archive
    ///     under the entry name `"<voice.name>.npy"`.
    ///   - modelURL: Location of the model file handed to `KokoroTTS`.
    ///   - voicesURL: Location of the voices archive read via `NpyzReader`.
    /// - Throws: `SynthesizerError.modelNotFound` or `.voicesNotFound` when the
    ///   respective path does not exist, and `.voiceNotAvailable` when the
    ///   archive cannot be read or has no entry for `voice`.
    public init(voice: VoicePack, modelURL: URL, voicesURL: URL) throws {
        let fileManager = FileManager.default

        // Both files must exist before anything is loaded.
        if !fileManager.fileExists(atPath: modelURL.path) {
            throw SynthesizerError.modelNotFound(modelURL.path)
        }
        if !fileManager.fileExists(atPath: voicesURL.path) {
            throw SynthesizerError.voicesNotFound(voicesURL.path)
        }

        tts = KokoroTTS(modelPath: modelURL, g2p: .misaki)

        // An unreadable archive and a missing entry are reported identically.
        guard let archive = NpyzReader.read(fileFromPath: voicesURL) else {
            throw SynthesizerError.voiceNotAvailable(voice.name)
        }
        guard let vector = archive["\(voice.name).npy"] else {
            throw SynthesizerError.voiceNotAvailable(voice.name)
        }

        voiceEmbedding = vector
        voicePack = voice

        // Translate the voice pack's language into the engine's language value.
        switch voice.language {
        case .enUS:
            language = .enUS
        case .enGB:
            language = .enGB
        }
    }

    /// Generates audio samples for `text` using the configured voice and
    /// language at a fixed speed of `1.0`.
    ///
    /// - Parameter text: The text to synthesize.
    /// - Returns: The sample array produced by `KokoroTTS.generateAudio`; the
    ///   second element of the engine's result tuple is discarded.
    /// - Throws: `SynthesizerError.synthesisFailure` wrapping any error thrown
    ///   by the engine, together with the input text.
    public func synthesize(text: String) throws -> [Float] {
        do {
            let output = try tts.generateAudio(
                voice: voiceEmbedding,
                language: language,
                text: text,
                speed: 1.0
            )
            return output.0
        } catch {
            throw SynthesizerError.synthesisFailure(text, error)
        }
    }
}
|
||||
15
VorleserKit/Sources/Synthesizer/SynthesizerError.swift
Normal file
15
VorleserKit/Sources/Synthesizer/SynthesizerError.swift
Normal file
@@ -0,0 +1,15 @@
|
||||
/// Errors reported by the synthesizer module, each with a human-readable
/// `description`.
public enum SynthesizerError: Error, CustomStringConvertible {
    /// No model was found; the associated value is the path that was checked.
    case modelNotFound(String)
    /// No voices archive was found; the associated value is the path that was checked.
    case voicesNotFound(String)
    /// The requested voice is not present in the voices archive; the
    /// associated value is the voice name.
    case voiceNotAvailable(String)
    /// Synthesis failed; carries the input text and the underlying error.
    case synthesisFailure(String, Error)

    /// A one-line message describing the error.
    ///
    /// For `.synthesisFailure` only the first 50 characters of the text are
    /// shown, followed by `...` when (and only when) the text was actually
    /// truncated.
    public var description: String {
        switch self {
        case .modelNotFound(let path):
            return "kokoro model not found at \(path)"
        case .voicesNotFound(let path):
            return "voices.npz not found at \(path)"
        case .voiceNotAvailable(let name):
            return "voice '\(name)' not found in voices.npz"
        case .synthesisFailure(let text, let error):
            let head = text.prefix(50)
            // A prefix that stops before the end of the string means characters
            // were dropped; only then is an ellipsis appropriate.
            let preview = head.endIndex < text.endIndex ? "\(head)..." : String(head)
            return "synthesis failed for '\(preview)': \(error)"
        }
    }
}
|
||||
21
VorleserKit/Sources/Synthesizer/VoicePack.swift
Normal file
21
VorleserKit/Sources/Synthesizer/VoicePack.swift
Normal file
@@ -0,0 +1,21 @@
|
||||
import Foundation
|
||||
|
||||
/// Describes one selectable voice: its internal name, the label shown to
/// users, and the language it speaks.
public struct VoicePack: Sendable, Identifiable {
    /// Languages a voice pack can declare, keyed by their string codes.
    public enum Language: String, Sendable {
        case enUS = "en-us"
        case enGB = "en-gb"
    }

    /// Internal voice identifier (also used as `id`).
    public let name: String
    /// Human-readable label for the voice.
    public let displayName: String
    /// Language associated with the voice.
    public let language: Language

    /// `Identifiable` conformance: a voice is identified by its `name`.
    public var id: String {
        return name
    }

    /// Curated voices bundled with the app.
    public static let curated: [VoicePack] = [
        ("af_heart", "Heart"),
        ("af_bella", "Bella"),
        ("am_michael", "Michael"),
    ].map { entry in
        VoicePack(name: entry.0, displayName: entry.1, language: .enUS)
    }
}
|
||||
Reference in New Issue
Block a user