Add per-channel min-max normalization (to [-1, 1]) for ASR features

This commit is contained in:
2026-02-19 12:36:29 +01:00
parent 2b24d520fa
commit 2774fb8379

View File

@@ -279,6 +279,7 @@ final class KokoroPipeline {
return channel * (shape.last ?? 0) + token return channel * (shape.last ?? 0) + token
} }
// First pass: compute weighted sum
for h in 0..<availableChannels { for h in 0..<availableChannels {
for f in 0..<frameCount { for f in 0..<frameCount {
var sum: Float = 0 var sum: Float = 0
@@ -295,6 +296,38 @@ final class KokoroPipeline {
output[outIndex] = NSNumber(value: sum) output[outIndex] = NSNumber(value: sum)
} }
} }
// Second pass: per-channel min-max normalization to [-1, 1]; channels whose range is <= 1e-6 are left unchanged
var channelMin = [Float](repeating: Float.greatestFiniteMagnitude, count: availableChannels)
var channelMax = [Float](repeating: -Float.greatestFiniteMagnitude, count: availableChannels)
for h in 0..<availableChannels {
for f in 0..<frameCount {
let idx = h * frameCount + f
let value = output[idx].floatValue
if value.isFinite {
if value < channelMin[h] { channelMin[h] = value }
if value > channelMax[h] { channelMax[h] = value }
}
}
}
for h in 0..<availableChannels {
let range = channelMax[h] - channelMin[h]
if range > 1e-6 {
for f in 0..<frameCount {
let idx = h * frameCount + f
let value = output[idx].floatValue
if value.isFinite {
let normalized = (value - channelMin[h]) / range * 2.0 - 1.0
output[idx] = NSNumber(value: normalized)
} else {
output[idx] = NSNumber(value: Float(0))
}
}
}
}
return output return output
} }