add per-channel min-max normalization for ASR features
This commit is contained in:
@@ -279,6 +279,7 @@ final class KokoroPipeline {
|
||||
return channel * (shape.last ?? 0) + token
|
||||
}
|
||||
|
||||
// First pass: compute weighted sum
|
||||
for h in 0..<availableChannels {
|
||||
for f in 0..<frameCount {
|
||||
var sum: Float = 0
|
||||
@@ -295,6 +296,38 @@ final class KokoroPipeline {
|
||||
output[outIndex] = NSNumber(value: sum)
|
||||
}
|
||||
}
|
||||
|
||||
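// Second pass: per-channel min-max normalization. Finite values are rescaled to [-1, 1]
// using each channel's finite min/max; non-finite values in a normalized channel become 0.
// NOTE(review): channels whose range is <= 1e-6 (constant, or containing no finite values)
// are skipped entirely, so any non-finite values in them are left as-is — confirm this is intended.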
|
||||
var channelMin = [Float](repeating: Float.greatestFiniteMagnitude, count: availableChannels)
|
||||
var channelMax = [Float](repeating: -Float.greatestFiniteMagnitude, count: availableChannels)
|
||||
|
||||
for h in 0..<availableChannels {
|
||||
for f in 0..<frameCount {
|
||||
let idx = h * frameCount + f
|
||||
let value = output[idx].floatValue
|
||||
if value.isFinite {
|
||||
if value < channelMin[h] { channelMin[h] = value }
|
||||
if value > channelMax[h] { channelMax[h] = value }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for h in 0..<availableChannels {
|
||||
let range = channelMax[h] - channelMin[h]
|
||||
if range > 1e-6 {
|
||||
for f in 0..<frameCount {
|
||||
let idx = h * frameCount + f
|
||||
let value = output[idx].floatValue
|
||||
if value.isFinite {
|
||||
let normalized = (value - channelMin[h]) / range * 2.0 - 1.0
|
||||
output[idx] = NSNumber(value: normalized)
|
||||
} else {
|
||||
output[idx] = NSNumber(value: Float(0))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user