add per-channel min-max normalization for ASR features
This commit is contained in:
@@ -279,6 +279,7 @@ final class KokoroPipeline {
|
|||||||
return channel * (shape.last ?? 0) + token
|
return channel * (shape.last ?? 0) + token
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First pass: compute weighted sum
|
||||||
for h in 0..<availableChannels {
|
for h in 0..<availableChannels {
|
||||||
for f in 0..<frameCount {
|
for f in 0..<frameCount {
|
||||||
var sum: Float = 0
|
var sum: Float = 0
|
||||||
@@ -295,6 +296,38 @@ final class KokoroPipeline {
|
|||||||
output[outIndex] = NSNumber(value: sum)
|
output[outIndex] = NSNumber(value: sum)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Second pass: per-channel min-max normalization
|
||||||
|
var channelMin = [Float](repeating: Float.greatestFiniteMagnitude, count: availableChannels)
|
||||||
|
var channelMax = [Float](repeating: -Float.greatestFiniteMagnitude, count: availableChannels)
|
||||||
|
|
||||||
|
for h in 0..<availableChannels {
|
||||||
|
for f in 0..<frameCount {
|
||||||
|
let idx = h * frameCount + f
|
||||||
|
let value = output[idx].floatValue
|
||||||
|
if value.isFinite {
|
||||||
|
if value < channelMin[h] { channelMin[h] = value }
|
||||||
|
if value > channelMax[h] { channelMax[h] = value }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for h in 0..<availableChannels {
|
||||||
|
let range = channelMax[h] - channelMin[h]
|
||||||
|
if range > 1e-6 {
|
||||||
|
for f in 0..<frameCount {
|
||||||
|
let idx = h * frameCount + f
|
||||||
|
let value = output[idx].floatValue
|
||||||
|
if value.isFinite {
|
||||||
|
let normalized = (value - channelMin[h]) / range * 2.0 - 1.0
|
||||||
|
output[idx] = NSNumber(value: normalized)
|
||||||
|
} else {
|
||||||
|
output[idx] = NSNumber(value: Float(0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return output
|
return output
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user