Files
netfelix-audio-fix/server/services/analyzer.ts
Felix Förtsch 4057b692ba
All checks were successful
Build and Push Docker Image / build (push) Successful in 47s
audio: single EAC3 transcode target, prefer direct-play over lossless default
two simplifications to how we pick and transcode the one-per-language
audio track, motivated by seeing inconsistent DTS → FLAC vs DTS →
EAC3 outputs in the wild:

transcode target:
- drop the FLAC path entirely. every incompatible source now targets
  EAC3 regardless of container or lossless/lossy status
- FLAC for movie audio is bad value: ~2-3× the file size vs EAC3, no
  Atmos spatial metadata (TrueHD Atmos → FLAC silently loses Atmos),
  no AVR passthrough on Apple TV
- one target = no more container-conditional surprises

winner within a language group (betterAudio):
- new priority: highest channels → Apple-compatible → default → index
- old order put 'default' on top which forced a DTS-HD MA transcode
  even when an AC3 track at equal channels was right next to it.
  flipping means AC3 beats DTS-HD MA at the same channel count — pure
  copy instead of a lossless-then-re-encode round trip
- channel count still dominates, so 7.1 TrueHD still beats 5.1 AC3
  (and gets transcoded, which is the right call for real surround)

tests: new case for DTS-HD MA default + AC3 non-default at 5.1 → AC3
wins, job_type=copy. new case for 7.1 TrueHD beats 5.1 AC3 default.
every other existing test still holds.
2026-04-14 10:23:49 +02:00

284 lines
10 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { MediaItem, MediaStream, PlanResult } from "../types";
import { computeAppleCompat, isAppleCompatible, transcodeTarget } from "./apple-compat";
import { normalizeLanguage } from "./jellyfin";
/** Settings consumed by analyzeItem when deciding which audio streams to keep. */
export interface AnalyzerConfig {
/**
 * Additional audio languages to keep besides the original language (OG).
 * Array order doubles as output order: kept audio is ranked OG first, then
 * by position in this list.
 * NOTE(review): entries are compared against normalizeLanguage() output, so
 * they presumably must already be in normalized form — confirm with callers.
 */
audioLanguages: string[];
}
/**
 * Build the processing plan for one media item: a keep/remove verdict per
 * stream, the output position of every kept stream, a transcode codec for
 * kept audio where transcodeTarget asks for one, and a flag telling whether
 * the file needs any work at all.
 *
 * Subtitles are ALWAYS removed from the container (they get extracted to
 * sidecar files). is_noop is true only when nothing would change: no audio
 * removed or reordered, no subtitle streams, no transcode, the first kept
 * audio track is the only one flagged default, and every kept audio language
 * tag is already in normalized form.
 *
 * @param item - Original language, review flag and container of the item.
 * @param streams - Every stream of the file; decisions come back in this order.
 * @param config - Additional audio languages to keep after the original one.
 * @returns The plan. `confidence` is always "low" here; `notes` receives one
 *   entry when the original language is unknown and the item needs review.
 */
export function analyzeItem(
  item: Pick<MediaItem, "original_language" | "needs_review" | "container">,
  streams: MediaStream[],
  config: AnalyzerConfig,
): PlanResult {
  const origLang = item.original_language ? normalizeLanguage(item.original_language) : null;

  // First pass: an independent verdict per stream, in stream order.
  const decisions: PlanResult["decisions"] = [];
  for (const stream of streams) {
    const action = decideAction(stream, origLang, config.audioLanguages);
    decisions.push({ stream_id: stream.id, action, target_index: null, transcode_codec: null });
  }

  // Second pass: within each kept-language group, drop commentary/AD tracks
  // and alternate formats so exactly one audio stream per language survives.
  deduplicateAudioByLanguage(streams, decisions, origLang);

  // decisions[i] belongs to streams[i] — both were built in lockstep above.
  let anyAudioRemoved = false;
  for (let i = 0; i < streams.length; i += 1) {
    if (streams[i].type === "Audio" && decisions[i].action === "remove") {
      anyAudioRemoved = true;
      break;
    }
  }

  assignTargetOrder(streams, decisions, origLang, config.audioLanguages);
  const audioOrderChanged = checkAudioOrderChanged(streams, decisions);

  // Walk the kept audio once: ask for a transcode target and remember the
  // stream. The stream's profile (e.g. DTS-HD MA) — NOT its free-form title —
  // is what transcodeTarget receives to identify the source format.
  const keptAudioStreams: MediaStream[] = [];
  for (const decision of decisions) {
    if (decision.action !== "keep") continue;
    const stream = streams.find((candidate) => candidate.id === decision.stream_id);
    if (!stream || stream.type !== "Audio") continue;
    decision.transcode_codec = transcodeTarget(stream.codec ?? "", stream.profile, item.container);
    keptAudioStreams.push(stream);
  }

  const needsTranscode = decisions.some((decision) => decision.transcode_codec != null);
  const apple_compat = computeAppleCompat(
    keptAudioStreams.map((stream) => stream.codec ?? ""),
    item.container,
  );
  const job_type = needsTranscode ? ("transcode" as const) : ("copy" as const);
  const hasSubs = streams.some((stream) => stream.type === "Subtitle");

  // Pipeline also sets default disposition on the first kept audio and writes
  // canonical iso3 language tags. If either is already wrong in the file,
  // running ffmpeg would produce a different output → not a noop.
  const outputPosition = (stream: MediaStream): number =>
    decisions.find((decision) => decision.stream_id === stream.id)?.target_index ?? 0;
  const keptAudioSorted = [...keptAudioStreams].sort((a, b) => outputPosition(a) - outputPosition(b));
  const [firstKeptAudio, ...laterKeptAudio] = keptAudioSorted;
  const defaultMismatch = !!firstKeptAudio && firstKeptAudio.is_default !== 1;
  const nonDefaultHasDefault = laterKeptAudio.some((stream) => stream.is_default === 1);
  const languageMismatch = keptAudioStreams.some(
    (stream) => stream.language != null && stream.language !== normalizeLanguage(stream.language),
  );

  const somethingToDo =
    anyAudioRemoved ||
    audioOrderChanged ||
    hasSubs ||
    needsTranscode ||
    defaultMismatch ||
    nonDefaultHasDefault ||
    languageMismatch;
  const is_noop = !somethingToDo;

  const notes: string[] = [];
  if (!origLang && item.needs_review) {
    notes.push("Original language unknown — audio tracks not filtered; manual review required");
  }

  return { is_noop, has_subs: hasSubs, confidence: "low", apple_compat, job_type, decisions, notes };
}
/**
 * Titles that scream "not the main track": commentary, director's track,
 * visually-impaired/audio-description, karaoke, sign language.
 * Case-insensitive.
 *
 * "audio description" and "visual(ly) impaired" are accepted with either a
 * space or a hyphen between the two words. The previous `visually?` only made
 * the trailing "y" optional, so the common "Visual Impaired" spelling slipped
 * through, as did the hyphenated forms.
 */
const NON_PRIMARY_AUDIO_TITLE =
  /\b(commentary|director'?s?\b.*\b(track|comment|feature)|audio[ -]description|descriptive|visual(?:ly)?[ -]impaired|\bad\b|karaoke|sign language)/i;
/**
 * Tell whether an audio stream is an auxiliary track rather than the main
 * mix: either it carries the hearing-impaired flag, or its title (an absent
 * title counts as empty) matches NON_PRIMARY_AUDIO_TITLE.
 */
function isCommentaryOrAuxiliary(stream: MediaStream): boolean {
  const flaggedHearingImpaired = Boolean(stream.is_hearing_impaired);
  return flaggedHearingImpaired || NON_PRIMARY_AUDIO_TITLE.test(stream.title ?? "");
}
/**
 * Comparator that ranks audio tracks of one language group; the track that
 * sorts first is the one to keep. A negative result means `a` wins.
 *
 * Criteria are consulted in order and the first non-zero one decides:
 *   1. more channels (7.1 beats 5.1 beats stereo; missing count = 0)
 *   2. Apple-compatible codec (no transcode pass needed — AC3 beats
 *      DTS-HD MA at equal channels)
 *   3. default disposition (the muxer's pick)
 *   4. lower stream_index (source order, keeps the result stable)
 */
function betterAudio(a: MediaStream, b: MediaStream): number {
  // Lazily evaluated so isAppleCompatible only runs when channels tie.
  const criteria: Array<() => number> = [
    () => (b.channels ?? 0) - (a.channels ?? 0),
    () => {
      const aScore = isAppleCompatible(a.codec ?? "") ? 1 : 0;
      const bScore = isAppleCompatible(b.codec ?? "") ? 1 : 0;
      return bScore - aScore;
    },
    () => (b.is_default ?? 0) - (a.is_default ?? 0),
  ];
  for (const criterion of criteria) {
    const delta = criterion();
    if (delta !== 0) return delta;
  }
  return a.stream_index - b.stream_index;
}
/**
 * Reduce the kept audio streams to one per language by flipping the losers'
 * decisions to "remove" in place. Non-audio streams and streams that are
 * already marked "remove" are never touched.
 *
 * @param streams - All streams of the file.
 * @param decisions - Per-stream decisions, matched to streams by stream_id; mutated.
 * @param origLang - Normalized original language, or null when unknown.
 */
function deduplicateAudioByLanguage(
  streams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
): void {
  const decisionById = new Map(decisions.map((d) => [d.stream_id, d]));
  const isStillKept = (s: MediaStream): boolean => decisionById.get(s.id)?.action === "keep";
  const markRemoved = (s: MediaStream): void => {
    const decision = decisionById.get(s.id);
    if (decision) decision.action = "remove";
  };
  // Keep only the betterAudio winner of a candidate list; a list of zero or
  // one streams is left alone.
  const dropAllButBest = (candidates: MediaStream[]): void => {
    if (candidates.length <= 1) return;
    const [, ...losers] = [...candidates].sort(betterAudio);
    losers.forEach(markRemoved);
  };

  const keptAudio = streams.filter((s) => s.type === "Audio" && isStillKept(s));

  // 1. Commentary/AD tracks go first, regardless of language match.
  // NOTE(review): a language whose only track is flagged auxiliary ends up
  // with no track at all — confirm that is intended.
  for (const candidate of keptAudio) {
    if (isCommentaryOrAuxiliary(candidate)) markRemoved(candidate);
  }

  // 2. Bucket the survivors by normalized language. Untagged streams are
  //    collected separately because their handling depends on origLang.
  const byLang = new Map<string, MediaStream[]>();
  const noLang: MediaStream[] = [];
  for (const survivor of keptAudio.filter(isStillKept)) {
    if (!survivor.language) {
      noLang.push(survivor);
      continue;
    }
    const key = normalizeLanguage(survivor.language);
    const bucket = byLang.get(key);
    if (bucket) bucket.push(survivor);
    else byLang.set(key, [survivor]);
  }

  // 3. One winner per tagged language.
  for (const bucket of byLang.values()) dropAllButBest(bucket);

  // 4. Untagged audio is only deduplicated when the original language is
  //    known; with an unknown original language the ambiguity is left alone
  //    and every untagged track stays.
  if (origLang) dropAllButBest(noLang);
}
/**
 * First-pass verdict for a single stream, looked at in isolation.
 *
 * - Subtitle streams are always removed.
 * - Audio is kept when the original language is unknown, when the stream has
 *   no language tag, or when its normalized language equals the original
 *   language or is listed in audioLanguages; otherwise it is removed.
 * - Every other stream type (Video, Data, EmbeddedImage, anything
 *   unrecognized) is kept.
 *
 * @param stream - Stream to judge.
 * @param origLang - Normalized original language, or null when unknown.
 * @param audioLanguages - Additional languages to keep.
 */
function decideAction(stream: MediaStream, origLang: string | null, audioLanguages: string[]): "keep" | "remove" {
  if (stream.type === "Subtitle") return "remove";
  if (stream.type !== "Audio") return "keep";

  // Without a reference language or a tag there is nothing to filter on.
  if (!origLang || !stream.language) return "keep";

  const lang = normalizeLanguage(stream.language);
  const wanted = lang === origLang || audioLanguages.includes(lang);
  return wanted ? "keep" : "remove";
}
/**
 * Fill in target_index on the decision of every kept stream. target_index is
 * the 0-based position of the stream within its own type group in the output
 * file. Non-audio groups keep the order of allStreams; audio is ordered by
 * language rank (original language first, then the additional languages in
 * configured order, then everything else) with stream_index as tiebreak.
 * Decisions of removed streams are left untouched.
 *
 * @param allStreams - All streams of the file.
 * @param decisions - Per-stream decisions, matched by stream_id; mutated.
 * @param origLang - Normalized original language, or null when unknown.
 * @param audioLanguages - Additional languages, in preferred output order.
 */
export function assignTargetOrder(
  allStreams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
  audioLanguages: string[],
): void {
  const decisionFor = (stream: MediaStream) => decisions.find((d) => d.stream_id === stream.id);

  // Bucket the kept streams per type, preserving input order.
  const keptByType = new Map<string, MediaStream[]>();
  for (const stream of allStreams) {
    if (decisionFor(stream)?.action !== "keep") continue;
    const bucket = keptByType.get(stream.type);
    if (bucket) bucket.push(stream);
    else keptByType.set(stream.type, [stream]);
  }

  // Only the audio bucket is reordered.
  keptByType.get("Audio")?.sort((a, b) => {
    const aRank = langRank(a.language, origLang, audioLanguages);
    const bRank = langRank(b.language, origLang, audioLanguages);
    return aRank !== bRank ? aRank - bRank : a.stream_index - b.stream_index;
  });

  for (const bucket of keptByType.values()) {
    for (const [position, stream] of bucket.entries()) {
      const decision = decisionFor(stream);
      if (decision) decision.target_index = position;
    }
  }
}
/**
 * Sort rank of a language tag for output ordering; lower sorts first.
 *
 *   0                          the (known) original language
 *   1 … audioLanguages.length  position in audioLanguages, plus one
 *   audioLanguages.length + 1  anything else, including a missing tag
 */
function langRank(lang: string | null, origLang: string | null, audioLanguages: string[]): number {
  const lastRank = audioLanguages.length + 1;
  if (!lang) return lastRank;

  const code = normalizeLanguage(lang);
  if (origLang && code === origLang) return 0;

  const position = code ? audioLanguages.indexOf(code) : -1;
  return position >= 0 ? position + 1 : lastRank;
}
/**
 * Report whether the kept audio streams will come out in a different order
 * than they went in. The kept audio streams are lined up by stream_index;
 * the order is unchanged only if the n-th of them has target_index n. A kept
 * audio stream without a matching decision or target_index counts as changed.
 */
function checkAudioOrderChanged(streams: MediaStream[], decisions: PlanResult["decisions"]): boolean {
  const decisionFor = (stream: MediaStream) => decisions.find((d) => d.stream_id === stream.id);

  const inSourceOrder = streams
    .filter((stream) => stream.type === "Audio" && decisionFor(stream)?.action === "keep")
    .sort((a, b) => a.stream_index - b.stream_index);

  return inSourceOrder.some((stream, position) => decisionFor(stream)?.target_index !== position);
}