Files
netfelix-audio-fix/server/services/analyzer.ts
T
felixfoertsch 444d2eb733 extract normalizeLanguage, guessOriginalLanguage to language-utils.ts
Move language normalization out of jellyfin.ts into its own module so
non-Jellyfin services (ffmpeg, radarr, sonarr, analyzer) no longer
depend on the Jellyfin service file. jellyfin.ts re-exports
normalizeLanguage for backward compatibility.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 06:31:21 +02:00

393 lines
15 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { MediaItem, MediaStream, PlanResult } from "../types";
import { computeAppleCompat, isAppleCompatible, transcodeTarget } from "./apple-compat";
import { isExtractableSubtitle } from "./ffmpeg";
import { normalizeLanguage } from "./language-utils";
// Values of `orig_lang_source` that analyzeItem accepts as authoritative for
// the original language. Any other source (or a missing one) forces the plan's
// auto_class to "manual".
const AUTHORITATIVE_ORIG_SOURCES = new Set<string>(["radarr", "sonarr", "manual"]);
/** Settings consumed by analyzeItem and assignTargetOrder. */
export interface AnalyzerConfig {
// Additional audio languages to keep besides the original language (OG).
// Entries are compared as-is against normalizeLanguage() output, so they
// must already be in that normalized form. Their order also determines the
// output order of the kept tracks that follow the OG track.
audioLanguages: string[];
}
/**
 * Resolve the language a stream should be treated as having.
 *
 * A non-empty entry in `overrides` keyed by the stream id takes precedence;
 * otherwise the stream's own `language` is returned unchanged. The result is
 * the raw tag, so callers must still run it through normalizeLanguage()
 * before comparing it with anything.
 */
function effectiveLanguage(stream: MediaStream, overrides: Map<number, string> | undefined): string | null {
  const userChoice = overrides ? overrides.get(stream.id) : undefined;
  return userChoice ? userChoice : stream.language;
}
/**
 * Given an item and its streams, compute what action to take for each stream
 * and whether the file needs audio remuxing.
 *
 * Subtitles are ALWAYS removed from the container (they get extracted to
 * sidecar files). is_noop considers audio removal/reorder, subtitle
 * extraction, transcode, default-disposition fixes and language-tag
 * rewrites — a "noop" is a file that needs no changes at all.
 *
 * `languageOverrides` maps stream_id → ISO code and lets the user correct a
 * mislabeled track ("und" → "spa") before the analyzer groups and filters.
 * When present, the override wins over `MediaStream.language` for every
 * language-aware decision (keep/remove, dedup, ordering, is_noop, and the
 * OG-vs-dub quality comparison that feeds auto_class).
 *
 * @param item - The fields of the media item the analysis depends on.
 * @param streams - All streams of the file; `decisions` in the result is
 *   index-aligned with this array.
 * @param config - Analyzer settings (additional audio languages to keep).
 * @param languageOverrides - Optional per-stream language corrections.
 * @returns The plan: per-stream decisions plus the derived flags
 *   (is_noop, has_subs, auto_class, apple_compat, job_type) and notes.
 */
export function analyzeItem(
  item: Pick<MediaItem, "original_language" | "orig_lang_source" | "needs_review" | "container">,
  streams: MediaStream[],
  config: AnalyzerConfig,
  languageOverrides?: Map<number, string>,
): PlanResult {
  const origLang = item.original_language ? normalizeLanguage(item.original_language) : null;
  const notes: string[] = [];

  // Normalized effective language of a stream (override first, then the raw
  // tag), or null when the stream has neither.
  const normalizedEffectiveLanguage = (s: MediaStream): string | null => {
    const lang = effectiveLanguage(s, languageOverrides);
    return lang ? normalizeLanguage(lang) : null;
  };

  const decisions: PlanResult["decisions"] = streams.map((s) => {
    const lang = effectiveLanguage(s, languageOverrides);
    const action = decideAction(s, lang, origLang, config.audioLanguages);
    return { stream_id: s.id, action, target_index: null, transcode_codec: null };
  });

  // Snapshot actions before dedup so we can distinguish language-driven removes
  // from commentary-title-driven removes when computing commentaryHeuristicFired.
  const decisionsBeforeDedup = new Map<number, "keep" | "remove">(decisions.map((d) => [d.stream_id, d.action]));

  // Second pass: within each kept-language group, drop commentary/AD tracks
  // and alternate formats so we end up with exactly one audio stream per
  // language. The user doesn't need 2× English (main + director's
  // commentary) — one well-chosen track is enough.
  deduplicateAudioByLanguage(streams, decisions, origLang, languageOverrides);

  // `decisions` was built with streams.map(), so index i refers to the same stream.
  const anyAudioRemoved = streams.some((s, i) => s.type === "Audio" && decisions[i]?.action === "remove");

  assignTargetOrder(streams, decisions, origLang, config.audioLanguages, languageOverrides);
  const audioOrderChanged = checkAudioOrderChanged(streams, decisions);

  for (const d of decisions) {
    if (d.action !== "keep") continue;
    const stream = streams.find((s) => s.id === d.stream_id);
    if (stream && stream.type === "Audio") {
      // Use Profile (DTS-HD MA, etc.) — NOT title — to pick the transcode target.
      // Passing title here used to cause lossless DTS-HD MA in MKV to fall back
      // to EAC3 instead of the better FLAC path when the title didn't happen to
      // contain "MA".
      d.transcode_codec = transcodeTarget(stream.codec ?? "", stream.profile, item.container);
    }
  }

  const keptAudioStreams = decisions
    .filter((d) => d.action === "keep")
    .map((d) => streams.find((s) => s.id === d.stream_id))
    .filter((s): s is MediaStream => !!s && s.type === "Audio");
  const keptAudioCodecs = keptAudioStreams.map((s) => s.codec ?? "");
  const needsTranscode = decisions.some((d) => d.transcode_codec != null);
  const apple_compat = computeAppleCompat(keptAudioCodecs, item.container);
  const job_type = needsTranscode ? ("transcode" as const) : ("copy" as const);
  const hasSubs = streams.some((s) => s.type === "Subtitle");

  // Pipeline also sets default disposition on the first kept audio and writes
  // canonical iso3 language tags. If either is already wrong in the file,
  // running ffmpeg would produce a different output → not a noop.
  const keptAudioSorted = [...keptAudioStreams].sort((a, b) => {
    const ai = decisions.find((d) => d.stream_id === a.id)?.target_index ?? 0;
    const bi = decisions.find((d) => d.stream_id === b.id)?.target_index ?? 0;
    return ai - bi;
  });
  const firstKeptAudio = keptAudioSorted[0];
  const defaultMismatch = !!firstKeptAudio && firstKeptAudio.is_default !== 1;
  const nonDefaultHasDefault = keptAudioSorted.slice(1).some((s) => s.is_default === 1);

  // Non-canonical language tag in the file (e.g. "ger" instead of "deu", or
  // "en" instead of "eng") or a user-provided custom_language that differs
  // from the stream's raw tag — either one means ffmpeg would rewrite the
  // metadata, so the file isn't already in the desired state.
  const languageMismatch = keptAudioStreams.some((s) => {
    const override = languageOverrides?.get(s.id);
    if (override) {
      const canonical = normalizeLanguage(override);
      return s.language !== canonical;
    }
    return s.language != null && s.language !== normalizeLanguage(s.language);
  });

  const is_noop =
    !anyAudioRemoved &&
    !audioOrderChanged &&
    !hasSubs &&
    !needsTranscode &&
    !defaultMismatch &&
    !nonDefaultHasDefault &&
    !languageMismatch;

  if (!origLang && item.needs_review) {
    notes.push("Original language unknown — audio tracks not filtered; manual review required");
  }

  // Surface image-based subtitles that can't be written to a sane
  // single-file sidecar. They'll still be stripped from the container,
  // but won't land on disk anywhere — the user sees this in the plan
  // notes so nothing vanishes silently.
  const nonExtractable = streams.filter((s) => s.type === "Subtitle" && !isExtractableSubtitle(s.codec));
  if (nonExtractable.length > 0) {
    const grouped = new Map<string, string[]>();
    for (const s of nonExtractable) {
      const codec = (s.codec ?? "unknown").toLowerCase();
      if (!grouped.has(codec)) grouped.set(codec, []);
      grouped.get(codec)!.push(s.language ?? "und");
    }
    const summary = [...grouped.entries()].map(([codec, langs]) => `${codec} (${langs.join(", ")})`).join("; ");
    notes.push(`${nonExtractable.length} subtitle(s) dropped: ${summary} — not extractable to sidecar`);
  }

  const origLangSource = item.orig_lang_source ?? null;
  const authoritativeOg =
    !!origLang && !!origLangSource && AUTHORITATIVE_ORIG_SOURCES.has(origLangSource) && item.needs_review === 0;
  const keptAudioLanguages = keptAudioStreams.map(normalizedEffectiveLanguage);
  const ogPresent = !!origLang && keptAudioLanguages.includes(origLang);
  const everyKeptHasLanguage = keptAudioStreams.length > 0 && keptAudioLanguages.every((l) => l != null);

  // Only count as heuristic-fired when the commentary regex itself CAUSED the
  // removal: track was "keep" after language-based decideAction, then flipped
  // to "remove" by deduplicateAudioByLanguage because of its title/flag.
  // A track removed for LANGUAGE reasons (keep→remove never happened) should
  // not upgrade the classification even if its title coincidentally matches.
  const commentaryHeuristicFired = decisions.some((d) => {
    const before = decisionsBeforeDedup.get(d.stream_id);
    if (before !== "keep" || d.action !== "remove") return false;
    const s = streams.find((str) => str.id === d.stream_id);
    return !!s && isCommentaryOrAuxiliary(s);
  });

  // Quality mismatch: OG track exists but has fewer channels than a non-OG
  // track available in the file. E.g. Japanese mono vs English 5.1 — user
  // should decide whether to keep the higher-quality dub. Compares against
  // ALL non-OG audio streams (not just kept ones) because the user might
  // want to keep a track that the config currently removes.
  // Languages are resolved through the overrides, like every other
  // language-aware decision here: a track the user relabeled as the OG
  // language must count as OG, not as a dub (or as nothing at all).
  const audioStreams = streams.filter((s) => s.type === "Audio");
  const ogStreams = audioStreams.filter((s) => {
    const lang = normalizedEffectiveLanguage(s);
    return lang && lang === origLang;
  });
  const nonOgStreams = audioStreams.filter((s) => {
    const lang = normalizedEffectiveLanguage(s);
    return lang && lang !== origLang;
  });
  const ogMaxChannels = Math.max(0, ...ogStreams.map((s) => s.channels ?? 0));
  const nonOgMaxChannels = Math.max(0, ...nonOgStreams.map((s) => s.channels ?? 0));
  const ogQualityInferior = ogMaxChannels > 0 && nonOgMaxChannels > ogMaxChannels;

  let auto_class: PlanResult["auto_class"];
  if (!authoritativeOg || !ogPresent || !everyKeptHasLanguage) {
    auto_class = "manual";
  } else if (commentaryHeuristicFired || ogQualityInferior) {
    auto_class = "auto_heuristic";
  } else {
    auto_class = "auto";
  }

  return { is_noop, has_subs: hasSubs, auto_class, apple_compat, job_type, decisions, notes };
}
/**
 * Titles that scream "not the main track": commentary, director's track,
 * visually-impaired/audio-description, karaoke, sign language.
 * Case-insensitive.
 *
 * "audio description" and "visual(ly) impaired" are accepted with either a
 * space or a hyphen between the words, and both "visual" and "visually" are
 * recognised. The pattern has no `g`/`y` flag, so `.test()` is stateless.
 */
const NON_PRIMARY_AUDIO_TITLE =
  /\b(commentary|director'?s?\b.*\b(track|comment|feature)|audio[ -]description|descriptive|visual(?:ly)?[ -]impaired|\bad\b|karaoke|sign language)/i;
/**
 * True when a stream is not a main track: either it carries the
 * hearing-impaired flag, or its title (empty when missing) matches
 * NON_PRIMARY_AUDIO_TITLE.
 */
function isCommentaryOrAuxiliary(stream: MediaStream): boolean {
  // The flag alone is decisive; the title is only consulted without it.
  return stream.is_hearing_impaired ? true : NON_PRIMARY_AUDIO_TITLE.test(stream.title ?? "");
}
/**
 * Comparator that ranks audio tracks of one language group so the
 * "primary" track sorts first. A negative result means `a` wins.
 *
 * Criteria, applied in order until one of them separates the two tracks:
 *   1. more channels wins (7.1 over 5.1 over stereo)
 *   2. Apple-compatible codec wins (no transcode pass needed; at equal
 *      channel count AC3 beats DTS-HD MA, which would be re-encoded anyway)
 *   3. the track carrying the default disposition wins
 *   4. the lower stream_index wins (source order, stable tiebreak)
 */
function betterAudio(a: MediaStream, b: MediaStream): number {
  const appleScore = (s: MediaStream): number => (isAppleCompatible(s.codec ?? "") ? 1 : 0);

  // 1. channel count, descending
  let delta = (b.channels ?? 0) - (a.channels ?? 0);

  // 2. Apple compatibility, compatible first
  if (delta === 0) {
    const scoreA = appleScore(a);
    const scoreB = appleScore(b);
    delta = scoreB - scoreA;
  }

  // 3. default disposition, default first
  if (delta === 0) {
    delta = (b.is_default ?? 0) - (a.is_default ?? 0);
  }

  // 4. original position, ascending
  if (delta === 0) {
    delta = a.stream_index - b.stream_index;
  }

  return delta;
}
/**
 * Reduce the kept audio streams to one track per language by flipping the
 * losers' decisions to "remove" in place.
 *
 *   1. Every kept audio stream that isCommentaryOrAuxiliary() is removed,
 *      whatever its language.
 *   2. The survivors are grouped by normalized effective language; inside
 *      each group only the betterAudio() winner stays.
 *   3. Survivors with no language at all (no raw tag, no override) are
 *      deduplicated the same way, but only when the original language is
 *      known; with an unknown OG they are all left in place.
 */
function deduplicateAudioByLanguage(
  streams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
  languageOverrides: Map<number, string> | undefined,
): void {
  const decisionFor = new Map(decisions.map((d) => [d.stream_id, d]));

  const isKept = (s: MediaStream): boolean => decisionFor.get(s.id)?.action === "keep";
  const drop = (s: MediaStream): void => {
    const decision = decisionFor.get(s.id);
    if (decision) decision.action = "remove";
  };
  // Rank a group with betterAudio and drop everything behind the winner.
  // Groups of zero or one stream are left untouched.
  const dropAllButBest = (group: MediaStream[]): void => {
    const [, ...losers] = [...group].sort(betterAudio);
    for (const loser of losers) drop(loser);
  };

  const initiallyKept = streams.filter((s) => s.type === "Audio" && isKept(s));

  // Step 1: commentary / audio-description tracks go first.
  for (const s of initiallyKept) {
    if (isCommentaryOrAuxiliary(s)) drop(s);
  }

  // Step 2: bucket what is left by language; untagged streams are set aside.
  const perLanguage = new Map<string, MediaStream[]>();
  const untagged: MediaStream[] = [];
  for (const s of initiallyKept) {
    if (!isKept(s)) continue;
    const rawLang = effectiveLanguage(s, languageOverrides);
    if (!rawLang) {
      untagged.push(s);
      continue;
    }
    const code = normalizeLanguage(rawLang);
    const bucket = perLanguage.get(code);
    if (bucket) {
      bucket.push(s);
    } else {
      perLanguage.set(code, [s]);
    }
  }
  for (const group of perLanguage.values()) {
    dropAllButBest(group);
  }

  // Step 3: untagged audio is only thinned out when the OG language is known;
  // otherwise the ambiguity is left for the user to resolve.
  if (origLang && untagged.length > 1) {
    dropAllButBest(untagged);
  }
}
/**
 * First-pass keep/remove verdict for a single stream.
 *
 * - Subtitles are always removed.
 * - Every non-audio, non-subtitle stream type is kept.
 * - Audio is kept when the original language is unknown, when the stream has
 *   no effective language, or when its normalized language equals the
 *   original language or appears in `audioLanguages`; otherwise it is removed.
 */
function decideAction(
  stream: MediaStream,
  effectiveLang: string | null,
  origLang: string | null,
  audioLanguages: string[],
): "keep" | "remove" {
  if (stream.type === "Subtitle") return "remove";
  if (stream.type !== "Audio") return "keep";

  // Without both sides of the comparison there is nothing to filter on.
  if (!origLang || !effectiveLang) return "keep";

  const code = normalizeLanguage(effectiveLang);
  const wanted = code === origLang || audioLanguages.includes(code);
  return wanted ? "keep" : "remove";
}
/**
 * Write `target_index` onto the decision of every kept stream.
 *
 * The index is the stream's 0-based position inside its own type group in
 * the output. Non-audio groups keep input order. Kept audio is ordered by
 * language rank (see langRank: OG first, then the configured additional
 * languages in their configured order, then everything else), with the
 * original stream_index as tiebreak. Decisions of removed streams are not
 * touched.
 */
export function assignTargetOrder(
  allStreams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
  audioLanguages: string[],
  languageOverrides?: Map<number, string>,
): void {
  const decisionOf = (s: MediaStream) => decisions.find((d) => d.stream_id === s.id);
  const rankOf = (s: MediaStream): number =>
    langRank(effectiveLanguage(s, languageOverrides), origLang, audioLanguages);

  // Bucket the kept streams per type, preserving input order.
  const buckets = new Map<string, MediaStream[]>();
  for (const s of allStreams) {
    if (decisionOf(s)?.action !== "keep") continue;
    const bucket = buckets.get(s.type);
    if (bucket) {
      bucket.push(s);
    } else {
      buckets.set(s.type, [s]);
    }
  }

  // Only the audio bucket is reordered.
  buckets.get("Audio")?.sort((a, b) => {
    const rankA = rankOf(a);
    const rankB = rankOf(b);
    return rankA !== rankB ? rankA - rankB : a.stream_index - b.stream_index;
  });

  for (const bucket of buckets.values()) {
    bucket.forEach((s, position) => {
      const decision = decisionOf(s);
      if (decision) decision.target_index = position;
    });
  }
}
/**
 * Sort rank of a language for output ordering; lower ranks come first.
 *
 *   0                          → matches the original language
 *   1 … audioLanguages.length  → position in `audioLanguages`, 1-based
 *   audioLanguages.length + 1  → everything else, including no language
 */
function langRank(lang: string | null, origLang: string | null, audioLanguages: string[]): number {
  const lastRank = audioLanguages.length + 1;
  if (!lang) return lastRank;

  const code = normalizeLanguage(lang);
  if (origLang && code === origLang) return 0;
  if (!code) return lastRank;

  const position = audioLanguages.indexOf(code);
  return position >= 0 ? position + 1 : lastRank;
}
/**
 * Reports whether the kept audio streams end up in a different order than
 * they have in the input.
 *
 * The kept audio streams are ranked by stream_index; the order counts as
 * changed as soon as one of them has a target_index that differs from its
 * rank (a missing decision or an unset target_index counts as a difference).
 */
function checkAudioOrderChanged(streams: MediaStream[], decisions: PlanResult["decisions"]): boolean {
  const decisionOf = (s: MediaStream) => decisions.find((d) => d.stream_id === s.id);

  const inSourceOrder = streams
    .filter((s) => s.type === "Audio" && decisionOf(s)?.action === "keep")
    .sort((a, b) => a.stream_index - b.stream_index);

  return inSourceOrder.some((s, rank) => decisionOf(s)?.target_index !== rank);
}