Files
netfelix-audio-fix/server/services/analyzer.ts
Felix Förtsch d2983d5f38
All checks were successful
Build and Push Docker Image / build (push) Successful in 1m30s
skip non-extractable subs (dvdsub/dvbsub/unknown), summarise ffmpeg errors
Abraham Lincoln crashed with exit 234 because the file had 14 dvd_subtitle
streams: our extraction dict only keyed on the long form (dvd_subtitle)
while jellyfin stores the short form (dvdsub), so the lookup fell back
to .srt, ffmpeg picked the srt muxer, and srt can't encode image-based
subs. textbook silent dict miss.

replaced the extension dict with an EXTRACTABLE map that pairs codec →
{ext, codecArg} and explicitly enumerates every codec we can route to a
single-file sidecar. everything else (dvd_subtitle/dvdsub, dvb_subtitle/
dvbsub, unknown codecs) is now skipped at command-build time. the plan
picks up a note like '14 subtitle(s) dropped: dvdsub (eng, est, ind,
kor, jpn, lav, lit, may, chi, chi, tha, vie, rus, ukr) — not extractable
to sidecar' so the user sees exactly what didn't make it.

also added extractErrorSummary in execute.ts: when a job errors, scan
the last 60 stderr lines for fatal keywords (Error:, Conversion failed!,
Unsupported, Invalid argument, Permission denied, No space left, …),
dedupe, prepend the summary to the job's stored output. the review_plan
notes get the same summary — surfaces the real cause next to the plan
instead of burying it under ffmpeg's 200-line banner.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 18:42:05 +02:00

301 lines
11 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { MediaItem, MediaStream, PlanResult } from "../types";
import { computeAppleCompat, isAppleCompatible, transcodeTarget } from "./apple-compat";
import { isExtractableSubtitle } from "./ffmpeg";
import { normalizeLanguage } from "./jellyfin";
/** Settings that steer which audio tracks `analyzeItem` keeps and how it orders them. */
export interface AnalyzerConfig {
  /**
   * Additional languages to keep (after OG, i.e. the item's original
   * language). List position doubles as the output ordering rank in
   * `langRank`. Entries are compared against `normalizeLanguage` output
   * without being normalized themselves, so they are presumably expected
   * to already be in normalized form — TODO confirm against the caller.
   */
  audioLanguages: string[];
}
/**
 * Build the processing plan for a single media item.
 *
 * The plan is assembled in this order:
 *  1. every stream receives an initial keep/remove verdict (`decideAction`);
 *     subtitle streams are always marked "remove" there;
 *  2. kept audio is thinned by `deduplicateAudioByLanguage`;
 *  3. kept streams get their output position (`assignTargetOrder`);
 *  4. every kept audio stream gets a transcode target (possibly null) from
 *     `transcodeTarget`, chosen from codec + profile + container;
 *  5. `is_noop` is derived — true only when nothing at all would change.
 *
 * @param item    Original language, review flag and container of the item.
 * @param streams All streams of the item, in source order.
 * @param config  Extra audio languages to keep besides the original one.
 * @returns The plan: per-stream decisions, job type, Apple-compat result,
 *          noop/subtitle flags and human-readable notes. `confidence` is
 *          always reported as "low" by this function.
 */
export function analyzeItem(
  item: Pick<MediaItem, "original_language" | "needs_review" | "container">,
  streams: MediaStream[],
  config: AnalyzerConfig,
): PlanResult {
  const { audioLanguages } = config;
  const origLang = item.original_language ? normalizeLanguage(item.original_language) : null;

  // First pass: an independent verdict per stream.
  const decisions: PlanResult["decisions"] = streams.map((stream) => ({
    stream_id: stream.id,
    action: decideAction(stream, origLang, audioLanguages),
    target_index: null,
    transcode_codec: null,
  }));

  // Second pass: drop commentary/auxiliary tracks and surplus tracks that
  // share a language with a better candidate.
  deduplicateAudioByLanguage(streams, decisions, origLang);

  // `decisions` was mapped 1:1 from `streams`, so index i pairs them up.
  const anyAudioRemoved = decisions.some(
    (decision, i) => decision.action === "remove" && streams[i].type === "Audio",
  );

  assignTargetOrder(streams, decisions, origLang, audioLanguages);
  const audioOrderChanged = checkAudioOrderChanged(streams, decisions);

  // Walk the kept decisions once: pick the transcode target for each kept
  // audio stream and remember those streams (in decision order).
  // The target is chosen from the stream's profile, not its title.
  const keptAudioStreams: MediaStream[] = [];
  for (const decision of decisions) {
    if (decision.action !== "keep") continue;
    const stream = streams.find((s) => s.id === decision.stream_id);
    if (!stream || stream.type !== "Audio") continue;
    decision.transcode_codec = transcodeTarget(stream.codec ?? "", stream.profile, item.container);
    keptAudioStreams.push(stream);
  }

  const needsTranscode = decisions.some((decision) => decision.transcode_codec != null);
  const apple_compat = computeAppleCompat(
    keptAudioStreams.map((s) => s.codec ?? ""),
    item.container,
  );
  const job_type: "transcode" | "copy" = needsTranscode ? "transcode" : "copy";
  const hasSubs = streams.some((s) => s.type === "Subtitle");

  // A file whose default flag or language tags would be rewritten is not a
  // noop either: the first kept audio (in output order) must be the only
  // default, and every language tag must already be in normalized form.
  const targetIndexOf = (stream: MediaStream) =>
    decisions.find((d) => d.stream_id === stream.id)?.target_index ?? 0;
  const outputOrder = [...keptAudioStreams].sort((a, b) => targetIndexOf(a) - targetIndexOf(b));
  const [firstKeptAudio, ...laterKeptAudio] = outputOrder;
  const defaultMismatch = Boolean(firstKeptAudio) && firstKeptAudio.is_default !== 1;
  const nonDefaultHasDefault = laterKeptAudio.some((s) => s.is_default === 1);
  const languageMismatch = keptAudioStreams.some(
    (s) => s.language != null && normalizeLanguage(s.language) !== s.language,
  );

  const is_noop = !(
    anyAudioRemoved ||
    audioOrderChanged ||
    hasSubs ||
    needsTranscode ||
    defaultMismatch ||
    nonDefaultHasDefault ||
    languageMismatch
  );

  const notes: string[] = [];
  if (!origLang && item.needs_review) {
    notes.push("Original language unknown — audio tracks not filtered; manual review required");
  }

  // Subtitles that `isExtractableSubtitle` rejects are still marked
  // "remove" like every other subtitle; list them (grouped by codec) in the
  // notes so they do not disappear silently.
  const nonExtractable = streams.filter((s) => s.type === "Subtitle" && !isExtractableSubtitle(s.codec));
  if (nonExtractable.length > 0) {
    const langsByCodec = new Map<string, string[]>();
    for (const sub of nonExtractable) {
      const codec = (sub.codec ?? "unknown").toLowerCase();
      const lang = sub.language ?? "und";
      const langs = langsByCodec.get(codec);
      if (langs) {
        langs.push(lang);
      } else {
        langsByCodec.set(codec, [lang]);
      }
    }
    const summary = Array.from(langsByCodec, ([codec, langs]) => `${codec} (${langs.join(", ")})`).join("; ");
    notes.push(`${nonExtractable.length} subtitle(s) dropped: ${summary} — not extractable to sidecar`);
  }

  return { is_noop, has_subs: hasSubs, confidence: "low", apple_compat, job_type, decisions, notes };
}
/**
 * Titles that scream "not the main track": commentary, director's track,
 * visually-impaired/audio-description, karaoke, sign language.
 * Case-insensitive.
 *
 * Multi-word phrases accept either a space or a hyphen between the words
 * ("audio description" / "audio-description"), and both "visual impaired"
 * and "visually impaired" are recognised. "AD" only matches as a
 * standalone word, so it does not fire inside words such as "Broadcast".
 */
const NON_PRIMARY_AUDIO_TITLE =
  /\b(commentary|director'?s?\b.*\b(track|comment|feature)|audio[ -]description|descriptive|visual(?:ly)?[ -]impaired|\bad\b|karaoke|sign[ -]language)/i;
/**
 * True when the stream should not be treated as a main audio track:
 * either its hearing-impaired flag is set, or its title (empty when
 * absent) matches `NON_PRIMARY_AUDIO_TITLE`.
 */
function isCommentaryOrAuxiliary(stream: MediaStream): boolean {
  return stream.is_hearing_impaired ? true : NON_PRIMARY_AUDIO_TITLE.test(stream.title ?? "");
}
/**
 * Comparator used to choose the "primary" audio track inside one language
 * group. A negative result means `a` ranks ahead of `b`.
 *
 * Criteria are consulted in order and the first non-zero one decides:
 *  1. more channels first (missing channel count counts as 0);
 *  2. Apple-compatible codec first (per `isAppleCompatible`);
 *  3. default-flagged stream first (missing flag counts as 0);
 *  4. lower `stream_index` first (source order, stable tiebreak).
 */
function betterAudio(a: MediaStream, b: MediaStream): number {
  const appleScore = (s: MediaStream): number => (isAppleCompatible(s.codec ?? "") ? 1 : 0);

  // Thunks keep later criteria unevaluated until an earlier one ties.
  const criteria: Array<() => number> = [
    () => (b.channels ?? 0) - (a.channels ?? 0),
    () => appleScore(b) - appleScore(a),
    () => (b.is_default ?? 0) - (a.is_default ?? 0),
    () => a.stream_index - b.stream_index,
  ];

  for (const criterion of criteria) {
    const verdict = criterion();
    if (verdict !== 0) return verdict;
  }
  return 0;
}
/**
 * Reduce the kept audio streams to one per language by flipping the
 * `action` of surplus streams to "remove" (mutates `decisions` in place).
 *
 *  1. Commentary/auxiliary tracks (`isCommentaryOrAuxiliary`) are removed
 *     regardless of language.
 *  2. The remaining kept streams are grouped by normalized language; in
 *     each group only the best stream according to `betterAudio` survives.
 *  3. Streams without a language tag form their own group, which is only
 *     deduplicated when the original language is known. With an unknown
 *     original language they are all left untouched.
 *
 * @param streams   All streams of the item.
 * @param decisions Per-stream decisions; looked up by `stream_id`.
 * @param origLang  Normalized original language, or null when unknown.
 */
function deduplicateAudioByLanguage(
  streams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
): void {
  const decisionFor = new Map(decisions.map((d) => [d.stream_id, d]));
  const isKept = (s: MediaStream): boolean => decisionFor.get(s.id)?.action === "keep";
  const drop = (s: MediaStream): void => {
    const decision = decisionFor.get(s.id);
    if (decision) decision.action = "remove";
  };
  // Rank the candidates and discard everything but the top one.
  const keepBestOnly = (candidates: MediaStream[]): void => {
    const ranked = [...candidates].sort(betterAudio);
    ranked.slice(1).forEach((loser) => drop(loser));
  };

  const candidates = streams.filter((s) => s.type === "Audio" && isKept(s));

  // Step 1: commentary / audio-description style tracks always go.
  for (const s of candidates) {
    if (isCommentaryOrAuxiliary(s)) drop(s);
  }

  // Step 2: bucket the survivors by language tag.
  const untagged: MediaStream[] = [];
  const tagged = new Map<string, MediaStream[]>();
  for (const s of candidates) {
    if (!isKept(s)) continue;
    if (!s.language) {
      untagged.push(s);
      continue;
    }
    const lang = normalizeLanguage(s.language);
    const bucket = tagged.get(lang);
    if (bucket) {
      bucket.push(s);
    } else {
      tagged.set(lang, [s]);
    }
  }

  for (const bucket of tagged.values()) {
    if (bucket.length > 1) keepBestOnly(bucket);
  }

  // Step 3: untagged audio is only thinned when the original language is
  // known; otherwise the ambiguity is left alone.
  if (origLang && untagged.length > 1) keepBestOnly(untagged);
}
/**
 * Initial keep/remove verdict for one stream, looking at that stream only.
 *
 * - Subtitles are always removed.
 * - Audio is kept when the original language is unknown, when the stream
 *   has no language tag, or when its normalized language is the original
 *   language or one of `audioLanguages`; any other audio is removed.
 * - Every other stream type (video, data, embedded images, anything
 *   unrecognised) is kept.
 */
function decideAction(stream: MediaStream, origLang: string | null, audioLanguages: string[]): "keep" | "remove" {
  if (stream.type === "Subtitle") return "remove";
  if (stream.type !== "Audio") return "keep";

  // Without a reference language or a tag there is nothing to filter on.
  if (!origLang || !stream.language) return "keep";

  const lang = normalizeLanguage(stream.language);
  const wanted = lang === origLang || audioLanguages.includes(lang);
  return wanted ? "keep" : "remove";
}
/**
 * Write `target_index` onto the decision of every kept stream (mutates
 * `decisions` in place). The index is the stream's 0-based position among
 * the kept streams of the same type.
 *
 * Non-audio types keep their order of appearance in `allStreams`. Audio is
 * sorted by `langRank` (original language first, then the configured
 * languages in list order, everything else last), with `stream_index` as
 * the tiebreak.
 *
 * @param allStreams     All streams of the item.
 * @param decisions      Per-stream decisions; matched by `stream_id`.
 * @param origLang       Normalized original language, or null when unknown.
 * @param audioLanguages Additional kept languages, in preference order.
 */
export function assignTargetOrder(
  allStreams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
  audioLanguages: string[],
): void {
  const decisionOf = (s: MediaStream) => decisions.find((d) => d.stream_id === s.id);

  // Bucket kept streams by type, preserving input order inside each bucket.
  const groups = new Map<string, MediaStream[]>();
  for (const s of allStreams) {
    if (decisionOf(s)?.action !== "keep") continue;
    const bucket = groups.get(s.type);
    if (bucket) {
      bucket.push(s);
    } else {
      groups.set(s.type, [s]);
    }
  }

  groups.get("Audio")?.sort((a, b) => {
    const rankGap = langRank(a.language, origLang, audioLanguages) - langRank(b.language, origLang, audioLanguages);
    return rankGap !== 0 ? rankGap : a.stream_index - b.stream_index;
  });

  for (const bucket of groups.values()) {
    bucket.forEach((s, position) => {
      const decision = decisionOf(s);
      if (decision) decision.target_index = position;
    });
  }
}
/**
 * Ordering rank of an audio language; lower sorts earlier.
 *
 * - 0 when the normalized language equals the (known) original language;
 * - position in `audioLanguages` + 1 when it is one of the configured
 *   additional languages;
 * - `audioLanguages.length + 1` for everything else, including a missing
 *   language tag.
 */
function langRank(lang: string | null, origLang: string | null, audioLanguages: string[]): number {
  const fallbackRank = audioLanguages.length + 1;
  if (!lang) return fallbackRank;

  const normalized = normalizeLanguage(lang);
  if (origLang && normalized === origLang) return 0;

  const position = normalized ? audioLanguages.indexOf(normalized) : -1;
  return position >= 0 ? position + 1 : fallbackRank;
}
/**
 * Reports whether the kept audio streams end up in a different order than
 * they had in the source.
 *
 * The kept audio streams are sorted by `stream_index`; the order is
 * unchanged only if the stream at sorted position i has `target_index`
 * equal to i for every i. A kept audio stream without a matching decision
 * or with a null `target_index` therefore counts as "changed".
 */
function checkAudioOrderChanged(streams: MediaStream[], decisions: PlanResult["decisions"]): boolean {
  const decisionOf = (s: MediaStream) => decisions.find((d) => d.stream_id === s.id);

  // filter() yields a fresh array, so sorting it leaves `streams` untouched.
  const keptInSourceOrder = streams
    .filter((s) => s.type === "Audio")
    .filter((s) => decisionOf(s)?.action === "keep");
  keptInSourceOrder.sort((x, y) => x.stream_index - y.stream_index);

  return keptInSourceOrder.some((s, position) => decisionOf(s)?.target_index !== position);
}