Files
netfelix-audio-fix/server/services/ffmpeg.ts
Felix Förtsch d2983d5f38
All checks were successful
Build and Push Docker Image / build (push) Successful in 1m30s
skip non-extractable subs (dvdsub/dvbsub/unknown), summarise ffmpeg errors
Abraham Lincoln crashed with exit 234 because the file had 14 dvd_subtitle
streams: our extraction dict only keyed on the long form (dvd_subtitle)
while jellyfin stores the short form (dvdsub), so the lookup fell back
to .srt, ffmpeg picked the srt muxer, and srt can't encode image-based
subs. textbook silent dict miss.

replaced the extension dict with an EXTRACTABLE map that pairs codec →
{ext, codecArg} and explicitly enumerates every codec we can route to a
single-file sidecar. everything else (dvd_subtitle/dvdsub, dvb_subtitle/
dvbsub, unknown codecs) is now skipped at command-build time. the plan
picks up a note like '14 subtitle(s) dropped: dvdsub (eng, est, ind,
kor, jpn, lav, lit, may, chi, chi, tha, vie, rus, ukr) — not extractable
to sidecar' so the user sees exactly what didn't make it.

also added extractErrorSummary in execute.ts: when a job errors, scan
the last 60 stderr lines for fatal keywords (Error:, Conversion failed!,
Unsupported, Invalid argument, Permission denied, No space left, …),
dedupe, prepend the summary to the job's stored output. the review_plan
notes get the same summary — surfaces the real cause next to the plan
instead of burying it under ffmpeg's 200-line banner.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 18:42:05 +02:00

515 lines
16 KiB
TypeScript

import type { MediaItem, MediaStream, StreamDecision } from "../types";
import { normalizeLanguage } from "./jellyfin";
// ─── Subtitle extraction helpers ──────────────────────────────────────────────
/**
 * Three-letter language code → ISO 639-1 two-letter code, used for the
 * language tag in subtitle sidecar filenames.
 *
 * Keys are looked up with the output of normalizeLanguage (see
 * subtitleLang2), so they must match whatever that function returns. The
 * keys listed here are the terminological ISO 639-2/T forms (deu, fra, zho,
 * nld, ces, …), not the bibliographic /B forms (ger, fre, chi, dut, cze).
 *
 * A code that is missing from this table is not an error: subtitleLang2
 * falls back to the normalized three-letter code itself.
 */
const ISO639_1: Record<string, string> = {
eng: "en",
deu: "de",
spa: "es",
fra: "fr",
ita: "it",
por: "pt",
jpn: "ja",
kor: "ko",
zho: "zh",
ara: "ar",
rus: "ru",
nld: "nl",
swe: "sv",
nor: "no",
dan: "da",
fin: "fi",
pol: "pl",
tur: "tr",
tha: "th",
hin: "hi",
hun: "hu",
ces: "cs",
ron: "ro",
ell: "el",
heb: "he",
fas: "fa",
ukr: "uk",
ind: "id",
cat: "ca",
nob: "nb",
nno: "nn",
isl: "is",
hrv: "hr",
slk: "sk",
bul: "bg",
srp: "sr",
slv: "sl",
lav: "lv",
lit: "lt",
est: "et",
};
/**
 * Subtitle codecs we can reliably extract to a single-file sidecar. Mapped
 * to {ext, codecArg} for the ffmpeg output. Anything NOT in this map is
 * deliberately skipped — ffmpeg's srt/text muxers reject image-based
 * codecs like dvd_subtitle/dvb_subtitle with "Unsupported subtitles
 * codec", crashing the whole job. VobSub extraction would produce a
 * .sub + .idx pair and complicate the predicted-files contract, so for
 * now those are stripped from the container but not written out. A plan
 * note records what was dropped (see analyzer.ts).
 *
 * Jellyfin returns short codec names (dvdsub, pgssub) while ffmpeg's own
 * output uses the long form (dvd_subtitle, hdmv_pgs_subtitle). Both are
 * accepted here to keep alias drift harmless.
 */
const EXTRACTABLE: Record<string, { ext: string; codecArg: string }> = {
  subrip: { ext: "srt", codecArg: "copy" },
  srt: { ext: "srt", codecArg: "copy" },
  ass: { ext: "ass", codecArg: "copy" },
  ssa: { ext: "ssa", codecArg: "copy" },
  webvtt: { ext: "vtt", codecArg: "copy" },
  vtt: { ext: "vtt", codecArg: "copy" },
  mov_text: { ext: "srt", codecArg: "subrip" },
  text: { ext: "srt", codecArg: "copy" },
  hdmv_pgs_subtitle: { ext: "sup", codecArg: "copy" },
  pgssub: { ext: "sup", codecArg: "copy" },
};

/**
 * Report whether a subtitle codec can be written out as a sidecar file.
 *
 * The match is case-insensitive and considers only the codecs explicitly
 * listed in EXTRACTABLE.
 *
 * @param codec Codec name as stored on the stream; null/empty means unknown.
 * @returns true only when the lower-cased codec is an own key of EXTRACTABLE.
 */
export function isExtractableSubtitle(codec: string | null): boolean {
  if (!codec) return false;
  // Own-property check rather than `in`: EXTRACTABLE is a plain object, so
  // `in` would also accept inherited keys such as "constructor" or
  // "__proto__" and the caller would then read an undefined ext/codecArg.
  return Object.prototype.hasOwnProperty.call(EXTRACTABLE, codec.toLowerCase());
}
/**
 * Language tag for a subtitle sidecar filename.
 *
 * A missing language yields "und". Otherwise the language is normalized and
 * shortened to its two-letter form when ISO639_1 knows it; unknown codes are
 * returned in their normalized form.
 */
function subtitleLang2(lang: string | null): string {
  if (lang) {
    const normalized = normalizeLanguage(lang);
    const short = ISO639_1[normalized];
    return short ?? normalized;
  }
  return "und";
}
/**
 * One planned subtitle extraction, as produced by computeExtractionEntries.
 *
 * buildPipelineCommand renders each entry as an additional ffmpeg output:
 *   -map 0:s:<typeIdx> -c:s <codecArg> '<outPath>'
 * and predictExtractedFiles reports outPath as the file that will exist
 * after the job.
 */
interface ExtractionEntry {
/** The source subtitle stream being extracted. */
stream: MediaStream;
/**
 * 0-based position of the stream among ALL subtitle streams ordered by
 * stream_index. Subtitles that are skipped as non-extractable still count,
 * so this stays usable as ffmpeg's 0:s:N specifier.
 */
typeIdx: number;
/**
 * Sidecar path: the base path followed by the language tag, optional
 * "forced" / "hi" markers, an optional numeric counter that de-duplicates
 * colliding names, and the extension from EXTRACTABLE.
 */
outPath: string;
/** Value passed to ffmpeg's -c:s for this output, taken from EXTRACTABLE. */
codecArg: string;
}
/**
 * Work out which subtitle streams get extracted and where each one lands.
 * Shared by buildPipelineCommand and predictExtractedFiles so the command
 * and the predicted file list can never disagree.
 *
 * Only codecs accepted by isExtractableSubtitle are returned; image formats
 * like dvd_subtitle would crash the job (see EXTRACTABLE). Skipped streams
 * still count towards typeIdx, because ffmpeg's 0:s:N counts every subtitle
 * stream in the input.
 *
 * @param allStreams All streams of the item (needed for type-relative indices).
 * @param basePath   Video file path without its extension.
 * @returns Entries ordered by stream_index; empty when nothing is extractable.
 */
function computeExtractionEntries(allStreams: MediaStream[], basePath: string): ExtractionEntry[] {
  const ordered = [...allStreams].sort((a, b) => a.stream_index - b.stream_index);

  // Position of every subtitle stream within the subtitle group.
  const positionById = new Map<number, number>();
  let nextPosition = 0;
  for (const stream of ordered) {
    if (stream.type !== "Subtitle") continue;
    positionById.set(stream.id, nextPosition);
    nextPosition += 1;
  }

  const takenPaths = new Set<string>();
  const entries: ExtractionEntry[] = [];
  for (const stream of ordered) {
    if (stream.type !== "Subtitle" || !isExtractableSubtitle(stream.codec)) continue;

    const suffixParts = [subtitleLang2(stream.language)];
    if (stream.is_forced) suffixParts.push("forced");
    if (stream.is_hearing_impaired) suffixParts.push("hi");
    const suffix = suffixParts.join(".");

    const spec = EXTRACTABLE[(stream.codec ?? "").toLowerCase()];

    // Two streams with the same language/flags/extension would collide; the
    // second and later ones get ".2", ".3", … inserted before the extension.
    let outPath = `${basePath}.${suffix}.${spec.ext}`;
    for (let n = 2; takenPaths.has(outPath); n++) {
      outPath = `${basePath}.${suffix}.${n}.${spec.ext}`;
    }
    takenPaths.add(outPath);

    entries.push({
      stream,
      typeIdx: positionById.get(stream.id) ?? 0,
      outPath,
      codecArg: spec.codecArg,
    });
  }
  return entries;
}
/**
 * List the sidecar files subtitle extraction is expected to produce for an
 * item, without running anything. Used to populate the subtitle_files table
 * after a successful job.
 *
 * @param item    Media item; its file_path minus the extension is the base path.
 * @param streams All streams of the item.
 * @returns One record per extractable subtitle stream, flags coerced to booleans.
 */
export function predictExtractedFiles(
  item: MediaItem,
  streams: MediaStream[],
): Array<{
  file_path: string;
  language: string | null;
  codec: string | null;
  is_forced: boolean;
  is_hearing_impaired: boolean;
}> {
  const withoutExtension = item.file_path.replace(/\.[^.]+$/, "");
  return computeExtractionEntries(streams, withoutExtension).map(({ stream, outPath }) => ({
    file_path: outPath,
    language: stream.language,
    codec: stream.codec,
    is_forced: Boolean(stream.is_forced),
    is_hearing_impaired: Boolean(stream.is_hearing_impaired),
  }));
}
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Three-letter language code → English display name.
 *
 * trackTitle looks up the output of normalizeLanguage here to build subtitle
 * track titles; a code that is missing falls back to the upper-cased code.
 */
const LANG_NAMES: Record<string, string> = {
eng: "English",
deu: "German",
spa: "Spanish",
fra: "French",
ita: "Italian",
por: "Portuguese",
jpn: "Japanese",
kor: "Korean",
zho: "Chinese",
ara: "Arabic",
rus: "Russian",
nld: "Dutch",
swe: "Swedish",
nor: "Norwegian",
dan: "Danish",
fin: "Finnish",
pol: "Polish",
tur: "Turkish",
tha: "Thai",
hin: "Hindi",
hun: "Hungarian",
ces: "Czech",
ron: "Romanian",
ell: "Greek",
heb: "Hebrew",
fas: "Persian",
ukr: "Ukrainian",
ind: "Indonesian",
cat: "Catalan",
nob: "Norwegian Bokmål",
nno: "Norwegian Nynorsk",
isl: "Icelandic",
slk: "Slovak",
hrv: "Croatian",
bul: "Bulgarian",
srp: "Serbian",
slv: "Slovenian",
lav: "Latvian",
lit: "Lithuanian",
est: "Estonian",
};
/**
 * Turn a channel count into a layout label.
 *
 * Known consumer layouts map to "N.M" form (1 → 1.0, 2 → 2.0, 6 → 5.1,
 * 7 → 6.1, 8 → 7.1). Any other count is rendered as "<n>ch". A missing
 * count yields null.
 */
function formatChannels(n: number | null): string | null {
  if (n == null) return null;
  switch (n) {
    case 1:
      return "1.0";
    case 2:
      return "2.0";
    case 6:
      return "5.1";
    case 7:
      return "6.1";
    case 8:
      return "7.1";
    default:
      return `${n}ch`;
  }
}
/**
 * Derive the canonical title for a stream, ignoring whatever title the file
 * carried.
 *
 * Subtitles: the language's display name, with " (Forced)" or " (CC)"
 * appended (forced wins when both flags are set); null when the stream has
 * no language. The original title is intentionally discarded so players show
 * "German", "English (Forced)", etc.; the review UI flags original titles
 * that look like a different language.
 *
 * Everything else (audio): "LANG - CODEC · LAYOUT", e.g. "ENG - AC3 · 5.1",
 * dropping whichever parts are unknown; null when nothing is known. This
 * overrides labels such as "Director's Commentary" — unwanted tracks are
 * expected to have been removed in review, and a decision's custom_title
 * still takes precedence (see buildStreamFlags).
 */
function trackTitle(stream: MediaStream): string | null {
  const { language } = stream;

  if (stream.type === "Subtitle") {
    if (!language) return null;
    const code = normalizeLanguage(language);
    const name = LANG_NAMES[code] ?? code.toUpperCase();
    const suffix = stream.is_forced ? " (Forced)" : stream.is_hearing_impaired ? " (CC)" : "";
    return `${name}${suffix}`;
  }

  const code = language ? normalizeLanguage(language) : null;
  const langPart = code ? code.toUpperCase() : null;

  const descriptors: string[] = [];
  if (stream.codec) descriptors.push(stream.codec.toUpperCase());
  const layout = formatChannels(stream.channels);
  if (layout) descriptors.push(layout);
  const tail = descriptors.join(" · ");

  if (!langPart) return tail || null;
  return tail ? `${langPart} - ${tail}` : langPart;
}
/** Stream type → ffmpeg stream-specifier letter. Types not listed are never mapped. */
const TYPE_SPEC: Record<string, string> = { Video: "v", Audio: "a", Subtitle: "s" };

/**
 * Produce one "-map 0:<t>:<n>" flag per kept stream, using type-relative
 * specifiers (0:v:N, 0:a:N, 0:s:N).
 *
 * Jellyfin's stream_index is absolute and can include EmbeddedImage / Data
 * streams that ffmpeg may count differently (e.g. cover art stored as an
 * attachment). Addressing a stream by its position inside its own type group
 * matches ffmpeg's convention and avoids silent mismatches.
 *
 * @param allStreams Every stream of the input; defines the per-type numbering.
 * @param kept       Streams to map, in the desired output order.
 * @returns Flags in the order of `kept`; streams of unmapped types are omitted.
 */
function buildMaps(allStreams: MediaStream[], kept: { stream: MediaStream; dec: StreamDecision }[]): string[] {
  // Number each stream within its type, walking the input in stream_index
  // order (the order ffmpeg sees the streams in).
  const ordered = [...allStreams].sort((a, b) => a.stream_index - b.stream_index);
  const nextIndex: Record<string, number> = {};
  const positionById = new Map<number, number>();
  for (const stream of ordered) {
    if (!TYPE_SPEC[stream.type]) continue;
    const position = nextIndex[stream.type] ?? 0;
    positionById.set(stream.id, position);
    nextIndex[stream.type] = position + 1;
  }

  const flags: string[] = [];
  for (const { stream } of kept) {
    const letter = TYPE_SPEC[stream.type];
    if (!letter) continue;
    // A kept stream that is absent from allStreams falls back to position 0.
    flags.push(`-map 0:${letter}:${positionById.get(stream.id) ?? 0}`);
  }
  return flags;
}
/**
 * Build disposition and metadata flags for the kept audio streams.
 *
 * For the i-th kept audio stream (in the order given):
 * - disposition: the first one becomes "default", all others are cleared ("0");
 * - title: the decision's custom_title if set, otherwise the harmonized
 *   title from trackTitle; omitted when neither yields a value;
 * - language: the three-letter tag returned by normalizeLanguage (the
 *   terminological forms used throughout this file, e.g. "deu"), or "und"
 *   when the stream has no language.
 *
 * The result is joined into a shell command by the callers, so every value
 * that originates from file metadata must be shell-safe.
 *
 * @param kept Kept streams in output order; non-audio entries are ignored.
 * @returns Flat list of ffmpeg arguments.
 */
function buildStreamFlags(kept: { stream: MediaStream; dec: StreamDecision }[]): string[] {
  // Plain tokens are emitted verbatim so ordinary commands stay unchanged;
  // anything else is quoted before it reaches the shell.
  const plainToken = /^[A-Za-z0-9_-]+$/;
  const audioKept = kept.filter((k) => k.stream.type === "Audio");
  const args: string[] = [];
  audioKept.forEach((k, i) => {
    args.push(`-disposition:a:${i}`, i === 0 ? "default" : "0");

    const title = k.dec.custom_title ?? trackTitle(k.stream);
    if (title) args.push(`-metadata:s:a:${i}`, `title=${shellQuote(title)}`);

    // The language comes from the media file's own metadata, and what
    // normalizeLanguage does with unrecognised input is not visible here, so
    // it gets the same shell protection the title already has.
    const lang = k.stream.language ? normalizeLanguage(k.stream.language) : "und";
    const langValue = plainToken.test(lang) ? lang : shellQuote(lang);
    args.push(`-metadata:s:a:${i}`, `language=${langValue}`);
  });
  return args;
}
/** Canonical output order of stream types. Used by every command builder. */
const TYPE_ORDER: Record<string, number> = { Video: 0, Audio: 1, Subtitle: 2, Data: 3, EmbeddedImage: 4 };

/**
 * Pair every stream whose decision is "keep" with that decision and return
 * the pairs in canonical output order: by type priority (TYPE_ORDER, unknown
 * types last), then by target_index within a type (missing index counts as 0).
 * This is the single source of truth for output stream ordering.
 *
 * @param streams   All streams of the item.
 * @param decisions Decisions; the first one matching a stream's id is used.
 * @returns Kept stream/decision pairs, sorted. Streams without a decision are dropped.
 */
export function sortKeptStreams(
  streams: MediaStream[],
  decisions: StreamDecision[],
): { stream: MediaStream; dec: StreamDecision }[] {
  const typeRank = (type: string): number => TYPE_ORDER[type] ?? 9;

  const kept: { stream: MediaStream; dec: StreamDecision }[] = streams.flatMap((stream) => {
    const dec = decisions.find((d) => d.stream_id === stream.id);
    return dec?.action === "keep" ? [{ stream, dec }] : [];
  });

  return kept.sort(
    (a, b) =>
      typeRank(a.stream.type) - typeRank(b.stream.type) ||
      (a.dec.target_index ?? 0) - (b.dec.target_index ?? 0),
  );
}
/**
 * Build the full shell command that processes a media file: extractable
 * subtitle streams are written to sidecar files next to the video and the
 * container is remuxed with the streams the decisions keep (audio tracks
 * transcoded per-track where a decision asks for it).
 *
 * Convenience wrapper around buildPipelineCommand — the single source of
 * truth for how a file is touched — for callers that only need the command
 * string and not the list of extracted files.
 */
export function buildCommand(item: MediaItem, streams: MediaStream[], decisions: StreamDecision[]): string {
  const { command } = buildPipelineCommand(item, streams, decisions);
  return command;
}
/**
 * Build a command that remuxes the kept streams into a Matroska container.
 * Used when the MP4 container can't hold certain subtitle codecs.
 *
 * ffmpeg stream-copies into "<name>.tmp.mkv"; only if that succeeds is the
 * temp file moved to "<name>.mkv".
 *
 * @returns A single shell command string ("ffmpeg … && mv …").
 */
export function buildMkvConvertCommand(item: MediaItem, streams: MediaStream[], decisions: StreamDecision[]): string {
  const source = item.file_path;
  const swapExtension = (replacement: string): string => source.replace(/\.[^.]+$/, replacement);
  const finalPath = swapExtension(".mkv");
  const scratchPath = swapExtension(".tmp.mkv");

  const kept = sortKeptStreams(streams, decisions);
  const ffmpegPart = [
    "ffmpeg",
    "-y",
    "-i",
    shellQuote(source),
    ...buildMaps(streams, kept),
    ...buildStreamFlags(kept),
    "-c copy",
    "-f matroska",
    shellQuote(scratchPath),
  ].join(" ");

  return `${ffmpegPart} && mv ${shellQuote(scratchPath)} ${shellQuote(finalPath)}`;
}
/**
 * Build one ffmpeg invocation that does everything in a single pass:
 *   1. writes each extractable subtitle stream to its own sidecar file,
 *   2. remuxes the kept streams in canonical order into "<name>.tmp.<ext>",
 *   3. transcodes the audio tracks whose decision carries a transcode_codec,
 * followed by "&& mv" of the temp file over the original.
 *
 * @param item      Media item; file_path is the input and the final output.
 * @param streams   All streams of the item.
 * @param decisions Per-stream decisions.
 * @returns The shell command plus the sidecar files it will create (flags as 0/1).
 */
export function buildPipelineCommand(
  item: MediaItem,
  streams: MediaStream[],
  decisions: (StreamDecision & { stream?: MediaStream })[],
): {
  command: string;
  extractedFiles: Array<{
    path: string;
    language: string | null;
    codec: string | null;
    is_forced: number;
    is_hearing_impaired: number;
  }>;
} {
  const inputPath = item.file_path;
  const extensionPattern = /\.[^.]+$/;
  const ext = inputPath.match(/\.([^.]+)$/)?.[1] ?? "mkv";
  const tmpPath = inputPath.replace(extensionPattern, `.tmp.${ext}`);
  const basePath = inputPath.replace(extensionPattern, "");

  // One extra ffmpeg output per extractable subtitle stream.
  const extractionEntries = computeExtractionEntries(streams, basePath);
  const subOutputArgs = extractionEntries.flatMap((entry) => [
    `-map 0:s:${entry.typeIdx}`,
    `-c:s ${entry.codecArg}`,
    shellQuote(entry.outPath),
  ]);

  // Streams that go into the remuxed main output.
  const kept = sortKeptStreams(streams, decisions as StreamDecision[]);
  const maps = buildMaps(streams, kept);

  // Without any transcode a blanket "-c copy" covers video and audio alike;
  // otherwise every kept audio track gets its own codec flag.
  let codecFlags: string[] = ["-c copy"];
  if (kept.some((k) => k.dec.transcode_codec)) {
    codecFlags = ["-c:v copy"];
    kept
      .filter((k) => k.stream.type === "Audio")
      .forEach(({ stream, dec }, audioIdx) => {
        if (!dec.transcode_codec) {
          codecFlags.push(`-c:a:${audioIdx} copy`);
          return;
        }
        codecFlags.push(`-c:a:${audioIdx} ${dec.transcode_codec}`);
        // EAC3 gets an explicit bitrate chosen from the channel count.
        if (dec.transcode_codec === "eac3") {
          const bitrate = (stream.channels ?? 2) >= 6 ? "640k" : "256k";
          codecFlags.push(`-b:a:${audioIdx} ${bitrate}`);
        }
      });
  }

  // Disposition + metadata flags for audio.
  const streamFlags = buildStreamFlags(kept);

  // Sidecar outputs come first, the remuxed file is the last output.
  const ffmpegPart = [
    "ffmpeg",
    "-y",
    "-i",
    shellQuote(inputPath),
    ...subOutputArgs,
    ...maps,
    ...codecFlags,
    ...streamFlags,
    shellQuote(tmpPath),
  ].join(" ");

  const extractedFiles = extractionEntries.map(({ stream, outPath }) => ({
    path: outPath,
    language: stream.language,
    codec: stream.codec,
    is_forced: stream.is_forced ? 1 : 0,
    is_hearing_impaired: stream.is_hearing_impaired ? 1 : 0,
  }));

  return {
    command: `${ffmpegPart} && mv ${shellQuote(tmpPath)} ${shellQuote(inputPath)}`,
    extractedFiles,
  };
}
/**
 * Quote a string for use as a single shell word.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * written as '\'' (close the quote, escaped quote, reopen the quote).
 */
export function shellQuote(s: string): string {
  const escaped = s.split("'").join("'\\''");
  return "'" + escaped + "'";
}
/**
 * Split the streams into those that will be removed and those that stay.
 *
 * A stream counts as removed when it has no decision at all or when its
 * (first matching) decision has action "remove"; everything else is kept.
 * Input order is preserved within each list.
 */
export function summarizeChanges(
  streams: MediaStream[],
  decisions: StreamDecision[],
): { removed: MediaStream[]; kept: MediaStream[] } {
  const removed: MediaStream[] = [];
  const kept: MediaStream[] = [];
  for (const stream of streams) {
    const dec = decisions.find((d) => d.stream_id === stream.id);
    const staysInFile = !!dec && dec.action !== "remove";
    (staysInFile ? kept : removed).push(stream);
  }
  return { removed, kept };
}
/**
 * Format a stream as a one-line label for display.
 *
 * Parts are joined with " · " in this order: type, codec, language
 * (language_display preferred, falling back to the raw language), quoted
 * title, channel count (audio only), "forced", "CC". Parts without a value
 * are left out.
 */
export function streamLabel(s: MediaStream): string {
  const parts: string[] = [s.type];
  if (s.codec) parts.push(s.codec);
  // Pick the language with the same truthiness rule that decides whether to
  // show it at all; otherwise an empty language_display next to a real
  // language would add an empty segment to the label.
  const language = s.language_display || s.language;
  if (language) parts.push(language);
  if (s.title) parts.push(`"${s.title}"`);
  if (s.type === "Audio" && s.channels) parts.push(`${s.channels}ch`);
  if (s.is_forced) parts.push("forced");
  if (s.is_hearing_impaired) parts.push("CC");
  return parts.join(" · ");
}