8112bfeb65
Build and Push Docker Image / build (push) Successful in 3m3s
adds stream_decisions.custom_language (ISO 639-2 code or null) so the user can correct a mislabeled audio track — e.g. a Spanish dub tagged "und" in the container — without going through Jellyfin. the override wins over stream.language everywhere it matters: the analyzer reads it for keep/remove decisions and track ordering, the ffmpeg command builder writes it as both the language metadata tag and the harmonized track title, and reanalyze preserves it across reruns and rescans. on the audio detail page, each pending audio row swaps its language cell for an inline <select> populated from LANG_NAMES. picking the raw file language clears the override; anything else sets it and triggers a server-side reanalyze so keep/remove + target_index update immediately. a small ✎ hint marks overridden tracks. rebuilt commands tag the output accordingly so Jellyfin reads the corrected language. PATCH /api/review/:id/stream/:streamId/language validates the code against LANG_NAMES (accepts ISO 639-1/2/2B aliases, rejects garbage) and runs reanalyze inside. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
496 lines
16 KiB
TypeScript
496 lines
16 KiB
TypeScript
import type { MediaItem, MediaStream, StreamDecision } from "../types";
|
|
import { normalizeLanguage } from "./jellyfin";
|
|
|
|
// ─── Subtitle extraction helpers ──────────────────────────────────────────────
|
|
|
|
/**
 * ISO 639-2/T three-letter codes → ISO 639-1 two-letter codes, used for the
 * language part of subtitle sidecar filenames.
 *
 * Keys are the terminological forms ("deu", "fra", "zho"), not the
 * bibliographic ones ("ger", "fre", "chi"). A code that is missing here is
 * used unchanged by subtitleLang2.
 */
const ISO639_1: Record<string, string> = {
  eng: "en",
  deu: "de",
  spa: "es",
  fra: "fr",
  ita: "it",
  por: "pt",
  jpn: "ja",
  kor: "ko",
  zho: "zh",
  ara: "ar",
  rus: "ru",
  nld: "nl",
  swe: "sv",
  nor: "no",
  dan: "da",
  fin: "fi",
  pol: "pl",
  tur: "tr",
  tha: "th",
  hin: "hi",
  hun: "hu",
  ces: "cs",
  ron: "ro",
  ell: "el",
  heb: "he",
  fas: "fa",
  ukr: "uk",
  ind: "id",
  cat: "ca",
  nob: "nb",
  nno: "nn",
  isl: "is",
  hrv: "hr",
  slk: "sk",
  bul: "bg",
  srp: "sr",
  slv: "sl",
  lav: "lv",
  lit: "lt",
  est: "et",
};
|
|
|
|
/**
 * Subtitle codecs we can reliably extract to a single-file sidecar. Mapped
 * to {ext, codecArg} for the ffmpeg output. Anything NOT in this map is
 * deliberately skipped — ffmpeg's srt/text muxers reject image-based
 * codecs like dvd_subtitle/dvb_subtitle with "Unsupported subtitles
 * codec", crashing the whole job. VobSub extraction would produce a
 * .sub + .idx pair and complicate the predicted-files contract, so for
 * now those are stripped from the container but not written out. A plan
 * note records what was dropped (see analyzer.ts).
 *
 * Jellyfin returns short codec names (dvdsub, pgssub) while ffmpeg's own
 * output uses the long form (dvd_subtitle, hdmv_pgs_subtitle). Both are
 * accepted here to keep alias drift harmless.
 */
const EXTRACTABLE: Record<string, { ext: string; codecArg: string }> = {
  subrip: { ext: "srt", codecArg: "copy" },
  srt: { ext: "srt", codecArg: "copy" },
  ass: { ext: "ass", codecArg: "copy" },
  ssa: { ext: "ssa", codecArg: "copy" },
  webvtt: { ext: "vtt", codecArg: "copy" },
  vtt: { ext: "vtt", codecArg: "copy" },
  mov_text: { ext: "srt", codecArg: "subrip" },
  text: { ext: "srt", codecArg: "copy" },
  hdmv_pgs_subtitle: { ext: "sup", codecArg: "copy" },
  pgssub: { ext: "sup", codecArg: "copy" },
};

/**
 * Whether a subtitle codec can be written out as a single sidecar file.
 *
 * @param codec Codec name as reported for the stream; matched case-insensitively.
 * @returns true only when the lower-cased codec is a key of EXTRACTABLE;
 *          false for null, empty, or unknown codecs.
 */
export function isExtractableSubtitle(codec: string | null): boolean {
  if (!codec) return false;
  // Own-property check: the `in` operator would also accept names inherited
  // from Object.prototype (e.g. "constructor"), which have no {ext, codecArg}.
  return Object.prototype.hasOwnProperty.call(EXTRACTABLE, codec.toLowerCase());
}
|
|
|
|
/**
 * Language component for a subtitle sidecar filename.
 *
 * Normalizes the stream language, prefers the two-letter code from ISO639_1
 * and otherwise keeps the normalized code. The result is embedded in a file
 * name, so anything other than ASCII letters, digits, "_" and "-" is
 * stripped; a missing or fully stripped language becomes "und".
 *
 * @param lang Raw language tag of the stream, or null.
 * @returns A filename-safe language code, never empty.
 */
function subtitleLang2(lang: string | null): string {
  if (!lang) return "und";
  const normalized = normalizeLanguage(lang);
  // Own-property lookup so inherited names such as "constructor" are not
  // mistaken for table entries.
  const code = Object.prototype.hasOwnProperty.call(ISO639_1, normalized) ? ISO639_1[normalized] : normalized;
  // The language tag comes from container metadata; keep path separators and
  // other odd characters out of the sidecar file name.
  const safe = String(code).replace(/[^A-Za-z0-9_-]/g, "");
  return safe || "und";
}
|
|
|
|
/** One subtitle stream scheduled for extraction to a sidecar file. */
interface ExtractionEntry {
  /** Source subtitle stream. */
  stream: MediaStream;
  /** 0-based position among ALL subtitle streams of the input, i.e. N in `0:s:N`. */
  typeIdx: number;
  /** Unquoted destination path of the sidecar file. */
  outPath: string;
  /** Value passed to ffmpeg's `-c:s` for this output. */
  codecArg: string;
}

/**
 * Compute extraction metadata for every extractable subtitle stream so each
 * one can be written to an external sidecar file next to the video.
 *
 * The caller turns each entry into a separate ffmpeg output:
 *   -map 0:s:N -c:s copy 'basename.en.srt'
 *
 * File names are `<basePath>.<lang>[.forced][.hi].<ext>`; when two streams
 * would produce the same name, later ones get a numeric suffix starting at 2
 * (`basename.en.2.srt`).
 *
 * @param allStreams All streams for the item (needed to compute type-relative indices)
 * @param basePath Video file path without extension (host or /work path)
 * @returns Entries in ascending stream_index order; empty when nothing is extractable.
 */
function computeExtractionEntries(allStreams: MediaStream[], basePath: string): ExtractionEntry[] {
  // filter() returns a fresh array, so sorting does not touch the caller's list.
  const subtitles = allStreams
    .filter((s) => s.type === "Subtitle")
    .sort((a, b) => a.stream_index - b.stream_index);

  const usedNames = new Set<string>();
  const entries: ExtractionEntry[] = [];

  // typeIdx counts every subtitle stream, including the ones we skip, because
  // ffmpeg's 0:s:N numbering does too.
  subtitles.forEach((stream, typeIdx) => {
    // Only extract codecs we can route to a sane single-file sidecar. Image
    // formats like dvd_subtitle crash the job if we try — see EXTRACTABLE.
    if (!isExtractableSubtitle(stream.codec)) return;

    const { ext, codecArg } = EXTRACTABLE[(stream.codec ?? "").toLowerCase()];

    const nameParts = [subtitleLang2(stream.language)];
    if (stream.is_forced) nameParts.push("forced");
    if (stream.is_hearing_impaired) nameParts.push("hi");

    const stem = `${basePath}.${nameParts.join(".")}`;
    let outPath = `${stem}.${ext}`;
    for (let counter = 2; usedNames.has(outPath); counter++) {
      outPath = `${stem}.${counter}.${ext}`;
    }
    usedNames.add(outPath);

    entries.push({ stream, typeIdx, outPath, codecArg });
  });

  return entries;
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Three-letter language code → English display name.
 *
 * Used in this file to title subtitle tracks (see trackTitle); exported so
 * other modules can reuse the same table.
 */
export const LANG_NAMES: Record<string, string> = {
  eng: "English",
  deu: "German",
  spa: "Spanish",
  fra: "French",
  ita: "Italian",
  por: "Portuguese",
  jpn: "Japanese",
  kor: "Korean",
  zho: "Chinese",
  ara: "Arabic",
  rus: "Russian",
  nld: "Dutch",
  swe: "Swedish",
  nor: "Norwegian",
  dan: "Danish",
  fin: "Finnish",
  pol: "Polish",
  tur: "Turkish",
  tha: "Thai",
  hin: "Hindi",
  hun: "Hungarian",
  ces: "Czech",
  ron: "Romanian",
  ell: "Greek",
  heb: "Hebrew",
  fas: "Persian",
  ukr: "Ukrainian",
  ind: "Indonesian",
  cat: "Catalan",
  nob: "Norwegian Bokmål",
  nno: "Norwegian Nynorsk",
  isl: "Icelandic",
  slk: "Slovak",
  hrv: "Croatian",
  bul: "Bulgarian",
  srp: "Serbian",
  slv: "Slovenian",
  lav: "Latvian",
  lit: "Lithuanian",
  est: "Estonian",
};
|
|
|
|
/**
 * Channel count → "N.M" layout string (1.0, 2.0, 5.1, 6.1, 7.1).
 * Falls back to "Nch" for anything outside the common consumer layouts.
 *
 * @param n Number of audio channels, or null when unknown.
 * @returns The layout label, or null when n is null/undefined.
 */
function formatChannels(n: number | null): string | null {
  if (n == null) return null;
  switch (n) {
    case 1:
      return "1.0";
    case 2:
      return "2.0";
    case 6:
      return "5.1";
    case 7:
      return "6.1";
    case 8:
      return "7.1";
    default:
      return `${n}ch`;
  }
}
|
|
|
|
/**
 * Canonical title for a stream in the output file.
 *
 * - Subtitle: the language name from LANG_NAMES (or the upper-cased code),
 *   with " (Forced)" or " (CC)" appended; null when the stream has no
 *   language. customLanguage is not consulted for subtitles.
 * - Anything else (in practice audio): "LANG - CODEC · LAYOUT", e.g.
 *   "ENG - AC3 · 5.1", built from whichever parts are available; null when
 *   none are.
 *
 * @param stream The stream to title.
 * @param customLanguage Per-stream language override; wins over stream.language
 *                       for the non-subtitle title.
 * @returns The title, or null when nothing meaningful can be derived.
 */
function trackTitle(stream: MediaStream, customLanguage: string | null = null): string | null {
  if (stream.type === "Subtitle") {
    // Subtitles always get a clean language-based title so Jellyfin displays
    // "German", "English (Forced)", etc. regardless of the original file title.
    // The review UI shows a ⚠ badge when the original title looks like a
    // different language, so users can spot and remove mislabeled tracks.
    if (!stream.language) return null;
    const code = normalizeLanguage(stream.language);
    const name = LANG_NAMES[code] ?? code.toUpperCase();
    // Forced takes precedence when a stream is both forced and hearing-impaired.
    if (stream.is_forced) return `${name} (Forced)`;
    if (stream.is_hearing_impaired) return `${name} (CC)`;
    return name;
  }

  // Audio: harmonize to "ENG - AC3 · 5.1". Overrides whatever the file had
  // (e.g. "Audio Description", "Director's Commentary") — the user uses
  // the review UI to drop unwanted tracks before we get here, so by this
  // point every kept audio track is a primary track that deserves a clean
  // canonical label. If a user wants a different title, custom_title on
  // the decision still wins (see buildStreamFlags). A per-stream language
  // override comes through as customLanguage so "UND → Spanish" renames
  // flow through to the harmonized title too.
  const rawLang = customLanguage ?? stream.language;
  const normalized = rawLang ? normalizeLanguage(rawLang) : null;
  const langPart = normalized ? normalized.toUpperCase() : null;

  const codecPart = stream.codec ? stream.codec.toUpperCase() : null;
  const channelsPart = formatChannels(stream.channels);
  const tail = [codecPart, channelsPart].filter((v): v is string => !!v).join(" · ");

  const pieces = [langPart, tail].filter((v): v is string => !!v);
  return pieces.length > 0 ? pieces.join(" - ") : null;
}
|
|
|
|
/** Stream type → ffmpeg stream-specifier letter. Types not listed are never mapped. */
const TYPE_SPEC: Record<string, string> = { Video: "v", Audio: "a", Subtitle: "s" };

/**
 * Build -map flags using type-relative specifiers (0:v:N, 0:a:N, 0:s:N).
 *
 * Jellyfin's stream_index is an absolute index that can include EmbeddedImage
 * and Data streams which ffmpeg may count differently (e.g. cover art stored
 * as attachments). Using the stream's position within its own type group
 * matches ffmpeg's 0:a:N convention exactly and avoids silent mismatches.
 *
 * @param allStreams Every stream of the input; defines the per-type numbering.
 * @param kept Streams to include, already in the desired output order.
 * @returns One "-map 0:<t>:<n>" string per kept stream whose type is in
 *          TYPE_SPEC, in the order of `kept`.
 */
function buildMaps(allStreams: MediaStream[], kept: { stream: MediaStream; dec: StreamDecision }[]): string[] {
  // Map each stream id → its 0-based position among streams of the same type,
  // sorted by stream_index (the order ffmpeg sees them in the input).
  const typePos = new Map<number, number>();
  const counts: Record<string, number> = {};
  const inputOrder = [...allStreams].sort((a, b) => a.stream_index - b.stream_index);
  for (const s of inputOrder) {
    if (!TYPE_SPEC[s.type]) continue;
    const position = counts[s.type] ?? 0;
    typePos.set(s.id, position);
    counts[s.type] = position + 1;
  }

  return kept.flatMap(({ stream }) => {
    const spec = TYPE_SPEC[stream.type];
    if (!spec) return [];
    return [`-map 0:${spec}:${typePos.get(stream.id) ?? 0}`];
  });
}
|
|
|
|
/**
 * Build disposition and metadata flags for kept audio streams.
 * - Marks the first kept audio stream as default, clears all others.
 * - Sets a title on every kept audio stream: the decision's custom_title
 *   when present, otherwise the harmonized title from trackTitle.
 * - Writes the normalized 3-letter language tag (e.g. "en" → "eng",
 *   "ger" → "deu", i.e. the ISO 639-2/T form). Streams with no language
 *   get "und" (ffmpeg convention).
 *
 * @param kept Kept streams in output order; non-audio entries are ignored.
 * @returns Flat list of ffmpeg arguments, indexed by output audio position.
 */
function buildStreamFlags(kept: { stream: MediaStream; dec: StreamDecision }[]): string[] {
  // Values made only of these characters need no shell quoting.
  const shellSafe = /^[A-Za-z0-9_-]+$/;
  const args: string[] = [];

  let audioIdx = 0;
  for (const { stream, dec } of kept) {
    if (stream.type !== "Audio") continue;
    const i = audioIdx++;

    args.push(`-disposition:a:${i}`, i === 0 ? "default" : "0");

    const title = dec.custom_title ?? trackTitle(stream, dec.custom_language);
    if (title) args.push(`-metadata:s:a:${i}`, `title=${shellQuote(title)}`);

    // Per-stream language override wins over the raw file tag so the
    // ffmpeg output carries the corrected language (e.g. "und" → "spa").
    const rawLang = dec.custom_language ?? stream.language;
    const lang = rawLang ? normalizeLanguage(rawLang) : "und";
    // The raw tag originates from file metadata and ends up in a shell
    // command line: quote it unless it is a plain token. Ordinary codes are
    // emitted unquoted, exactly as before.
    const langValue = shellSafe.test(lang) ? lang : shellQuote(lang);
    args.push(`-metadata:s:a:${i}`, `language=${langValue}`);
  }

  return args;
}
|
|
|
|
/**
 * Canonical output order of stream types. Used by every command builder.
 * Types missing from this table sort after all listed ones.
 */
const TYPE_ORDER: Record<string, number> = { Video: 0, Audio: 1, Subtitle: 2, Data: 3, EmbeddedImage: 4 };

/**
 * Return kept streams paired with their decisions, sorted in canonical
 * output order: type priority first, then target_index within each type.
 * This is the single source of truth for output stream ordering.
 *
 * A stream is kept only when its decision has action "keep"; streams without
 * a decision are dropped. If several decisions reference the same stream,
 * the first one in `decisions` is used.
 *
 * @param streams All streams of the item.
 * @param decisions Decisions, matched to streams via stream_id.
 * @returns A new array of { stream, dec } pairs; inputs are not modified.
 */
export function sortKeptStreams(
  streams: MediaStream[],
  decisions: StreamDecision[],
): { stream: MediaStream; dec: StreamDecision }[] {
  // Index decisions once instead of scanning the list for every stream.
  const decisionByStream = new Map<number, StreamDecision>();
  for (const d of decisions) {
    if (!decisionByStream.has(d.stream_id)) decisionByStream.set(d.stream_id, d);
  }

  const kept: { stream: MediaStream; dec: StreamDecision }[] = [];
  for (const s of streams) {
    const dec = decisionByStream.get(s.id);
    if (dec?.action === "keep") kept.push({ stream: s, dec });
  }

  kept.sort((a, b) => {
    const typeDiff = (TYPE_ORDER[a.stream.type] ?? 9) - (TYPE_ORDER[b.stream.type] ?? 9);
    if (typeDiff !== 0) return typeDiff;
    // A missing target_index sorts as 0.
    return (a.dec.target_index ?? 0) - (b.dec.target_index ?? 0);
  });
  return kept;
}
|
|
|
|
/**
 * Build the full shell command to process a media file. Every subtitle
 * stream is first extracted to a sidecar file next to the video, then
 * the container is remuxed without subtitles and with only the audio
 * tracks decisions kept (transcoding incompatible codecs per-track).
 *
 * Thin wrapper over buildPipelineCommand — the single source of truth
 * for how we touch a file. Kept as a named export so callers don't have
 * to destructure { command } at every site.
 *
 * @param item Media item; its file_path is the file being processed.
 * @param streams All streams of the item.
 * @param decisions Per-stream decisions.
 * @returns The command string produced by buildPipelineCommand.
 */
export function buildCommand(item: MediaItem, streams: MediaStream[], decisions: StreamDecision[]): string {
  const { command } = buildPipelineCommand(item, streams, decisions);
  return command;
}
|
|
|
|
/**
 * Build a command that also changes the container to MKV.
 * Used when MP4 container can't hold certain subtitle codecs.
 *
 * ffmpeg writes to "<base>.tmp.mkv" and the result is then moved to
 * "<base>.mkv", where <base> is the input path without its extension.
 *
 * NOTE(review): when the input is not already .mkv, this command leaves the
 * original file next to the new one — confirm the caller cleans it up.
 *
 * @param item Media item; its file_path is the input.
 * @param streams All streams of the item.
 * @param decisions Per-stream decisions; only kept streams are mapped.
 * @returns A single shell command string.
 */
export function buildMkvConvertCommand(item: MediaItem, streams: MediaStream[], decisions: StreamDecision[]): string {
  const inputPath = item.file_path;
  // Only a dot inside the final path segment counts as an extension. Without
  // one, append instead of replacing — otherwise tmp/output would equal the
  // input and `ffmpeg -y` would overwrite the file it is reading.
  const extMatch = inputPath.match(/\.[^./]+$/);
  const basePath = extMatch ? inputPath.slice(0, extMatch.index) : inputPath;
  const outputPath = `${basePath}.mkv`;
  const tmpPath = `${basePath}.tmp.mkv`;

  const kept = sortKeptStreams(streams, decisions);
  const maps = buildMaps(streams, kept);
  const streamFlags = buildStreamFlags(kept);

  const quotedTmp = shellQuote(tmpPath);
  return [
    "ffmpeg",
    "-y",
    "-i",
    shellQuote(inputPath),
    ...maps,
    ...streamFlags,
    "-c copy",
    "-f matroska",
    quotedTmp,
    "&&",
    "mv",
    quotedTmp,
    shellQuote(outputPath),
  ].join(" ");
}
|
|
|
|
/**
 * Build a single FFmpeg command that:
 * 1. Extracts subtitles to sidecar files
 * 2. Remuxes with reordered/filtered audio
 * 3. Transcodes incompatible audio codecs
 *
 * The remuxed output is written to "<base>.tmp.<ext>" and then moved over
 * the input file. Subtitle sidecars are separate ffmpeg outputs placed before
 * the main output on the command line.
 *
 * @param item Media item; its file_path is both input and final destination.
 * @param streams All streams of the item.
 * @param decisions Per-stream decisions; only those with action "keep" are
 *                  mapped into the remuxed file.
 * @returns The shell command plus the list of sidecar files it will create.
 */
export function buildPipelineCommand(
  item: MediaItem,
  streams: MediaStream[],
  decisions: (StreamDecision & { stream?: MediaStream })[],
): {
  command: string;
  extractedFiles: Array<{
    path: string;
    language: string | null;
    codec: string | null;
    is_forced: number;
    is_hearing_impaired: number;
  }>;
} {
  const inputPath = item.file_path;
  // Only a dot inside the final path segment counts as an extension. For an
  // extensionless input the temp file gets ".tmp.mkv" appended instead of
  // colliding with the input, which `ffmpeg -y` would otherwise overwrite
  // while reading it.
  const extMatch = inputPath.match(/\.([^./]+)$/);
  const ext = extMatch?.[1] ?? "mkv";
  const basePath = extMatch ? inputPath.slice(0, extMatch.index) : inputPath;
  const tmpPath = `${basePath}.tmp.${ext}`;

  // --- Subtitle extraction outputs ---
  const extractionEntries = computeExtractionEntries(streams, basePath);
  const subOutputArgs = extractionEntries.flatMap((e) => [
    `-map 0:s:${e.typeIdx}`,
    `-c:s ${e.codecArg}`,
    shellQuote(e.outPath),
  ]);

  // --- Kept streams for remuxed output ---
  const kept = sortKeptStreams(streams, decisions);
  const maps = buildMaps(streams, kept);

  // --- Codec flags ---
  // With no transcoding anywhere, a single "-c copy" covers video + audio.
  const hasTranscode = kept.some((k) => k.dec.transcode_codec);
  const codecFlags: string[] = [];
  if (hasTranscode) {
    codecFlags.push("-c:v copy");
    let audioIdx = 0;
    for (const { stream, dec } of kept) {
      if (stream.type !== "Audio") continue;
      if (dec.transcode_codec) {
        codecFlags.push(`-c:a:${audioIdx} ${dec.transcode_codec}`);
        // For EAC3, set a reasonable bitrate based on channel count
        if (dec.transcode_codec === "eac3") {
          const bitrate = (stream.channels ?? 2) >= 6 ? "640k" : "256k";
          codecFlags.push(`-b:a:${audioIdx} ${bitrate}`);
        }
      } else {
        codecFlags.push(`-c:a:${audioIdx} copy`);
      }
      audioIdx++;
    }
  } else {
    codecFlags.push("-c copy");
  }

  // Disposition + metadata flags for audio
  const streamFlags = buildStreamFlags(kept);

  // Assemble: input, sidecar outputs, then map/codec/metadata flags for the
  // main output, which must come last.
  const quotedTmp = shellQuote(tmpPath);
  const parts: string[] = [
    "ffmpeg",
    "-y",
    "-i",
    shellQuote(inputPath),
    ...subOutputArgs,
    ...maps,
    ...codecFlags,
    ...streamFlags,
    quotedTmp,
  ];

  const command = `${parts.join(" ")} && mv ${quotedTmp} ${shellQuote(inputPath)}`;

  return {
    command,
    extractedFiles: extractionEntries.map((e) => ({
      path: e.outPath,
      language: e.stream.language,
      codec: e.stream.codec,
      is_forced: e.stream.is_forced ? 1 : 0,
      is_hearing_impaired: e.stream.is_hearing_impaired ? 1 : 0,
    })),
  };
}
|
|
|
|
/**
 * Safely quote a string (typically a path) for POSIX shell usage.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * replaced by `'\''` (close quote, escaped quote, reopen quote).
 *
 * @param s Raw string.
 * @returns The single-quoted form; `''` for the empty string.
 */
export function shellQuote(s: string): string {
  const escaped = s.split("'").join("'\\''");
  return `'${escaped}'`;
}
|
|
|
|
/**
 * Split streams into those that will be removed and those that will be kept.
 *
 * A stream counts as removed when it has no decision or its decision's
 * action is "remove"; every other stream is kept. If several decisions
 * reference the same stream, the first one in `decisions` is used.
 *
 * @param streams All streams of the item.
 * @param decisions Decisions, matched to streams via stream_id.
 * @returns Both lists, each in the order of `streams`.
 */
export function summarizeChanges(
  streams: MediaStream[],
  decisions: StreamDecision[],
): { removed: MediaStream[]; kept: MediaStream[] } {
  // Index decisions once instead of scanning the list for every stream.
  const decisionByStream = new Map<number, StreamDecision>();
  for (const d of decisions) {
    if (!decisionByStream.has(d.stream_id)) decisionByStream.set(d.stream_id, d);
  }

  const removed: MediaStream[] = [];
  const kept: MediaStream[] = [];
  for (const s of streams) {
    const dec = decisionByStream.get(s.id);
    const isRemoved = !dec || dec.action === "remove";
    (isRemoved ? removed : kept).push(s);
  }
  return { removed, kept };
}
|
|
|
|
/**
 * Format a stream for display.
 *
 * Joins, with " · ": the type, codec, language (language_display preferred,
 * falling back to language), the quoted title, the channel count for audio,
 * and the "forced" / "CC" markers — skipping whatever is absent.
 *
 * @param s The stream to describe.
 * @returns A single-line label, e.g. `Audio · ac3 · eng · 6ch`.
 */
export function streamLabel(s: MediaStream): string {
  const parts: string[] = [s.type];
  if (s.codec) parts.push(s.codec);
  // `||` rather than `??`: an empty language_display must fall back to the
  // raw language instead of contributing an empty segment.
  const language = s.language_display || s.language;
  if (language) parts.push(language);
  if (s.title) parts.push(`"${s.title}"`);
  if (s.type === "Audio" && s.channels) parts.push(`${s.channels}ch`);
  if (s.is_forced) parts.push("forced");
  if (s.is_hearing_impaired) parts.push("CC");
  return parts.join(" · ");
}
|