skip non-extractable subs (dvdsub/dvbsub/unknown), summarise ffmpeg errors

Abraham Lincoln crashed with exit 234 because the file had 14 dvd_subtitle streams: our extraction dict only keyed on the long form (dvd_subtitle) while jellyfin stores the short form (dvdsub), so the lookup fell back to .srt, ffmpeg picked the srt muxer, and srt can't encode image-based subs. textbook silent dict miss. replaced the extension dict with an EXTRACTABLE map that pairs codec → {ext, codecArg} and explicitly enumerates every codec we can route to a single-file sidecar. everything else (dvd_subtitle/dvdsub, dvb_subtitle/dvbsub, unknown codecs) is now skipped at command-build time. the plan picks up a note like '14 subtitle(s) dropped: dvdsub (eng, est, ind, kor, jpn, lav, lit, may, chi, chi, tha, vie, rus, ukr) — not extractable to sidecar' so the user sees exactly what didn't make it. also added extractErrorSummary in execute.ts: when a job errors, scan the last 60 stderr lines for fatal keywords (Error:, Conversion failed!, Unsupported, Invalid argument, Permission denied, No space left, …), dedupe, prepend the summary to the job's stored output. the review_plan notes get the same summary — surfaces the real cause next to the plan instead of burying it under ffmpeg's 200-line banner. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 18:42:05 +02:00
parent afd95f06df
commit d2983d5f38
5 changed files with 147 additions and 32 deletions
--- a/server/api/tests/execute.test.ts
+++ b/server/api/tests/execute.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, test } from "bun:test";
+import { extractErrorSummary } from "../execute";
+
+describe("extractErrorSummary", () => {
+	test("pulls the real error line out of ffmpeg's banner", () => {
+		const lines = [
+			"[stderr] ffmpeg version 7.1.3 ...",
+			"[stderr]   built with gcc 14",
+			"[stderr]   Stream #0:2(eng): Subtitle: dvd_subtitle (dvdsub), 1280x720",
+			"[stderr] Stream mapping:",
+			"[stderr]   Stream #0:2 -> #0:0 (copy)",
+			"[stderr] [srt @ 0x55] Unsupported subtitles codec: dvd_subtitle",
+			"[stderr] [out#0/srt @ 0x55] Could not write header (incorrect codec parameters ?): Invalid argument",
+			"[stderr] Conversion failed!",
+		];
+		const summary = extractErrorSummary(lines, new Error("FFmpeg exited with code 234"));
+		expect(summary).toContain("Unsupported subtitles codec: dvd_subtitle");
+		expect(summary).toContain("Invalid argument");
+		expect(summary).toContain("Conversion failed!");
+		// Should NOT include the banner lines.
+		expect(summary).not.toContain("ffmpeg version");
+		expect(summary).not.toContain("Stream #0:2");
+	});
+
+	test("dedupes identical fatal lines (e.g. repeated warnings)", () => {
+		const lines = ["[stderr] Conversion failed!", "[stderr] Conversion failed!", "[stderr] Conversion failed!"];
+		const summary = extractErrorSummary(lines);
+		expect(summary?.split("\n").length).toBe(1);
+	});
+
+	test("falls back to the thrown error when no fatal line is found", () => {
+		const lines = ["[stderr] ffmpeg version 7", "[stderr]   Duration: 00:10:00"];
+		const summary = extractErrorSummary(lines, new Error("FFmpeg exited with code 1"));
+		expect(summary).toBe("Error: FFmpeg exited with code 1");
+	});
+
+	test("returns null when neither a fatal line nor a thrown error is available", () => {
+		expect(extractErrorSummary([])).toBe(null);
+		expect(extractErrorSummary(["[stderr] ffmpeg version 7"])).toBe(null);
+	});
+
+	test("only scans the tail — a banner from a prior run doesn't leak through", () => {
+		// A stale fatal line, then 70 fillers that push it outside the 60-line scan window.
+		const filler = Array.from({ length: 70 }, (_, i) => `[stderr] banner line ${i}`);
+		const lines = ["[stderr] Error: stale failure", ...filler, "[stderr] Error: no space left on device"];
+		const summary = extractErrorSummary(lines);
+		expect(summary).toBe("Error: no space left on device");
+	});
+});
--- a/server/api/execute.ts
+++ b/server/api/execute.ts
@@ -562,17 +562,55 @@ async function runJob(job: Job): Promise<void> {
 	} catch (err) {
 		logError(`Job ${job.id} failed:`, err);
 		const fullOutput = `${outputLines.join("\n")}\n${String(err)}`;
+		const summary = extractErrorSummary(outputLines, err);
+		// Prepend the scraped summary so the job log starts with what broke.
+		// ffmpeg's 200-line stream+config banner buries the real error; this
+		// gives the UI a crisp hook for the failure cause.
+		const annotatedOutput = summary ? `${summary}\n\n---\n\n${fullOutput}` : fullOutput;
 		db
 			.prepare("UPDATE jobs SET status = 'error', exit_code = 1, output = ?, completed_at = datetime('now') WHERE id = ?")
-			.run(fullOutput, job.id);
-		emitJobUpdate(job.id, "error", fullOutput);
-		db.prepare("UPDATE review_plans SET status = 'error' WHERE item_id = ?").run(job.item_id);
+			.run(annotatedOutput, job.id);
+		emitJobUpdate(job.id, "error", annotatedOutput);
+		db
+			.prepare("UPDATE review_plans SET status = 'error', notes = ? WHERE item_id = ?")
+			.run(summary ?? String(err), job.item_id);
 	} finally {
 		runningProc = null;
 		runningJobId = null;
 	}
 }

+/**
+ * Extract a short, human-readable reason from a failed job's stderr.
+ *
+ * ffmpeg prints a ~200-line banner (version, config, every stream in the
+ * input file) before the real error shows up. We scan the last 60 lines
+ * of the output for lines matching fatal keywords, strip the "[stderr]"
+ * prefix, dedupe, and return the last three distinct hits (oldest first,
+ * newline-joined). With no hit we fall back to String(thrown), or null
+ * when nothing was thrown either.
+ */
+export function extractErrorSummary(outputLines: string[], thrown?: unknown): string | null {
+	const FATAL =
+		/(Error:|Conversion failed!|Unsupported\b|Invalid argument|Permission denied|No such file|Cannot allocate|No space left|Killed|Segmentation fault)/;
+	// Only scan the last 60 lines — anything earlier is the banner or stream
+	// mapping. The real cause sits near the end.
+	const tail = outputLines.slice(-60).filter((l) => l.trim());
+	const hits: string[] = [];
+	for (const line of tail) {
+		if (FATAL.test(line)) hits.push(line.replace(/^\[stderr]\s*/, ""));
+	}
+	// Dedupe from the end so a repeated line keeps its LAST position — else an
+	// early duplicate could push the final error out of the three-line window.
+	const unique = [...new Set(hits.reverse())].slice(0, 3).reverse();
+	if (unique.length === 0) {
+		// No recognisable fatal line — fall back to the thrown error (usually
+		// "FFmpeg exited with code N"); the exit code at least hints where to look.
+		return thrown ? String(thrown) : null;
+	}
+	return unique.join("\n");
+}
+
 // Scheduler endpoints live on /api/settings/schedule now — see server/api/settings.ts.

 // ─── FFmpeg progress parsing ───────────────────────────────────────────────────
--- a/server/services/analyzer.ts
+++ b/server/services/analyzer.ts
@@ -1,5 +1,6 @@
 import type { MediaItem, MediaStream, PlanResult } from "../types";
 import { computeAppleCompat, isAppleCompatible, transcodeTarget } from "./apple-compat";
+import { isExtractableSubtitle } from "./ffmpeg";
 import { normalizeLanguage } from "./jellyfin";

 export interface AnalyzerConfig {
@@ -92,6 +93,22 @@ export function analyzeItem(
 		notes.push("Original language unknown — audio tracks not filtered; manual review required");
 	}

+	// Surface subtitles that can't be written to a sane single-file
+	// sidecar (image-based or unrecognised codecs). They'll still be
+	// stripped from the container, but won't land on disk anywhere — the
+	// user sees this in the plan notes so nothing vanishes silently.
+	const nonExtractable = streams.filter((s) => s.type === "Subtitle" && !isExtractableSubtitle(s.codec));
+	if (nonExtractable.length > 0) {
+		const grouped = new Map<string, string[]>();
+		for (const s of nonExtractable) {
+			const codec = (s.codec ?? "unknown").toLowerCase();
+			if (!grouped.has(codec)) grouped.set(codec, []);
+			grouped.get(codec)!.push(s.language ?? "und");
+		}
+		const summary = [...grouped.entries()].map(([codec, langs]) => `${codec} (${langs.join(", ")})`).join("; ");
+		notes.push(`${nonExtractable.length} subtitle(s) dropped: ${summary} — not extractable to sidecar`);
+	}
+
 	return { is_noop, has_subs: hasSubs, confidence: "low", apple_compat, job_type, decisions, notes };
 }

--- a/server/services/ffmpeg.ts
+++ b/server/services/ffmpeg.ts
@@ -47,39 +47,44 @@ const ISO639_1: Record<string, string> = {
 	est: "et",
 };

-/** Subtitle codec → external file extension. */
-const SUBTITLE_EXT: Record<string, string> = {
-	subrip: "srt",
-	srt: "srt",
-	ass: "ass",
-	ssa: "ssa",
-	webvtt: "vtt",
-	vtt: "vtt",
-	hdmv_pgs_subtitle: "sup",
-	pgssub: "sup",
-	dvd_subtitle: "sub",
-	dvbsub: "sub",
-	mov_text: "srt",
-	text: "srt",
+/**
+ * Subtitle codecs we can reliably extract to a single-file sidecar. Mapped
+ * to {ext, codecArg} for the ffmpeg output. Anything NOT in this map is
+ * deliberately skipped — ffmpeg's srt/text muxers reject image-based
+ * codecs like dvd_subtitle/dvb_subtitle with "Unsupported subtitles
+ * codec", crashing the whole job. VobSub extraction would produce a
+ * .sub + .idx pair and complicate the predicted-files contract, so for
+ * now those are stripped from the container but not written out. A plan
+ * note records what was dropped (see analyzer.ts).
+ *
+ * Keys are lowercase (lookups lowercase the codec first). Jellyfin returns
+ * short codec names (pgssub) while ffmpeg's own output uses the long form
+ * (hdmv_pgs_subtitle); both are listed here to keep alias drift harmless.
+ */
+const EXTRACTABLE: Record<string, { ext: string; codecArg: string }> = {
+	subrip: { ext: "srt", codecArg: "copy" },
+	srt: { ext: "srt", codecArg: "copy" },
+	ass: { ext: "ass", codecArg: "copy" },
+	ssa: { ext: "ssa", codecArg: "copy" },
+	webvtt: { ext: "vtt", codecArg: "copy" },
+	vtt: { ext: "vtt", codecArg: "copy" },
+	mov_text: { ext: "srt", codecArg: "subrip" }, // the only entry converted (to subrip) instead of stream-copied
+	text: { ext: "srt", codecArg: "copy" },
+	hdmv_pgs_subtitle: { ext: "sup", codecArg: "copy" },
+	pgssub: { ext: "sup", codecArg: "copy" },
 };

+export function isExtractableSubtitle(codec: string | null): boolean {
+	// Own-property lookup: `in` would also accept inherited keys like "constructor".
+	return !!codec && Object.prototype.hasOwnProperty.call(EXTRACTABLE, codec.toLowerCase());
+}
+
 function subtitleLang2(lang: string | null): string {
 	if (!lang) return "und";
 	const n = normalizeLanguage(lang);
 	return ISO639_1[n] ?? n;
 }

-/** Returns the ffmpeg codec name to use when extracting this subtitle stream. */
-function subtitleCodecArg(codec: string | null): string {
-	if (!codec) return "copy";
-	return codec.toLowerCase() === "mov_text" ? "subrip" : "copy";
-}
-
-function subtitleExtForCodec(codec: string | null): string {
-	if (!codec) return "srt";
-	return SUBTITLE_EXT[codec.toLowerCase()] ?? "srt";
-}
-
 /**
 * Build ffmpeg output args for extracting ALL subtitle streams
 * to external sidecar files next to the video.
@@ -106,7 +111,12 @@ function computeExtractionEntries(allStreams: MediaStream[], basePath: string):
 		if (s.type === "Subtitle") subTypeIdx.set(s.id, subCount++);
 	}

-	const allSubs = allStreams.filter((s) => s.type === "Subtitle").sort((a, b) => a.stream_index - b.stream_index);
+	// Only extract codecs we can route to a sane single-file sidecar. Image
+	// formats like dvd_subtitle crash the job if we try — see EXTRACTABLE.
+	const allSubs = allStreams
+		.filter((s) => s.type === "Subtitle")
+		.filter((s) => isExtractableSubtitle(s.codec))
+		.sort((a, b) => a.stream_index - b.stream_index);

 	if (allSubs.length === 0) return [];

@@ -116,8 +126,9 @@ function computeExtractionEntries(allStreams: MediaStream[], basePath: string):
 	for (const s of allSubs) {
 		const typeIdx = subTypeIdx.get(s.id) ?? 0;
 		const langCode = subtitleLang2(s.language);
-		const ext = subtitleExtForCodec(s.codec);
-		const codecArg = subtitleCodecArg(s.codec);
+		const spec = EXTRACTABLE[(s.codec ?? "").toLowerCase()];
+		const ext = spec.ext;
+		const codecArg = spec.codecArg;

 		const nameParts = [langCode];
 		if (s.is_forced) nameParts.push("forced");