// netfelix-audio-fix/server/services/ffmpeg.ts

import type { MediaItem, MediaStream, StreamDecision } from "../types";
import { normalizeLanguage } from "./jellyfin";

// ─── Subtitle extraction helpers ──────────────────────────────────────────────

/**
 * ISO 639-2 three-letter → ISO 639-1 two-letter codes for subtitle filenames.
 *
 * Where the bibliographic (639-2/B) and terminological (639-2/T) spellings
 * differ, the keys use the /T form ("deu", "fra", "zho", "nld", "ces", …
 * rather than "ger", "fre", "chi", "dut", "cze").
 * subtitleLang2 looks up the result of normalizeLanguage() here and falls
 * back to that normalized code unchanged when there is no entry.
 * NOTE(review): presumably normalizeLanguage() canonicalises /B spellings
 * to the /T keys used here — confirm in ./jellyfin.
 */
const ISO639_1: Record<string, string> = {
	eng: "en",
	deu: "de",
	spa: "es",
	fra: "fr",
	ita: "it",
	por: "pt",
	jpn: "ja",
	kor: "ko",
	zho: "zh",
	ara: "ar",
	rus: "ru",
	nld: "nl",
	swe: "sv",
	nor: "no",
	dan: "da",
	fin: "fi",
	pol: "pl",
	tur: "tr",
	tha: "th",
	hin: "hi",
	hun: "hu",
	ces: "cs",
	ron: "ro",
	ell: "el",
	heb: "he",
	fas: "fa",
	ukr: "uk",
	ind: "id",
	cat: "ca",
	nob: "nb",
	nno: "nn",
	isl: "is",
	hrv: "hr",
	slk: "sk",
	bul: "bg",
	srp: "sr",
	slv: "sl",
	lav: "lv",
	lit: "lt",
	est: "et",
};

/**
 * Subtitle codecs we can reliably extract to a single-file sidecar. Mapped
 * to {ext, codecArg} for the ffmpeg output. Anything NOT in this map is
 * deliberately skipped — ffmpeg's srt/text muxers reject image-based
 * codecs like dvd_subtitle/dvb_subtitle with "Unsupported subtitles
 * codec", crashing the whole job. VobSub extraction would produce a
 * .sub + .idx pair and complicate the predicted-files contract, so for
 * now those are stripped from the container but not written out. A plan
 * note records what was dropped (see analyzer.ts).
 *
 * Jellyfin returns short codec names (dvdsub, pgssub) while ffmpeg's own
 * output uses the long form (dvd_subtitle, hdmv_pgs_subtitle). Both are
 * accepted here to keep alias drift harmless.
 */
const EXTRACTABLE: Record<string, { ext: string; codecArg: string }> = {
	subrip: { ext: "srt", codecArg: "copy" },
	srt: { ext: "srt", codecArg: "copy" },
	ass: { ext: "ass", codecArg: "copy" },
	ssa: { ext: "ssa", codecArg: "copy" },
	webvtt: { ext: "vtt", codecArg: "copy" },
	vtt: { ext: "vtt", codecArg: "copy" },
	mov_text: { ext: "srt", codecArg: "subrip" },
	text: { ext: "srt", codecArg: "copy" },
	hdmv_pgs_subtitle: { ext: "sup", codecArg: "copy" },
	pgssub: { ext: "sup", codecArg: "copy" },
};

/**
 * Whether a subtitle codec can be written out as a single-file sidecar,
 * i.e. whether it has an entry in EXTRACTABLE.
 *
 * @param codec Codec name as reported for the stream; matched case-insensitively.
 * @returns false for null/empty codecs and for any codec without an entry.
 */
export function isExtractableSubtitle(codec: string | null): boolean {
	if (!codec) return false;
	// Own-property check on purpose: the `in` operator also matches keys
	// inherited from Object.prototype ("constructor", "__proto__", …), which
	// would report a bogus codec as extractable and later yield an undefined
	// extension/codec argument.
	return Object.prototype.hasOwnProperty.call(EXTRACTABLE, codec.toLowerCase());
}

/**
 * Language tag used inside sidecar subtitle filenames.
 *
 * @param lang Raw language tag of the stream, or null when untagged.
 * @returns "und" for a missing tag; otherwise the two-letter code from
 *          ISO639_1 for the normalized tag, or the normalized tag itself
 *          when the table has no entry for it.
 */
function subtitleLang2(lang: string | null): string {
	if (!lang) return "und";
	const normalized = normalizeLanguage(lang);
	// Only consult the table's own keys: a plain index would also return
	// inherited Object.prototype members (e.g. for a tag "constructor"),
	// which are not strings and would end up stringified in the filename.
	const short = Object.prototype.hasOwnProperty.call(ISO639_1, normalized) ? ISO639_1[normalized] : undefined;
	return short ?? normalized;
}

/**
 * One subtitle stream scheduled for extraction to an external sidecar
 * file next to the video.
 *
 * buildPipelineCommand turns each entry into a separate ffmpeg output:
 *   -map 0:s:<typeIdx> -c:s <codecArg> '<outPath>'
 * for example:
 *   -map 0:s:0 -c:s copy 'basename.en.srt'
 */
interface ExtractionEntry {
	/** The source subtitle stream this entry was computed from. */
	stream: MediaStream;
	/** 0-based position among ALL subtitle streams ordered by stream_index — the N in `0:s:N`. */
	typeIdx: number;
	/**
	 * Sidecar path: `<basePath>.<lang>[.forced][.hi][.<counter>].<ext>`, where
	 * basePath is the video file path without extension (host or /work path).
	 */
	outPath: string;
	/** Value for ffmpeg's `-c:s` option, taken from the EXTRACTABLE table. */
	codecArg: string;
}

/**
 * Compute extraction metadata for every extractable subtitle stream.
 * Shared by buildPipelineCommand and predictExtractedFiles so the generated
 * command and the predicted sidecar list are derived from the same data.
 *
 * @param allStreams All streams of the item (needed to compute type-relative indices).
 * @param basePath   Video file path without extension (host or /work path).
 * @returns One entry per extractable subtitle stream, ordered by stream_index.
 *          Empty when there is nothing to extract.
 */
function computeExtractionEntries(allStreams: MediaStream[], basePath: string): ExtractionEntry[] {
	const usedNames = new Set<string>();
	const entries: ExtractionEntry[] = [];
	let subCount = 0;

	// Single pass in container order. The input array is copied before sorting
	// so the caller's array is never reordered.
	for (const s of [...allStreams].sort((a, b) => a.stream_index - b.stream_index)) {
		if (s.type !== "Subtitle") continue;

		// Every subtitle stream occupies a 0:s:N slot, including the ones we
		// skip below, so the counter advances before the codec filter.
		const typeIdx = subCount++;

		// Only extract codecs we can route to a sane single-file sidecar. Image
		// formats like dvd_subtitle crash the job if we try — see EXTRACTABLE.
		// Own-property lookup so codec names that collide with Object.prototype
		// keys are skipped instead of producing an undefined ext/codec.
		const codecKey = (s.codec ?? "").toLowerCase();
		const spec = Object.prototype.hasOwnProperty.call(EXTRACTABLE, codecKey) ? EXTRACTABLE[codecKey] : undefined;
		if (!spec) continue;

		const nameParts = [subtitleLang2(s.language)];
		if (s.is_forced) nameParts.push("forced");
		if (s.is_hearing_impaired) nameParts.push("hi");

		// Two streams with the same language/flags/extension would collide;
		// the second and later ones get a numeric suffix starting at 2.
		const stem = `${basePath}.${nameParts.join(".")}`;
		let outPath = `${stem}.${spec.ext}`;
		for (let counter = 2; usedNames.has(outPath); counter++) {
			outPath = `${stem}.${counter}.${spec.ext}`;
		}
		usedNames.add(outPath);

		entries.push({ stream: s, typeIdx, outPath, codecArg: spec.codecArg });
	}

	return entries;
}

/**
 * List the sidecar subtitle files an extraction run is expected to write.
 * Callers use the result to fill the subtitle_files table once a job has
 * finished successfully.
 *
 * @param item    Media item; its file_path minus the extension is the sidecar base path.
 * @param streams All streams of the item.
 * @returns One record per entry produced by computeExtractionEntries, with
 *          the forced / hearing-impaired flags coerced to booleans.
 */
export function predictExtractedFiles(
	item: MediaItem,
	streams: MediaStream[],
): Array<{
	file_path: string;
	language: string | null;
	codec: string | null;
	is_forced: boolean;
	is_hearing_impaired: boolean;
}> {
	// Strip the trailing ".ext" so sidecars are named after the video.
	const withoutExt = item.file_path.replace(/\.[^.]+$/, "");

	return computeExtractionEntries(streams, withoutExt).map(({ stream, outPath }) => {
		const { language, codec } = stream;
		return {
			file_path: outPath,
			language,
			codec,
			is_forced: Boolean(stream.is_forced),
			is_hearing_impaired: Boolean(stream.is_hearing_impaired),
		};
	});
}

// ─────────────────────────────────────────────────────────────────────────────

/**
 * English display names keyed by three-letter language code.
 * trackTitle looks up the normalizeLanguage() result here to build subtitle
 * titles and falls back to the upper-cased code when there is no entry.
 */
const LANG_NAMES: Record<string, string> = {
	eng: "English",
	deu: "German",
	spa: "Spanish",
	fra: "French",
	ita: "Italian",
	por: "Portuguese",
	jpn: "Japanese",
	kor: "Korean",
	zho: "Chinese",
	ara: "Arabic",
	rus: "Russian",
	nld: "Dutch",
	swe: "Swedish",
	nor: "Norwegian",
	dan: "Danish",
	fin: "Finnish",
	pol: "Polish",
	tur: "Turkish",
	tha: "Thai",
	hin: "Hindi",
	hun: "Hungarian",
	ces: "Czech",
	ron: "Romanian",
	ell: "Greek",
	heb: "Hebrew",
	fas: "Persian",
	ukr: "Ukrainian",
	ind: "Indonesian",
	cat: "Catalan",
	nob: "Norwegian Bokmål",
	nno: "Norwegian Nynorsk",
	isl: "Icelandic",
	slk: "Slovak",
	hrv: "Croatian",
	bul: "Bulgarian",
	srp: "Serbian",
	slv: "Slovenian",
	lav: "Latvian",
	lit: "Lithuanian",
	est: "Estonian",
};

/**
 * Render an audio channel count as a conventional "N.M" layout label:
 * 1 → "1.0", 2 → "2.0", 6 → "5.1", 7 → "6.1", 8 → "7.1".
 * Any other count is rendered as "<n>ch"; a missing count yields null.
 */
function formatChannels(n: number | null): string | null {
	if (n == null) return null;
	switch (n) {
		case 1:
			return "1.0";
		case 2:
			return "2.0";
		case 6:
			return "5.1";
		case 7:
			return "6.1";
		case 8:
			return "7.1";
		default:
			return `${n}ch`;
	}
}

/**
 * Canonical title for a stream, or null when nothing useful can be said.
 *
 * Subtitles: the language name from LANG_NAMES (upper-cased code if unknown),
 * suffixed with " (Forced)" or " (CC)"; forced takes precedence. Untagged
 * subtitles get no title.
 *
 * Everything else is labelled like an audio track: "ENG - AC3 · 5.1", built
 * from whichever of language, codec and channel layout are present.
 */
function trackTitle(stream: MediaStream): string | null {
	if (stream.type === "Subtitle") {
		// The original file title is deliberately ignored so players show a
		// clean "German" / "English (Forced)". Mislabelled tracks are surfaced
		// in the review UI (⚠ badge), not here.
		if (!stream.language) return null;
		const code = normalizeLanguage(stream.language);
		const name = LANG_NAMES[code] ?? code.toUpperCase();
		let suffix = "";
		if (stream.is_forced) suffix = " (Forced)";
		else if (stream.is_hearing_impaired) suffix = " (CC)";
		return `${name}${suffix}`;
	}

	// Audio titles are harmonised too, replacing things like "Director's
	// Commentary": unwanted tracks are dropped in the review UI beforehand,
	// and a decision's custom_title still overrides this (see buildStreamFlags).
	const code = stream.language ? normalizeLanguage(stream.language) : null;
	const tag = code ? code.toUpperCase() : null;

	const details: string[] = [];
	if (stream.codec) details.push(stream.codec.toUpperCase());
	const layout = formatChannels(stream.channels);
	if (layout) details.push(layout);
	const tail = details.join(" · ");

	if (tag && tail) return `${tag} - ${tail}`;
	return tag || tail || null;
}

/** ffmpeg stream-type letter for each stream type that can be mapped. */
const TYPE_SPEC: Record<string, string> = {
	Video: "v",
	Audio: "a",
	Subtitle: "s",
};

/**
 * Produce one `-map 0:<type>:<n>` flag per kept stream, in the order given.
 *
 * Type-relative specifiers (0:v:N, 0:a:N, 0:s:N) are used instead of
 * Jellyfin's absolute stream_index: the absolute index can count
 * EmbeddedImage and Data streams that ffmpeg may number differently (e.g.
 * cover art stored as an attachment). A stream's rank within its own type
 * matches ffmpeg's convention and avoids silent mismatches.
 *
 * @param allStreams Every stream of the input; defines the per-type ranking.
 * @param kept       Streams to map; entries whose type has no TYPE_SPEC letter are skipped.
 */
function buildMaps(allStreams: MediaStream[], kept: { stream: MediaStream; dec: StreamDecision }[]): string[] {
	// Rank each mappable stream among streams of its own type, walking the
	// input in stream_index order (the order ffmpeg sees them).
	const inInputOrder = allStreams.slice().sort((x, y) => x.stream_index - y.stream_index);
	const nextRank: Record<string, number> = {};
	const rankById = new Map<number, number>();
	for (const stream of inInputOrder) {
		if (!TYPE_SPEC[stream.type]) continue;
		const rank = nextRank[stream.type] ?? 0;
		rankById.set(stream.id, rank);
		nextRank[stream.type] = rank + 1;
	}

	const flags: string[] = [];
	for (const { stream } of kept) {
		const letter = TYPE_SPEC[stream.type];
		if (!letter) continue;
		// A kept stream missing from allStreams falls back to rank 0.
		flags.push(`-map 0:${letter}:${rankById.get(stream.id) ?? 0}`);
	}
	return flags;
}

/**
 * Build disposition and metadata flags for kept audio streams.
 * - Marks the first kept audio stream as default and clears the
 *   disposition of all others.
 * - Sets a title on each kept audio stream: the decision's custom_title if
 *   present, otherwise the harmonized trackTitle() label ("ENG - AC3 · 5.1").
 * - Writes the language tag returned by normalizeLanguage(); streams with
 *   no language get "und" (ffmpeg convention).
 *
 * Indices are relative to the kept audio streams in the order given, so
 * `kept` must already be in output order (see sortKeptStreams).
 *
 * @param kept Kept streams paired with their decisions.
 * @returns Flat list of shell-ready argument fragments.
 */
function buildStreamFlags(kept: { stream: MediaStream; dec: StreamDecision }[]): string[] {
	// Tags made only of these characters need no shell quoting and are emitted
	// verbatim, which keeps the command text unchanged for ordinary codes.
	const plainToken = /^[A-Za-z0-9_-]+$/;

	const audioKept = kept.filter((k) => k.stream.type === "Audio");
	const args: string[] = [];

	audioKept.forEach((k, i) => {
		args.push(`-disposition:a:${i}`, i === 0 ? "default" : "0");

		const title = k.dec.custom_title ?? trackTitle(k.stream);
		if (title) args.push(`-metadata:s:a:${i}`, `title=${shellQuote(title)}`);

		const lang = k.stream.language ? normalizeLanguage(k.stream.language) : "und";
		// The tag originates from file metadata and ends up in a shell command.
		// What normalizeLanguage() returns for unrecognised tags is not visible
		// from this file, so anything that is not a plain token is quoted the
		// same way the title is.
		const langArg = plainToken.test(lang) ? lang : shellQuote(lang);
		args.push(`-metadata:s:a:${i}`, `language=${langArg}`);
	});

	return args;
}

/** Canonical output order of stream types. Used by every command builder. */
const TYPE_ORDER: Record<string, number> = {
	Video: 0,
	Audio: 1,
	Subtitle: 2,
	Data: 3,
	EmbeddedImage: 4,
};

/**
 * Pair every stream whose decision is "keep" with that decision and return
 * the pairs in canonical output order: by type (TYPE_ORDER, unknown types
 * last), then by the decision's target_index (missing counts as 0).
 * This is the single source of truth for output stream ordering.
 *
 * When several decisions reference the same stream, the first one in
 * `decisions` is the one that counts.
 */
export function sortKeptStreams(
	streams: MediaStream[],
	decisions: StreamDecision[],
): { stream: MediaStream; dec: StreamDecision }[] {
	const typeRank = (type: string): number => TYPE_ORDER[type] ?? 9;

	const kept = streams.flatMap((stream) => {
		const dec = decisions.find((candidate) => candidate.stream_id === stream.id);
		return dec?.action === "keep" ? [{ stream, dec }] : [];
	});

	// A zero type difference falls through to the target_index comparison.
	kept.sort(
		(left, right) =>
			typeRank(left.stream.type) - typeRank(right.stream.type) ||
			(left.dec.target_index ?? 0) - (right.dec.target_index ?? 0),
	);
	return kept;
}

/**
 * Build the full shell command that processes one media file.
 *
 * This is a convenience wrapper: buildPipelineCommand is the single source
 * of truth for how a file is touched (subtitle sidecar extraction plus a
 * remux with the kept streams, transcoding audio where a decision asks for
 * it). Only its `command` string is returned so call sites need not
 * destructure the result themselves.
 */
export function buildCommand(item: MediaItem, streams: MediaStream[], decisions: StreamDecision[]): string {
	const { command } = buildPipelineCommand(item, streams, decisions);
	return command;
}

/**
 * Build a command that also changes the container to MKV.
 * Used when MP4 container can't hold certain subtitle codecs.
 *
 * The kept streams are stream-copied into `<base>.tmp.mkv`, which is then
 * moved to `<base>.mkv`, where `<base>` is the input path without its
 * extension. The command never deletes the input, so an input with a
 * different extension is left next to the new .mkv.
 *
 * @returns A single shell command line (`ffmpeg … && mv …`).
 */
export function buildMkvConvertCommand(item: MediaItem, streams: MediaStream[], decisions: StreamDecision[]): string {
	const inputPath = item.file_path;

	// Derive the base explicitly rather than with String.replace: when the
	// path has no extension a replace is a no-op, and both the temp and the
	// output path would equal the input path, i.e. ffmpeg would be asked to
	// write onto the file it is reading.
	const extStart = inputPath.search(/\.[^.]+$/);
	const basePath = extStart === -1 ? inputPath : inputPath.slice(0, extStart);
	const outputPath = `${basePath}.mkv`;
	const tmpPath = `${basePath}.tmp.mkv`;

	const kept = sortKeptStreams(streams, decisions);

	const maps = buildMaps(streams, kept);
	const streamFlags = buildStreamFlags(kept);

	return [
		"ffmpeg",
		"-y",
		"-i",
		shellQuote(inputPath),
		...maps,
		...streamFlags,
		"-c copy",
		"-f matroska",
		shellQuote(tmpPath),
		"&&",
		"mv",
		shellQuote(tmpPath),
		shellQuote(outputPath),
	].join(" ");
}

/**
 * Build a single FFmpeg command that:
 * 1. Extracts subtitles to sidecar files
 * 2. Remuxes with reordered/filtered audio
 * 3. Transcodes incompatible audio codecs
 *
 * The remux is written to `<base>.tmp.<ext>` and moved over the input only
 * if ffmpeg succeeds (`&&`). Sidecar outputs are listed before the remuxed
 * output on the same ffmpeg invocation.
 *
 * @param item      Media item; file_path is both the input and the final output.
 * @param streams   All streams of the item.
 * @param decisions Per-stream decisions; only "keep" decisions are remuxed.
 * @returns The shell command plus the sidecar files it will create.
 */
export function buildPipelineCommand(
	item: MediaItem,
	streams: MediaStream[],
	decisions: (StreamDecision & { stream?: MediaStream })[],
): {
	command: string;
	extractedFiles: Array<{
		path: string;
		language: string | null;
		codec: string | null;
		is_forced: number;
		is_hearing_impaired: number;
	}>;
} {
	const inputPath = item.file_path;

	// Split the path into base + extension once. Building the temp path from
	// these parts (rather than String.replace on the input) keeps it distinct
	// from the input even when the path has no extension, and avoids `$`
	// sequences in the extension being treated as replacement patterns.
	const extMatch = inputPath.match(/\.([^.]+)$/);
	const ext = extMatch?.[1] ?? "mkv";
	const basePath = extMatch ? inputPath.slice(0, inputPath.length - extMatch[0].length) : inputPath;
	const tmpPath = `${basePath}.tmp.${ext}`;

	// --- Subtitle extraction outputs ---
	const extractionEntries = computeExtractionEntries(streams, basePath);
	const subOutputArgs: string[] = [];
	for (const e of extractionEntries) {
		subOutputArgs.push(`-map 0:s:${e.typeIdx}`, `-c:s ${e.codecArg}`, shellQuote(e.outPath));
	}

	// --- Kept streams for remuxed output ---
	const kept = sortKeptStreams(streams, decisions);

	// Build -map flags
	const maps = buildMaps(streams, kept);

	// Build per-stream codec flags. audioIdx counts kept audio streams in
	// output order, matching the a:N indices used by buildStreamFlags.
	const codecFlags: string[] = ["-c:v copy"];
	let audioIdx = 0;
	for (const { stream, dec } of kept) {
		if (stream.type !== "Audio") continue;
		if (dec.transcode_codec) {
			codecFlags.push(`-c:a:${audioIdx} ${dec.transcode_codec}`);
			// For EAC3, set a reasonable bitrate based on channel count
			if (dec.transcode_codec === "eac3") {
				const bitrate = (stream.channels ?? 2) >= 6 ? "640k" : "256k";
				codecFlags.push(`-b:a:${audioIdx} ${bitrate}`);
			}
		} else {
			codecFlags.push(`-c:a:${audioIdx} copy`);
		}
		audioIdx++;
	}

	// If nothing is transcoded, simplify to -c copy (covers video + audio)
	const hasTranscode = kept.some((k) => k.dec.transcode_codec);
	const finalCodecFlags = hasTranscode ? codecFlags : ["-c copy"];

	// Disposition + metadata flags for audio
	const streamFlags = buildStreamFlags(kept);

	// Assemble: input, sidecar outputs, then the options and path of the
	// remuxed output.
	const parts: string[] = [
		"ffmpeg",
		"-y",
		"-i",
		shellQuote(inputPath),
		...subOutputArgs,
		...maps,
		...finalCodecFlags,
		...streamFlags,
		shellQuote(tmpPath),
	];

	const command = `${parts.join(" ")} && mv ${shellQuote(tmpPath)} ${shellQuote(inputPath)}`;

	return {
		command,
		extractedFiles: extractionEntries.map((e) => ({
			path: e.outPath,
			language: e.stream.language,
			codec: e.stream.codec,
			is_forced: e.stream.is_forced ? 1 : 0,
			is_hearing_impaired: e.stream.is_hearing_impaired ? 1 : 0,
		})),
	};
}

/**
 * Quote an arbitrary string (path, title, …) as one single-quoted shell word.
 * Each embedded single quote is emitted as `'\''`: close the quoted run,
 * add an escaped quote, reopen.
 */
export function shellQuote(s: string): string {
	const escaped = s.split("'").join("'\\''");
	return "'" + escaped + "'";
}

/**
 * Split the streams into those that will be removed and those that will
 * be kept, preserving input order within each list.
 *
 * A stream counts as removed when it has no decision at all or when its
 * (first matching) decision has action "remove"; every other stream is kept.
 */
export function summarizeChanges(
	streams: MediaStream[],
	decisions: StreamDecision[],
): { removed: MediaStream[]; kept: MediaStream[] } {
	const removed: MediaStream[] = [];
	const kept: MediaStream[] = [];

	streams.forEach((stream) => {
		const decision = decisions.find((candidate) => candidate.stream_id === stream.id);
		const survives = !!decision && decision.action !== "remove";
		(survives ? kept : removed).push(stream);
	});

	return { removed, kept };
}

/**
 * Format a stream for display as " · "-separated parts:
 * type, codec, language, quoted title, channel count (audio only),
 * then "forced" / "CC" markers. Parts without a value are omitted.
 *
 * The language part is language_display when it is non-empty, otherwise
 * the raw language tag.
 */
export function streamLabel(s: MediaStream): string {
	const parts: string[] = [s.type];
	if (s.codec) parts.push(s.codec);
	// One truthiness-based pick: mixing `||` for the guard with `??` for the
	// value let an empty language_display through as an empty label segment.
	const language = s.language_display || s.language;
	if (language) parts.push(language);
	if (s.title) parts.push(`"${s.title}"`);
	if (s.type === "Audio" && s.channels) parts.push(`${s.channels}ch`);
	if (s.is_forced) parts.push("forced");
	if (s.is_hearing_impaired) parts.push("CC");
	return parts.join(" · ");
}