netfelix-audio-fix/server/services/analyzer.ts

import type { MediaItem, MediaStream, PlanResult } from "../types";
import { computeAppleCompat, isAppleCompatible, transcodeTarget } from "./apple-compat";
import { isExtractableSubtitle } from "./ffmpeg";
import { normalizeLanguage } from "./jellyfin";

/** Caller-supplied settings that steer which audio streams analyzeItem keeps. */
export interface AnalyzerConfig {
	/**
	 * Additional languages to keep (after OG, i.e. the item's original
	 * language). Order matters: kept audio is ranked by each language's
	 * position in this list. Entries are compared directly against
	 * normalizeLanguage() output — presumably they must already be in that
	 * normalized form; TODO confirm against the caller.
	 */
	audioLanguages: string[];
}

/**
 * Build the processing plan for one media item: a keep/remove decision per
 * stream, the output position of every kept stream, the transcode target
 * for each kept audio stream, and whether the file needs touching at all.
 *
 * Subtitles are ALWAYS removed from the container (they get extracted to
 * sidecar files).
 *
 * `is_noop` is true only when nothing would change: no audio stream is
 * removed or reordered, the file has no subtitle streams, no kept audio
 * needs a transcode, the default disposition sits on the first kept audio
 * stream and on no other, and every kept audio language tag already equals
 * its normalized form.
 *
 * @param item - Original language, review flag and container of the file.
 * @param streams - Every stream of the file. Stream ids are assumed unique.
 * @param config - Additional audio languages to keep besides the original.
 * @returns The plan, including human-readable notes for anything the user
 *   should double-check.
 */
export function analyzeItem(
	item: Pick<MediaItem, "original_language" | "needs_review" | "container">,
	streams: MediaStream[],
	config: AnalyzerConfig,
): PlanResult {
	type Decision = PlanResult["decisions"][number];

	const origLang = item.original_language ? normalizeLanguage(item.original_language) : null;
	const notes: string[] = [];

	// Pair each stream with its decision once, so later steps never have to
	// re-scan either list by id. The helpers below mutate these same decision
	// objects in place.
	const planned = streams.map((stream) => {
		const decision: Decision = {
			stream_id: stream.id,
			action: decideAction(stream, origLang, config.audioLanguages),
			target_index: null,
			transcode_codec: null,
		};
		return { stream, decision };
	});
	const decisions: PlanResult["decisions"] = planned.map((p) => p.decision);

	// Second pass: within each kept-language group, drop commentary/AD tracks
	// and alternate formats so we end up with exactly one audio stream per
	// language.
	deduplicateAudioByLanguage(streams, decisions, origLang);
	assignTargetOrder(streams, decisions, origLang, config.audioLanguages);
	const audioOrderChanged = checkAudioOrderChanged(streams, decisions);

	const audio = planned.filter((p) => p.stream.type === "Audio");
	const anyAudioRemoved = audio.some((p) => p.decision.action === "remove");
	const keptAudio = audio.filter((p) => p.decision.action === "keep");

	for (const { stream, decision } of keptAudio) {
		// Use Profile (DTS-HD MA, etc.) — NOT title — to pick the transcode target.
		// Passing title here used to cause lossless DTS-HD MA in MKV to fall back
		// to EAC3 instead of the better FLAC path when the title didn't happen to
		// contain "MA".
		decision.transcode_codec = transcodeTarget(stream.codec ?? "", stream.profile, item.container);
	}

	const needsTranscode = decisions.some((d) => d.transcode_codec != null);
	const apple_compat = computeAppleCompat(
		keptAudio.map((p) => p.stream.codec ?? ""),
		item.container,
	);
	const job_type = needsTranscode ? ("transcode" as const) : ("copy" as const);

	const hasSubs = streams.some((s) => s.type === "Subtitle");

	// Pipeline also sets default disposition on the first kept audio and writes
	// canonical iso3 language tags. If either is already wrong in the file,
	// running ffmpeg would produce a different output → not a noop.
	const [firstKept, ...otherKept] = [...keptAudio].sort(
		(a, b) => (a.decision.target_index ?? 0) - (b.decision.target_index ?? 0),
	);
	const defaultMismatch = firstKept !== undefined && firstKept.stream.is_default !== 1;
	const nonDefaultHasDefault = otherKept.some((p) => p.stream.is_default === 1);
	const languageMismatch = keptAudio.some(
		({ stream }) => stream.language != null && stream.language !== normalizeLanguage(stream.language),
	);

	const is_noop =
		!anyAudioRemoved &&
		!audioOrderChanged &&
		!hasSubs &&
		!needsTranscode &&
		!defaultMismatch &&
		!nonDefaultHasDefault &&
		!languageMismatch;

	if (!origLang && item.needs_review) {
		notes.push("Original language unknown — audio tracks not filtered; manual review required");
	}

	// A plan that strips every audio stream is almost never what the user
	// wants (e.g. the file only carries a dub in an unconfigured language).
	// The decisions are left as computed, but the plan says so out loud.
	if (audio.length > 0 && keptAudio.length === 0) {
		notes.push("All audio streams would be removed — output would have no audio; manual review required");
	}

	// Surface image-based subtitles that can't be written to a sane
	// single-file sidecar. They'll still be stripped from the container,
	// but won't land on disk anywhere — the user sees this in the plan
	// notes so nothing vanishes silently.
	const nonExtractable = streams.filter((s) => s.type === "Subtitle" && !isExtractableSubtitle(s.codec));
	if (nonExtractable.length > 0) {
		// codec → languages of the affected streams, in stream order
		const langsByCodec = new Map<string, string[]>();
		for (const s of nonExtractable) {
			const codec = (s.codec ?? "unknown").toLowerCase();
			const lang = s.language ?? "und";
			const langs = langsByCodec.get(codec);
			if (langs) langs.push(lang);
			else langsByCodec.set(codec, [lang]);
		}
		const summary = [...langsByCodec.entries()]
			.map(([codec, langs]) => `${codec} (${langs.join(", ")})`)
			.join("; ");
		notes.push(`${nonExtractable.length} subtitle(s) dropped: ${summary} — not extractable to sidecar`);
	}

	return { is_noop, has_subs: hasSubs, confidence: "low", apple_compat, job_type, decisions, notes };
}

/**
 * Titles that scream "not the main track": commentary, director's track,
 * visually-impaired/audio-description, karaoke, sign language.
 * Case-insensitive.
 *
 * Two-word phrases accept either a space or a hyphen ("audio description",
 * "Audio-Description"), and both "visual impaired" and "visually impaired"
 * are recognised. The pattern has no `g`/`y` flag, so `.test()` carries no
 * state between calls.
 */
const NON_PRIMARY_AUDIO_TITLE =
	/\b(commentary|director'?s?\b.*\b(track|comment|feature)|audio[ -]description|descriptive|visual(?:ly)?[ -]impaired|\bad\b|karaoke|sign[ -]language)/i;

/**
 * True when a stream should not be treated as a main audio track: either
 * its hearing-impaired flag is set, or its title (empty when absent)
 * matches NON_PRIMARY_AUDIO_TITLE.
 */
function isCommentaryOrAuxiliary(stream: MediaStream): boolean {
	// The flag alone is decisive; the title is only consulted without it.
	return stream.is_hearing_impaired ? true : NON_PRIMARY_AUDIO_TITLE.test(stream.title ?? "");
}

/**
 * Comparator that ranks audio streams of one language group so the
 * preferred "primary" track sorts first (negative result → `a` wins).
 *
 * Tie-break ladder, applied top to bottom:
 *   1. more channels wins (7.1 over 5.1 over stereo; missing counts as 0)
 *   2. an Apple-compatible codec wins — it skips a transcode pass, so AC3
 *      beats DTS-HD MA at equal channel count
 *   3. the stream carrying the default disposition wins
 *   4. the lower stream_index wins (source order, keeps the sort stable)
 */
function betterAudio(a: MediaStream, b: MediaStream): number {
	const channelGap = (b.channels ?? 0) - (a.channels ?? 0);
	if (channelGap !== 0) return channelGap;

	// 1 when the codec plays on Apple devices as-is, 0 otherwise.
	const appleScore = (s: MediaStream): number => (isAppleCompatible(s.codec ?? "") ? 1 : 0);
	const scoreA = appleScore(a);
	const appleGap = appleScore(b) - scoreA;
	if (appleGap !== 0) return appleGap;

	const defaultGap = (b.is_default ?? 0) - (a.is_default ?? 0);
	return defaultGap !== 0 ? defaultGap : a.stream_index - b.stream_index;
}

/**
 * Second-pass audio filter. Among the audio streams currently marked
 * "keep", flips decisions to "remove" so that at most one stream per
 * language survives. Mutates the entries of `decisions` in place; `streams`
 * is only read.
 *
 * @param streams - All streams of the file.
 * @param decisions - Per-stream decisions; matched to streams by stream_id.
 * @param origLang - Normalized original language, or null when unknown.
 */
function deduplicateAudioByLanguage(
	streams: MediaStream[],
	decisions: PlanResult["decisions"],
	origLang: string | null,
): void {
	const decisionById = new Map(decisions.map((d) => [d.stream_id, d]));
	const markRemoved = (s: MediaStream): void => {
		const d = decisionById.get(s.id);
		if (d) d.action = "remove";
	};
	// Sorts a copy of the group best-first and removes everything but the winner.
	const keepOnlyBest = (group: MediaStream[]): void => {
		[...group].sort(betterAudio).slice(1).forEach((s) => markRemoved(s));
	};

	const keptAudio = streams.filter((s) => s.type === "Audio" && decisionById.get(s.id)?.action === "keep");

	// 1. Drop commentary/AD tracks regardless of language match — unless
	//    every kept track looks auxiliary. A lone track that is flagged or
	//    titled as auxiliary is still the only audio the file has; purging
	//    it would leave the output silent, so in that case the auxiliary
	//    tracks stay in the running and are only deduplicated below.
	const primary: MediaStream[] = [];
	const auxiliary: MediaStream[] = [];
	for (const s of keptAudio) {
		(isCommentaryOrAuxiliary(s) ? auxiliary : primary).push(s);
	}
	const candidates = primary.length > 0 ? primary : keptAudio;
	if (primary.length > 0) {
		auxiliary.forEach((s) => markRemoved(s));
	}

	// 2. Group the remaining candidates by normalized language; streams
	//    without a language tag are collected separately.
	const byLang = new Map<string, MediaStream[]>();
	const noLang: MediaStream[] = [];
	for (const s of candidates) {
		if (!s.language) {
			noLang.push(s);
			continue;
		}
		const key = normalizeLanguage(s.language);
		const group = byLang.get(key);
		if (group) group.push(s);
		else byLang.set(key, [s]);
	}

	for (const group of byLang.values()) {
		keepOnlyBest(group);
	}

	// Null-language audio: only dedupe when OG is known (so we already have
	// a primary pick). If OG is null we leave ambiguity alone.
	if (origLang && noLang.length > 1) {
		keepOnlyBest(noLang);
	}
}

/**
 * First-pass keep/remove verdict for a single stream, based only on its
 * type and language.
 *
 * - Subtitles are always removed.
 * - Audio is kept when the original language is unknown, when the stream
 *   has no language tag, or when its normalized language is the original
 *   language or one of `audioLanguages`; otherwise it is removed.
 * - Every other type (Video, Data, EmbeddedImage, anything unrecognised)
 *   is kept.
 */
function decideAction(stream: MediaStream, origLang: string | null, audioLanguages: string[]): "keep" | "remove" {
	if (stream.type === "Subtitle") return "remove";
	if (stream.type !== "Audio") return "keep";

	// Nothing to filter on without both an original language and a tag.
	if (!origLang || !stream.language) return "keep";

	const lang = normalizeLanguage(stream.language);
	const wanted = lang === origLang || audioLanguages.includes(lang);
	return wanted ? "keep" : "remove";
}

/**
 * Fill in `target_index` on the decision of every kept stream.
 *
 * `target_index` is the stream's 0-based position among the kept streams
 * of its own type in the output file. Non-audio types keep their input
 * order. Audio is ordered by language rank — original language first, then
 * the configured additional languages in their configured order, then
 * everything else — with the source stream_index breaking ties.
 *
 * Decisions are mutated in place; decisions of removed streams are not
 * touched.
 */
export function assignTargetOrder(
	allStreams: MediaStream[],
	decisions: PlanResult["decisions"],
	origLang: string | null,
	audioLanguages: string[],
): void {
	const decisionFor = (stream: MediaStream) => decisions.find((d) => d.stream_id === stream.id);

	// Bucket kept streams by type, preserving input order inside each bucket.
	const buckets = new Map<string, MediaStream[]>();
	for (const stream of allStreams) {
		if (decisionFor(stream)?.action !== "keep") continue;
		const bucket = buckets.get(stream.type);
		if (bucket) bucket.push(stream);
		else buckets.set(stream.type, [stream]);
	}

	// Only the audio bucket is re-ordered.
	buckets.get("Audio")?.sort((left, right) => {
		const leftRank = langRank(left.language, origLang, audioLanguages);
		const rightRank = langRank(right.language, origLang, audioLanguages);
		return leftRank !== rightRank ? leftRank - rightRank : left.stream_index - right.stream_index;
	});

	for (const bucket of buckets.values()) {
		let position = 0;
		for (const stream of bucket) {
			const decision = decisionFor(stream);
			if (decision) decision.target_index = position;
			position += 1;
		}
	}
}

/**
 * Sort rank of an audio language; lower ranks come first in the output.
 *
 *   0      the original language (only when it is known)
 *   i + 1  the language at index i of `audioLanguages`
 *   len+1  everything else, including streams with no language tag
 */
function langRank(lang: string | null, origLang: string | null, audioLanguages: string[]): number {
	const lastRank = audioLanguages.length + 1;
	if (!lang) return lastRank;

	const code = normalizeLanguage(lang);
	if (origLang && code === origLang) return 0;

	// An empty normalized code never counts as a configured language.
	const configuredPos = code ? audioLanguages.indexOf(code) : -1;
	return configuredPos >= 0 ? configuredPos + 1 : lastRank;
}

/**
 * Reports whether the kept audio streams end up in a different order than
 * they had in the input.
 *
 * The kept audio streams are listed by ascending stream_index; the order
 * is unchanged only if each one's target_index equals its position in that
 * list. A kept audio stream whose target_index was never assigned counts
 * as changed.
 */
function checkAudioOrderChanged(streams: MediaStream[], decisions: PlanResult["decisions"]): boolean {
	const decisionFor = (stream: MediaStream) => decisions.find((d) => d.stream_id === stream.id);

	// filter() yields a fresh array, so sorting it leaves `streams` untouched.
	const inSourceOrder = streams
		.filter((s) => s.type === "Audio" && decisionFor(s)?.action === "keep")
		.sort((a, b) => a.stream_index - b.stream_index);

	return inSourceOrder.some((stream, sourcePos) => decisionFor(stream)?.target_index !== sourcePos);
}