Files
netfelix-audio-fix/server/services/analyzer.ts
T
felixfoertsch 495a40a6c6
Build and Push Docker Image / build (push) Successful in 1m52s
restructure pipeline column headers with backward/skip/forward slots, rename ready → auto-approve
- columnshell: three-row header (title, subtitle, backward · skip · forward grid)
- inbox: auto-processing checkbox moves into subtitle, page top-right toggle removed
- review: add ← back to inbox, rename approve-all-ready to approve auto
- queue: clear button relabeled ← back to inbox (matches existing behaviour)
- done: add ← back to review (reopen-all), clear moves to middle skip slot
- seriescard: action row mirrors movie card, now at the top
-  ready badge →  auto-approve with tooltip, review subtitle uses "auto · need decisions"
- new endpoints: /api/review/unsort-all, /api/review/reopen-all (+ tests)
2026-04-19 13:16:29 +02:00

336 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { MediaItem, MediaStream, PlanResult } from "../types";
import { computeAppleCompat, isAppleCompatible, transcodeTarget } from "./apple-compat";
import { isExtractableSubtitle } from "./ffmpeg";
import { normalizeLanguage } from "./jellyfin";
/**
 * Values of `orig_lang_source` that analyzeItem accepts as authoritative
 * when deciding whether a plan may be classified as automatic.
 */
const AUTHORITATIVE_ORIG_SOURCES: ReadonlySet<string> = new Set(["radarr", "sonarr", "manual"]);
/**
 * Settings passed to analyzeItem.
 */
export interface AnalyzerConfig {
/**
 * Languages to keep in addition to the original language (OG).
 * Entries are compared as-is against the output of normalizeLanguage(),
 * and their position in this array determines the rank of the matching
 * audio track in the output order (after the OG track).
 */
audioLanguages: string[]; // additional languages to keep (after OG)
}
/**
 * Build the processing plan for one media item: a keep/remove decision per
 * stream, the output position of every kept stream, a transcode target for
 * every kept audio track, and the summary flags returned to the caller.
 *
 * Subtitles are ALWAYS removed from the container (they get extracted to
 * sidecar files). is_noop considers audio removal/reorder, subtitle
 * presence, transcode, default-disposition fixes and language-tag
 * canonicalisation — a "noop" is a file that needs no changes at all.
 *
 * @param item    The item fields the analysis reads: original language and
 *                its source, the review flag, and the container.
 * @param streams All streams of the file. One decision is produced per
 *                stream, in the same order.
 * @param config  Analyzer settings (additional audio languages to keep).
 * @returns The plan: is_noop, has_subs, auto_class, apple_compat, job_type,
 *          decisions and human-readable notes.
 */
export function analyzeItem(
  item: Pick<MediaItem, "original_language" | "orig_lang_source" | "needs_review" | "container">,
  streams: MediaStream[],
  config: AnalyzerConfig,
): PlanResult {
  const origLang = item.original_language ? normalizeLanguage(item.original_language) : null;

  // Id → stream lookup. On a duplicate id the first stream wins, which is
  // what a linear "find by id" scan over `streams` would return.
  const streamById = new Map<MediaStream["id"], MediaStream>();
  for (const stream of streams) {
    if (!streamById.has(stream.id)) streamById.set(stream.id, stream);
  }

  // First pass: language-driven keep/remove, one decision per stream.
  const decisions: PlanResult["decisions"] = [];
  for (const stream of streams) {
    decisions.push({
      stream_id: stream.id,
      action: decideAction(stream, origLang, config.audioLanguages),
      target_index: null,
      transcode_codec: null,
    });
  }

  // Id → decision lookup (first decision wins on a duplicate id). The
  // decision objects are mutated in place by the passes below, so the
  // references stored here always reflect the current state.
  const decisionById = new Map<MediaStream["id"], PlanResult["decisions"][number]>();
  for (const decision of decisions) {
    if (!decisionById.has(decision.stream_id)) decisionById.set(decision.stream_id, decision);
  }

  // Snapshot actions before dedup so we can distinguish language-driven removes
  // from commentary-title-driven removes when computing commentaryHeuristicFired.
  const actionBeforeDedup = new Map<number, "keep" | "remove">();
  for (const decision of decisions) {
    actionBeforeDedup.set(decision.stream_id, decision.action);
  }

  // Second pass: within each kept-language group, drop commentary/AD tracks
  // and alternate formats so we end up with at most one audio stream per
  // language. The user doesn't need 2× English (main + director's
  // commentary) — one well-chosen track is enough.
  deduplicateAudioByLanguage(streams, decisions, origLang);
  const anyAudioRemoved = streams.some(
    (stream, position) => stream.type === "Audio" && decisions[position]?.action === "remove",
  );

  assignTargetOrder(streams, decisions, origLang, config.audioLanguages);
  const audioOrderChanged = checkAudioOrderChanged(streams, decisions);

  // Collect the kept audio streams (in decision order) and pick a transcode
  // target for each of them in the same sweep.
  const keptAudioStreams: MediaStream[] = [];
  for (const decision of decisions) {
    if (decision.action !== "keep") continue;
    const stream = streamById.get(decision.stream_id);
    if (!stream || stream.type !== "Audio") continue;
    // Use Profile (DTS-HD MA, etc.) — NOT title — to pick the transcode target.
    // Passing title here used to cause lossless DTS-HD MA in MKV to fall back
    // to EAC3 instead of the better FLAC path when the title didn't happen to
    // contain "MA".
    decision.transcode_codec = transcodeTarget(stream.codec ?? "", stream.profile, item.container);
    keptAudioStreams.push(stream);
  }

  const needsTranscode = decisions.some((decision) => decision.transcode_codec != null);
  const apple_compat = computeAppleCompat(
    keptAudioStreams.map((stream) => stream.codec ?? ""),
    item.container,
  );
  const job_type: PlanResult["job_type"] = needsTranscode ? "transcode" : "copy";
  const hasSubs = streams.some((stream) => stream.type === "Subtitle");

  // Pipeline also sets default disposition on the first kept audio and writes
  // canonical iso3 language tags. If either is already wrong in the file,
  // running ffmpeg would produce a different output → not a noop.
  const targetIndexOf = (stream: MediaStream): number => decisionById.get(stream.id)?.target_index ?? 0;
  const [firstKeptAudio, ...laterKeptAudio] = [...keptAudioStreams].sort(
    (a, b) => targetIndexOf(a) - targetIndexOf(b),
  );
  const defaultMismatch = !!firstKeptAudio && firstKeptAudio.is_default !== 1;
  const nonDefaultHasDefault = laterKeptAudio.some((stream) => stream.is_default === 1);
  const languageMismatch = keptAudioStreams.some(
    (stream) => stream.language != null && stream.language !== normalizeLanguage(stream.language),
  );

  // A noop requires that none of the change triggers fired.
  const is_noop = !(
    anyAudioRemoved ||
    audioOrderChanged ||
    hasSubs ||
    needsTranscode ||
    defaultMismatch ||
    nonDefaultHasDefault ||
    languageMismatch
  );

  const notes: string[] = [];
  if (!origLang && item.needs_review) {
    notes.push("Original language unknown — audio tracks not filtered; manual review required");
  }

  // Surface image-based subtitles that can't be written to a sane
  // single-file sidecar. They'll still be stripped from the container,
  // but won't land on disk anywhere — the user sees this in the plan
  // notes so nothing vanishes silently.
  const nonExtractable = streams.filter(
    (stream) => stream.type === "Subtitle" && !isExtractableSubtitle(stream.codec),
  );
  if (nonExtractable.length > 0) {
    // codec → languages, in first-seen order.
    const languagesByCodec = new Map<string, string[]>();
    for (const stream of nonExtractable) {
      const codec = (stream.codec ?? "unknown").toLowerCase();
      const language = stream.language ?? "und";
      const bucket = languagesByCodec.get(codec);
      if (bucket) {
        bucket.push(language);
      } else {
        languagesByCodec.set(codec, [language]);
      }
    }
    const summary = [...languagesByCodec.entries()]
      .map(([codec, langs]) => `${codec} (${langs.join(", ")})`)
      .join("; ");
    notes.push(`${nonExtractable.length} subtitle(s) dropped: ${summary} — not extractable to sidecar`);
  }

  // Classification inputs: the OG language must come from an authoritative
  // source on an item that is not flagged for review, the OG track must be
  // among the kept audio, and every kept audio track must carry a language.
  const origLangSource = item.orig_lang_source ?? null;
  const authoritativeOg =
    !!origLang && !!origLangSource && AUTHORITATIVE_ORIG_SOURCES.has(origLangSource) && item.needs_review === 0;
  const keptAudioLanguages = keptAudioStreams.map((stream) =>
    stream.language ? normalizeLanguage(stream.language) : null,
  );
  const ogPresent = !!origLang && keptAudioLanguages.includes(origLang);
  const everyKeptHasLanguage = keptAudioStreams.length > 0 && keptAudioLanguages.every((lang) => lang != null);

  // Only count as heuristic-fired when the commentary regex itself CAUSED the
  // removal: track was "keep" after language-based decideAction, then flipped
  // to "remove" by deduplicateAudioByLanguage because of its title/flag.
  // A track removed for LANGUAGE reasons (keep→remove never happened) should
  // not upgrade the classification even if its title coincidentally matches.
  const commentaryHeuristicFired = decisions.some((decision) => {
    const flippedByDedup = actionBeforeDedup.get(decision.stream_id) === "keep" && decision.action === "remove";
    if (!flippedByDedup) return false;
    const stream = streamById.get(decision.stream_id);
    return !!stream && isCommentaryOrAuxiliary(stream);
  });

  const eligibleForAuto = authoritativeOg && ogPresent && everyKeptHasLanguage;
  const auto_class: PlanResult["auto_class"] = !eligibleForAuto
    ? "manual"
    : commentaryHeuristicFired
      ? "auto_heuristic"
      : "auto";

  return { is_noop, has_subs: hasSubs, auto_class, apple_compat, job_type, decisions, notes };
}
/**
 * Titles that scream "not the main track": commentary, director's track,
 * visually-impaired/audio-description, karaoke, sign language.
 * Case-insensitive.
 *
 * Multi-word phrases accept a whitespace character, hyphen, underscore or no
 * separator at all between the words, so "Audio Description",
 * "audio-description" and "Visually-Impaired" all match. The
 * "visual(ly) impaired" alternative also tolerates the spellings "visual",
 * "visuall" and "visualy".
 */
const NON_PRIMARY_AUDIO_TITLE =
  /\b(commentary|director'?s?\b.*\b(track|comment|feature)|audio[\s_-]?description|descriptive|visuall?y?[\s_-]?impaired|\bad\b|karaoke|sign[\s_-]?language)/i;
/**
 * True when a stream should not be treated as a main audio track: either
 * its hearing-impaired flag is set, or its title (empty string when absent)
 * matches NON_PRIMARY_AUDIO_TITLE.
 */
function isCommentaryOrAuxiliary(stream: MediaStream): boolean {
  // The flag short-circuits: the title is only inspected when it is unset.
  return stream.is_hearing_impaired ? true : NON_PRIMARY_AUDIO_TITLE.test(stream.title ?? "");
}
/**
 * Sort comparator for picking the "primary" audio track within a
 * single language group. A negative result means `a` sorts first (a wins).
 *
 * Criteria, applied in order until one of them differs:
 * 1. channel count, higher first (missing counts as 0)
 * 2. Apple-compatible codec first (as reported by isAppleCompatible)
 * 3. default disposition first (missing counts as 0)
 * 4. lower stream_index first (source order)
 */
function betterAudio(a: MediaStream, b: MediaStream): number {
  const channelDelta = (b.channels ?? 0) - (a.channels ?? 0);
  if (channelDelta !== 0) {
    return channelDelta;
  }

  // 1 for an Apple-compatible codec, 0 otherwise.
  const appleScore = (stream: MediaStream): number => (isAppleCompatible(stream.codec ?? "") ? 1 : 0);
  const scoreA = appleScore(a);
  const scoreB = appleScore(b);
  if (scoreB !== scoreA) {
    return scoreB - scoreA;
  }

  const defaultDelta = (b.is_default ?? 0) - (a.is_default ?? 0);
  return defaultDelta !== 0 ? defaultDelta : a.stream_index - b.stream_index;
}
/**
 * Second-pass audio filter. Mutates `decisions` in place, flipping "keep"
 * to "remove" for audio streams that are redundant:
 *
 * 1. every kept audio stream flagged by isCommentaryOrAuxiliary is removed;
 * 2. the remaining kept audio streams are grouped by normalized language and
 *    only the best one per group (by betterAudio) stays;
 * 3. kept audio streams without a language tag are deduplicated the same way,
 *    but only when the original language is known.
 *
 * NOTE(review): step 1 has no fallback — if every kept track of a language is
 * flagged as commentary/auxiliary, none of them survives. Confirm this is the
 * intended outcome.
 *
 * @param streams   All streams of the file.
 * @param decisions Per-stream decisions, looked up by stream_id.
 * @param origLang  Normalized original language, or null when unknown.
 */
function deduplicateAudioByLanguage(
  streams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
): void {
  const decisionFor = new Map(decisions.map((d) => [d.stream_id, d]));
  const isKept = (stream: MediaStream): boolean => decisionFor.get(stream.id)?.action === "keep";
  const drop = (stream: MediaStream): void => {
    const decision = decisionFor.get(stream.id);
    if (decision) decision.action = "remove";
  };
  // Keep the best-ranked stream of a group and drop every other member.
  const dropAllButBest = (group: MediaStream[]): void => {
    const ranked = [...group].sort(betterAudio);
    for (const loser of ranked.slice(1)) {
      drop(loser);
    }
  };

  const candidates = streams.filter((stream) => stream.type === "Audio" && isKept(stream));

  // 1. Flag commentary/AD tracks as remove regardless of language match.
  for (const stream of candidates) {
    if (isCommentaryOrAuxiliary(stream)) drop(stream);
  }

  // 2. Bucket the survivors by normalized language; untagged streams are
  //    collected separately because they are only deduplicated when the
  //    original language is known.
  const untagged: MediaStream[] = [];
  const groupsByLanguage = new Map<string, MediaStream[]>();
  for (const stream of candidates.filter(isKept)) {
    if (!stream.language) {
      untagged.push(stream);
      continue;
    }
    const language = normalizeLanguage(stream.language);
    const bucket = groupsByLanguage.get(language);
    if (bucket) {
      bucket.push(stream);
    } else {
      groupsByLanguage.set(language, [stream]);
    }
  }

  for (const group of groupsByLanguage.values()) {
    if (group.length > 1) dropAllButBest(group);
  }

  // Null-language audio: only dedupe when OG is known (so we already have
  // a primary pick). If OG is null we leave ambiguity alone.
  if (origLang && untagged.length > 1) {
    dropAllButBest(untagged);
  }
}
/**
 * First-pass decision for a single stream.
 *
 * - Subtitle streams are always removed.
 * - Audio streams are kept when the original language is unknown, when the
 *   stream has no language tag, or when the normalized language equals the
 *   original language or is listed in `audioLanguages`; otherwise removed.
 * - Every other stream type (Video, Data, EmbeddedImage, anything else) is kept.
 *
 * @param stream         The stream to classify.
 * @param origLang       Normalized original language, or null when unknown.
 * @param audioLanguages Additional languages to keep.
 * @returns "keep" or "remove".
 */
function decideAction(stream: MediaStream, origLang: string | null, audioLanguages: string[]): "keep" | "remove" {
  if (stream.type === "Subtitle") return "remove";
  if (stream.type !== "Audio") return "keep";

  // Audio from here on. Without a reference language or a tag on the
  // stream there is nothing to filter on, so the track stays.
  if (!origLang || !stream.language) return "keep";

  const language = normalizeLanguage(stream.language);
  const wanted = language === origLang || audioLanguages.includes(language);
  return wanted ? "keep" : "remove";
}
/**
 * Assign target_index to the decision of each kept stream. target_index is
 * the 0-based position within the stream's type group in the output file.
 * Audio streams are ordered by language rank first (OG first, then the
 * additional languages in configured order, then everything else) and by
 * original stream_index within the same rank; all other types keep their
 * input order. Decisions of removed streams are left untouched.
 *
 * @param allStreams     All streams of the file.
 * @param decisions      Per-stream decisions; mutated in place.
 * @param origLang       Normalized original language, or null when unknown.
 * @param audioLanguages Additional languages, in preferred output order.
 */
export function assignTargetOrder(
  allStreams: MediaStream[],
  decisions: PlanResult["decisions"],
  origLang: string | null,
  audioLanguages: string[],
): void {
  // Id → decision lookup; the first decision wins on a duplicate id.
  const decisionById = new Map<MediaStream["id"], PlanResult["decisions"][number]>();
  for (const decision of decisions) {
    if (!decisionById.has(decision.stream_id)) decisionById.set(decision.stream_id, decision);
  }

  // Bucket the kept streams by type, preserving input order inside a bucket.
  const keptGroups = new Map<string, MediaStream[]>();
  for (const stream of allStreams) {
    if (decisionById.get(stream.id)?.action !== "keep") continue;
    const bucket = keptGroups.get(stream.type);
    if (bucket) {
      bucket.push(stream);
    } else {
      keptGroups.set(stream.type, [stream]);
    }
  }

  // Only the audio bucket is reordered.
  keptGroups.get("Audio")?.sort((left, right) => {
    const rankDelta =
      langRank(left.language, origLang, audioLanguages) - langRank(right.language, origLang, audioLanguages);
    return rankDelta !== 0 ? rankDelta : left.stream_index - right.stream_index;
  });

  for (const group of keptGroups.values()) {
    group.forEach((stream, position) => {
      const decision = decisionById.get(stream.id);
      if (decision) decision.target_index = position;
    });
  }
}
/**
 * Sort rank of an audio language: 0 for the original language, 1..n for the
 * position of the normalized language in `audioLanguages`, and n + 1 for
 * everything else (including streams without a language tag).
 */
function langRank(lang: string | null, origLang: string | null, audioLanguages: string[]): number {
  const lastRank = audioLanguages.length + 1;
  if (!lang) return lastRank;

  const code = normalizeLanguage(lang);
  if (origLang && code === origLang) return 0;

  // An empty normalized code is never looked up in the configured list.
  const position = code ? audioLanguages.indexOf(code) : -1;
  return position >= 0 ? position + 1 : lastRank;
}
/**
 * True when the output order of kept audio streams differs from their
 * original order in the input: the kept audio streams are sorted by
 * stream_index and the i-th of them must have target_index i. A kept audio
 * stream with a missing decision or a null target_index counts as changed.
 */
function checkAudioOrderChanged(streams: MediaStream[], decisions: PlanResult["decisions"]): boolean {
  const decisionOf = (stream: MediaStream) => decisions.find((d) => d.stream_id === stream.id);

  const keptInSourceOrder = streams
    .filter((stream) => stream.type === "Audio" && decisionOf(stream)?.action === "keep")
    .sort((left, right) => left.stream_index - right.stream_index);

  return keptInSourceOrder.some((stream, position) => decisionOf(stream)?.target_index !== position);
}