fix '8 Mile is Turkish': Jellyfin guesses never earn high confidence
All checks were successful
Build and Push Docker Image / build (push) Successful in 28s

Two bugs compounded:

1. extractOriginalLanguage() in jellyfin.ts picked the FIRST audio stream's
   language and called it 'original'. Files sourced from non-English regions
   often have a local dub as track 0, so 8 Mile with a Turkish dub first
   got labelled Turkish.

2. scan.ts promoted any single-source answer to confidence='high' — even
   the pure Jellyfin guess, as long as no second source (Radarr/Sonarr)
   contradicted it. Jellyfin's dub-magnet guess should never be green.

Fixes:
- extractOriginalLanguage now prefers the IsDefault audio track and skips
  tracks whose title shouts 'dub' / 'commentary' / 'director'. Still a
  heuristic, but much less wrong. Falls back to the first track when every
  candidate looks like a dub, so we have *something* to flag.
- scan.ts: high confidence requires an authoritative source (Radarr/Sonarr)
  with no conflict. A Jellyfin-only answer is always low confidence AND
  gets needs_review=1 so it surfaces in the pipeline for manual override.
- Data migration (idempotent): downgrade existing plans backed only by the
  Jellyfin heuristic to low confidence and mark needs_review=1, so users
  don't have to rescan to benefit.
- New server/services/__tests__/jellyfin.test.ts covers the default-track
  preference and dub-skip behavior.
This commit is contained in:
2026-04-13 11:39:59 +02:00
parent e3b241bef3
commit 50d3e50280
4 changed files with 101 additions and 14 deletions

View File

@@ -218,9 +218,16 @@ async function runScan(limit: number | null = null): Promise<void> {
const tmdbId = providerIds.Tmdb ?? null;
const tvdbId = providerIds.Tvdb ?? null;
let origLang: string | null = extractOriginalLanguage(jellyfinItem);
let origLangSource = "jellyfin";
// Jellyfin has no real original_language field; extractOriginalLanguage
// guesses from the first/default audio stream. That's a DUB MAGNET —
// files uploaded from non-English regions often have a local dub first,
// so the "original" comes out as Turkish, German, etc. We record it
// as a starting point but treat it as unverified.
const jellyfinGuess = extractOriginalLanguage(jellyfinItem);
let origLang: string | null = jellyfinGuess;
let origLangSource = jellyfinGuess ? "jellyfin" : null;
let needsReview = origLang ? 0 : 1;
let authoritative = false; // set when Radarr/Sonarr answers
if (jellyfinItem.Type === "Movie" && radarrEnabled && (tmdbId || imdbId)) {
const lang = await radarrLang(
@@ -231,6 +238,7 @@ async function runScan(limit: number | null = null): Promise<void> {
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
origLang = lang;
origLangSource = "radarr";
authoritative = true;
}
}
@@ -240,17 +248,21 @@ async function runScan(limit: number | null = null): Promise<void> {
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
origLang = lang;
origLangSource = "sonarr";
authoritative = true;
}
}
// Compute confidence from source agreement
// High confidence requires an authoritative source (Radarr/Sonarr) and
// no conflict. A Jellyfin-only guess is ALWAYS low confidence and gets
// flagged for review — that's how 8 Mile landed as "Turkish": default
// audio was a Turkish dub, Radarr wasn't available or didn't have the
// movie, and the guess got a green 'high' badge it never earned.
let confidence: "high" | "low" = "low";
if (!origLang) {
confidence = "low"; // unknown language
} else if (needsReview) {
confidence = "low"; // sources disagree
} else {
confidence = "high"; // language known, no conflicts
if (origLang && authoritative && !needsReview) {
confidence = "high";
} else if (origLang && !authoritative) {
// Jellyfin guess only — surface it for manual review.
needsReview = 1;
}
upsertItem.run(