extract normalizeLanguage, guessOriginalLanguage to language-utils.ts

Move language normalization out of jellyfin.ts into its own module so non-Jellyfin services (ffmpeg, radarr, sonarr, analyzer) no longer depend on the Jellyfin service file. jellyfin.ts re-exports normalizeLanguage for backward compatibility.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 19:05:05 +02:00
parent 7cb2714793
commit 444d2eb733
6 changed files with 46 additions and 103 deletions
@@ -1,7 +1,7 @@
 import type { MediaItem, MediaStream, PlanResult } from "../types";
 import { computeAppleCompat, isAppleCompatible, transcodeTarget } from "./apple-compat";
 import { isExtractableSubtitle } from "./ffmpeg";
-import { normalizeLanguage } from "./jellyfin";
+import { normalizeLanguage } from "./language-utils";

 const AUTHORITATIVE_ORIG_SOURCES = new Set<string>(["radarr", "sonarr", "manual"]);

@@ -1,5 +1,5 @@
 import type { MediaItem, MediaStream, StreamDecision } from "../types";
-import { normalizeLanguage } from "./jellyfin";
+import { normalizeLanguage } from "./language-utils";

 // ─── Subtitle extraction helpers ──────────────────────────────────────────────

@@ -1,4 +1,5 @@
 import type { JellyfinItem, JellyfinMediaStream, JellyfinUser, MediaStream } from "../types";
+import { normalizeLanguage } from "./language-utils";

 export interface JellyfinConfig {
 	url: string;
@@ -246,102 +247,4 @@ export function mapStream(s: JellyfinMediaStream): Omit<MediaStream, "id" | "ite
 	};
 }

-// ISO 639-1 (2-letter) → ISO 639-2/B (3-letter) canonical form.
-// Used by normalizeLanguage so "en" and "eng" both resolve to "eng" and
-// the canonical-language check can flag files whose tags are still 2-letter.
-const ISO_1_TO_2: Record<string, string> = {
-	en: "eng",
-	de: "deu",
-	es: "spa",
-	fr: "fra",
-	it: "ita",
-	pt: "por",
-	ja: "jpn",
-	ko: "kor",
-	zh: "zho",
-	ar: "ara",
-	ru: "rus",
-	nl: "nld",
-	sv: "swe",
-	no: "nor",
-	da: "dan",
-	fi: "fin",
-	pl: "pol",
-	tr: "tur",
-	th: "tha",
-	hi: "hin",
-	hu: "hun",
-	cs: "ces",
-	ro: "ron",
-	el: "ell",
-	he: "heb",
-	fa: "fas",
-	uk: "ukr",
-	id: "ind",
-	ca: "cat",
-	nb: "nob",
-	nn: "nno",
-	is: "isl",
-	hr: "hrv",
-	sk: "slk",
-	bg: "bul",
-	sr: "srp",
-	sl: "slv",
-	lv: "lav",
-	lt: "lit",
-	et: "est",
-	vi: "vie",
-	ms: "msa",
-	ta: "tam",
-	te: "tel",
-};
-
-// ISO 639-2/T → ISO 639-2/B normalization + common aliases
-const LANG_ALIASES: Record<string, string> = {
-	// German: both /T (deu) and /B (ger) → deu
-	ger: "deu",
-	// Chinese
-	chi: "zho",
-	// French
-	fre: "fra",
-	// Dutch
-	dut: "nld",
-	// Modern Greek
-	gre: "ell",
-	// Hebrew
-	heb: "heb",
-	// Farsi
-	per: "fas",
-	// Romanian
-	rum: "ron",
-	// Malay
-	may: "msa",
-	// Tibetan
-	tib: "bod",
-	// Burmese
-	bur: "mya",
-	// Czech
-	cze: "ces",
-	// Slovak
-	slo: "slk",
-	// Georgian
-	geo: "kat",
-	// Icelandic
-	ice: "isl",
-	// Armenian
-	arm: "hye",
-	// Basque
-	baq: "eus",
-	// Albanian
-	alb: "sqi",
-	// Macedonian
-	mac: "mkd",
-	// Welsh
-	wel: "cym",
-};
-
-export function normalizeLanguage(lang: string): string {
-	const lower = lang.toLowerCase().trim();
-	if (ISO_1_TO_2[lower]) return ISO_1_TO_2[lower];
-	return LANG_ALIASES[lower] ?? lower;
-}
+export { normalizeLanguage } from "./language-utils";
@@ -0,0 +1,40 @@
+// ISO 639-1 (2-letter) → ISO 639-2/T (3-letter, terminologic) canonical form
+const ISO_1_TO_2: Record<string, string> = {
+	en: "eng", de: "deu", es: "spa", fr: "fra", it: "ita", pt: "por",
+	ja: "jpn", ko: "kor", zh: "zho", ar: "ara", ru: "rus", nl: "nld",
+	sv: "swe", no: "nor", da: "dan", fi: "fin", pl: "pol", tr: "tur",
+	th: "tha", hi: "hin", hu: "hun", cs: "ces", ro: "ron", el: "ell",
+	he: "heb", fa: "fas", uk: "ukr", id: "ind", ca: "cat", nb: "nob",
+	nn: "nno", is: "isl", hr: "hrv", sk: "slk", bg: "bul", sr: "srp",
+	sl: "slv", lv: "lav", lt: "lit", et: "est", vi: "vie", ms: "msa",
+	ta: "tam", te: "tel",
+};
+
+// ISO 639-2/B (bibliographic) → ISO 639-2/T (terminologic) normalization + common aliases
+const LANG_ALIASES: Record<string, string> = {
+	ger: "deu", chi: "zho", fre: "fra", dut: "nld", gre: "ell",
+	heb: "heb", per: "fas", rum: "ron", may: "msa", tib: "bod",
+	bur: "mya", cze: "ces", slo: "slk", geo: "kat", ice: "isl",
+	arm: "hye", baq: "eus", alb: "sqi", mac: "mkd", wel: "cym",
+};
+
+export function normalizeLanguage(lang: string): string {
+	const lower = lang.toLowerCase().trim();
+	if (ISO_1_TO_2[lower]) return ISO_1_TO_2[lower];
+	return LANG_ALIASES[lower] ?? lower;
+}
+
+const DUB_TITLE_HINTS = /(dub|dubb|synchro|commentary|director)/i;
+
+/**
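+ * Guess the original language from audio streams; returns the normalized code, or null if there are no streams or the picked track has no language.
+ * Heuristic: prefer the default track whose title is not a dub/commentary, then any non-dub/commentary track, then fall back to the first track.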
+ */
+export function guessOriginalLanguage(
+	audioStreams: { language: string | null; title: string | null; isDefault: number }[],
+): string | null {
+	if (audioStreams.length === 0) return null;
+	const notDub = (s: { title: string | null }) => !s.title || !DUB_TITLE_HINTS.test(s.title);
+	const pick = audioStreams.find((s) => s.isDefault && notDub(s)) ?? audioStreams.find(notDub) ?? audioStreams[0];
+	return pick.language ? normalizeLanguage(pick.language) : null;
+}
@@ -1,5 +1,5 @@
 import { error as logError, warn } from "../lib/log";
-import { normalizeLanguage } from "./jellyfin";
+import { normalizeLanguage } from "./language-utils";

 export interface RadarrConfig {
 	url: string;
@@ -1,5 +1,5 @@
 import { error as logError, warn } from "../lib/log";
-import { normalizeLanguage } from "./jellyfin";
+import { normalizeLanguage } from "./language-utils";

 export interface SonarrConfig {
 	url: string;