write canonical iso3 language metadata, tighten is_noop, store full jellyfin data
Some checks failed
Build and Push Docker Image / build (push) Failing after 16s

ffmpeg now writes -metadata:s:a:i language=<iso3> on every kept audio track so
files end up with canonical 3-letter tags (en → eng, ger → deu, null → und).
analyzer passes stream.profile (not title) to transcodeTarget so lossless
dts-hd ma in mkv correctly targets flac. is_noop also checks og-is-default and
canonical-language so pipeline-would-change-it cases stop showing as done.

normalizeLanguage gains 2→3 mapping, and mapStream no longer normalizes at
ingest so the raw jellyfin tag survives for the canonical check.

per-item scan work runs in a single db.transaction for large sqlite speedups,
extracted into server/services/rescan.ts so execute.ts can reuse it.

on successful job, execute calls jellyfin /Items/{id}/Refresh, waits for
DateLastRefreshed to change, refetches the item, and upserts it through the
same pipeline; plan flips to done iff the fresh streams satisfy is_noop.

schema wiped + rewritten to carry jellyfin_raw, external_raw, profile,
bit_depth, date_last_refreshed, runtime_ticks, original_title, last_executed_at
— so future scans aren't required to stay correct. user must drop data/*.db.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-13 13:56:19 +02:00
parent cc418e5874
commit 6fcaeca82c
12 changed files with 576 additions and 289 deletions

View File

@@ -1,9 +1,12 @@
import { accessSync, constants } from "node:fs";
import { Hono } from "hono";
import { stream } from "hono/streaming";
import { getDb } from "../db/index";
import { log, error as logError } from "../lib/log";
import { getAllConfig, getDb } from "../db/index";
import { log, error as logError, warn } from "../lib/log";
import { predictExtractedFiles } from "../services/ffmpeg";
import { getItem, refreshItem } from "../services/jellyfin";
import { loadLibrary as loadRadarrLibrary, isUsable as radarrUsable } from "../services/radarr";
import { upsertJellyfinItem } from "../services/rescan";
import {
getSchedulerState,
isInScheduleWindow,
@@ -13,8 +16,73 @@ import {
updateSchedulerState,
waitForWindow,
} from "../services/scheduler";
import { loadLibrary as loadSonarrLibrary, isUsable as sonarrUsable } from "../services/sonarr";
import type { Job, MediaItem, MediaStream } from "../types";
/**
 * Decode a JSON-encoded list of language codes taken from a config value.
 *
 * @param raw - JSON text expected to hold an array; may be null, undefined or empty.
 * @param fallback - Returned unchanged when `raw` is empty, is not valid JSON,
 *   or does not decode to an array.
 * @returns The string elements of the decoded array (non-string entries are
 *   dropped), or `fallback`.
 */
function parseLanguageList(raw: string | null | undefined, fallback: string[]): string[] {
  if (raw === null || raw === undefined || raw === "") return fallback;

  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    // Malformed config value — behave as if nothing was configured.
    return fallback;
  }
  if (!Array.isArray(decoded)) return fallback;

  const languages: string[] = [];
  for (const entry of decoded) {
    if (typeof entry === "string") languages.push(entry);
  }
  return languages;
}
/**
 * Re-sync a single media item from Jellyfin once its job has succeeded.
 *
 * Steps, in order:
 *  1. Look up the item's Jellyfin id; do nothing if the row is gone.
 *  2. Do nothing if the Jellyfin URL or API key is not configured.
 *  3. Ask Jellyfin to refresh the item. A failure here is only logged — the
 *     item is still fetched afterwards.
 *  4. Fetch the item; log and stop if Jellyfin returns nothing.
 *  5. Load the Radarr/Sonarr libraries (only for the enabled + usable ones)
 *     and push the fresh item through `upsertJellyfinItem` with
 *     `executed: true`.
 *
 * How the review plan's status changes as a result is decided inside
 * `upsertJellyfinItem`, not here.
 *
 * @param itemId - Internal `media_items.id` of the item whose job just finished.
 */
async function refreshItemFromJellyfin(itemId: number): Promise<void> {
  const db = getDb();
  const row = db.prepare("SELECT jellyfin_id FROM media_items WHERE id = ?").get(itemId) as
    | { jellyfin_id: string }
    | undefined;
  if (!row) return;
  const jellyfinId = row.jellyfin_id;

  const cfg = getAllConfig();
  const jellyfinCfg = { url: cfg.jellyfin_url, apiKey: cfg.jellyfin_api_key, userId: cfg.jellyfin_user_id };
  const jellyfinConfigured = Boolean(jellyfinCfg.url) && Boolean(jellyfinCfg.apiKey);
  if (!jellyfinConfigured) return;

  // Best effort: a failed refresh request must not prevent the re-fetch below.
  try {
    await refreshItem(jellyfinCfg, jellyfinId);
  } catch (err) {
    warn(`Jellyfin refresh for item ${itemId} failed: ${String(err)}`);
  }

  const fresh = await getItem(jellyfinCfg, jellyfinId);
  if (!fresh) {
    warn(`Jellyfin returned no item for ${jellyfinId} after refresh`);
    return;
  }

  const radarrCfg = { url: cfg.radarr_url, apiKey: cfg.radarr_api_key };
  const sonarrCfg = { url: cfg.sonarr_url, apiKey: cfg.sonarr_api_key };
  const radarrEnabled = cfg.radarr_enabled === "1" && radarrUsable(radarrCfg);
  const sonarrEnabled = cfg.sonarr_enabled === "1" && sonarrUsable(sonarrCfg);

  // Kick off both library loads before awaiting either so they run concurrently.
  const radarrPending = radarrEnabled ? loadRadarrLibrary(radarrCfg) : Promise.resolve(null);
  const sonarrPending = sonarrEnabled ? loadSonarrLibrary(sonarrCfg) : Promise.resolve(null);
  const [radarrLibrary, sonarrLibrary] = await Promise.all([radarrPending, sonarrPending]);

  const rescanCfg = {
    subtitleLanguages: parseLanguageList(cfg.subtitle_languages, ["eng", "deu", "spa"]),
    audioLanguages: parseLanguageList(cfg.audio_languages, []),
    radarr: radarrEnabled ? radarrCfg : null,
    sonarr: sonarrEnabled ? sonarrCfg : null,
    radarrLibrary,
    sonarrLibrary,
  };
  await upsertJellyfinItem(db, fresh, rescanCfg, { executed: true });
}
const app = new Hono();
// ─── Sequential local queue ──────────────────────────────────────────────────
@@ -435,6 +503,16 @@ async function runJob(job: Job): Promise<void> {
log(`Job ${job.id} completed successfully`);
emitJobUpdate(job.id, "done", fullOutput);
// Ask Jellyfin to rescan the file and pull the fresh metadata so our DB
// reflects what actually ended up on disk. If the refreshed streams still
// don't satisfy is_noop (e.g. a codec didn't transcode as planned), the
// plan flips back to 'pending' in the same upsert and the UI shows it.
try {
await refreshItemFromJellyfin(job.item_id);
} catch (refreshErr) {
warn(`Post-job refresh for item ${job.item_id} failed: ${String(refreshErr)}`);
}
} catch (err) {
logError(`Job ${job.id} failed:`, err);
const fullOutput = `${outputLines.join("\n")}\n${String(err)}`;

View File

@@ -2,19 +2,10 @@ import { Hono } from "hono";
import { stream } from "hono/streaming";
import { getAllConfig, getConfig, getDb, setConfig } from "../db/index";
import { log, error as logError, warn } from "../lib/log";
import { analyzeItem } from "../services/analyzer";
import { extractOriginalLanguage, getAllItems, getDevItems, mapStream, normalizeLanguage } from "../services/jellyfin";
import {
loadLibrary as loadRadarrLibrary,
getOriginalLanguage as radarrLang,
isUsable as radarrUsable,
} from "../services/radarr";
import {
loadLibrary as loadSonarrLibrary,
getOriginalLanguage as sonarrLang,
isUsable as sonarrUsable,
} from "../services/sonarr";
import type { MediaStream } from "../types";
import { getAllItems, getDevItems } from "../services/jellyfin";
import { loadLibrary as loadRadarrLibrary, isUsable as radarrUsable } from "../services/radarr";
import { upsertJellyfinItem } from "../services/rescan";
import { loadLibrary as loadSonarrLibrary, isUsable as sonarrUsable } from "../services/sonarr";
const app = new Hono();
@@ -186,63 +177,25 @@ async function runScan(limit: number | null = null): Promise<void> {
log(
`External language sources: radarr=${radarrEnabled ? `enabled (${cfg.radarr_url}, ${radarrLibrary?.byTmdbId.size ?? 0} movies in library)` : "disabled"}, sonarr=${sonarrEnabled ? `enabled (${cfg.sonarr_url}, ${sonarrLibrary?.byTvdbId.size ?? 0} series in library)` : "disabled"}`,
);
let processed = 0;
let errors = 0;
let total = 0;
const rescanCfg = {
subtitleLanguages,
audioLanguages,
radarr: radarrEnabled ? radarrCfg : null,
sonarr: sonarrEnabled ? sonarrCfg : null,
radarrLibrary,
sonarrLibrary,
};
let radarrMisses = 0;
let radarrHits = 0;
let sonarrMisses = 0;
let sonarrHits = 0;
let missingProviderIds = 0;
let processed = 0;
let errors = 0;
let total = 0;
const upsertItem = db.prepare(`
INSERT INTO media_items (
jellyfin_id, type, name, series_name, series_jellyfin_id,
season_number, episode_number, year, file_path, file_size, container,
original_language, orig_lang_source, needs_review,
imdb_id, tmdb_id, tvdb_id,
scan_status, last_scanned_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'scanned', datetime('now'))
ON CONFLICT(jellyfin_id) DO UPDATE SET
type = excluded.type, name = excluded.name, series_name = excluded.series_name,
series_jellyfin_id = excluded.series_jellyfin_id, season_number = excluded.season_number,
episode_number = excluded.episode_number, year = excluded.year, file_path = excluded.file_path,
file_size = excluded.file_size, container = excluded.container,
original_language = excluded.original_language, orig_lang_source = excluded.orig_lang_source,
needs_review = excluded.needs_review, imdb_id = excluded.imdb_id,
tmdb_id = excluded.tmdb_id, tvdb_id = excluded.tvdb_id,
scan_status = 'scanned', last_scanned_at = datetime('now')
`);
const deleteStreams = db.prepare("DELETE FROM media_streams WHERE item_id = ?");
const insertStream = db.prepare(`
INSERT INTO media_streams (
item_id, stream_index, type, codec, language, language_display,
title, is_default, is_forced, is_hearing_impaired,
channels, channel_layout, bit_rate, sample_rate
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
const upsertPlan = db.prepare(`
INSERT INTO review_plans (item_id, status, is_noop, confidence, apple_compat, job_type, notes)
VALUES (?, 'pending', ?, ?, ?, ?, ?)
ON CONFLICT(item_id) DO UPDATE SET
status = CASE WHEN review_plans.status IN ('done','error') THEN 'pending' ELSE review_plans.status END,
is_noop = excluded.is_noop,
confidence = excluded.confidence,
apple_compat = excluded.apple_compat,
job_type = excluded.job_type,
notes = excluded.notes
`);
const upsertDecision = db.prepare(`
INSERT INTO stream_decisions (plan_id, stream_id, action, target_index, transcode_codec)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT(plan_id, stream_id) DO UPDATE SET action = excluded.action, target_index = excluded.target_index, transcode_codec = excluded.transcode_codec
`);
const getItemByJellyfinId = db.prepare("SELECT id FROM media_items WHERE jellyfin_id = ?");
const getPlanByItemId = db.prepare("SELECT id FROM review_plans WHERE item_id = ?");
const getStreamsByItemId = db.prepare("SELECT * FROM media_streams WHERE item_id = ?");
const itemSource = isDev
? getDevItems(jellyfinCfg)
: getAllItems(jellyfinCfg, (_fetched, jellyfinTotal) => {
@@ -260,144 +213,12 @@ async function runScan(limit: number | null = null): Promise<void> {
emitSse("progress", { scanned: processed, total, current_item: jellyfinItem.Name, errors, running: true });
try {
const providerIds = jellyfinItem.ProviderIds ?? {};
const imdbId = providerIds.Imdb ?? null;
const tmdbId = providerIds.Tmdb ?? null;
const tvdbId = providerIds.Tvdb ?? null;
// Jellyfin has no real original_language field; extractOriginalLanguage
// guesses from the first/default audio stream. That's a DUB MAGNET —
// files uploaded from non-English regions often have a local dub first,
// so the "original" comes out as Turkish, German, etc. We record it
// as a starting point but treat it as unverified.
const jellyfinGuess = extractOriginalLanguage(jellyfinItem);
let origLang: string | null = jellyfinGuess;
let origLangSource = jellyfinGuess ? "jellyfin" : null;
let needsReview = origLang ? 0 : 1;
let authoritative = false; // set when Radarr/Sonarr answers
if (jellyfinItem.Type === "Movie" && radarrEnabled && radarrLibrary) {
if (!tmdbId && !imdbId) {
missingProviderIds++;
warn(`No tmdb/imdb id on '${jellyfinItem.Name}' — Radarr lookup skipped`);
} else {
const lang = await radarrLang(
radarrCfg,
{ tmdbId: tmdbId ?? undefined, imdbId: imdbId ?? undefined },
radarrLibrary,
);
if (lang) {
radarrHits++;
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
origLang = lang;
origLangSource = "radarr";
authoritative = true;
} else {
radarrMisses++;
warn(
`Radarr returned no language for '${jellyfinItem.Name}' (tmdb=${tmdbId ?? "-"} imdb=${imdbId ?? "-"}) — falling back to Jellyfin guess`,
);
}
}
}
if (jellyfinItem.Type === "Episode" && sonarrEnabled && sonarrLibrary) {
if (!tvdbId) {
missingProviderIds++;
warn(`No tvdb id on '${jellyfinItem.Name}' — Sonarr lookup skipped`);
} else {
const lang = await sonarrLang(sonarrCfg, tvdbId, sonarrLibrary);
if (lang) {
sonarrHits++;
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
origLang = lang;
origLangSource = "sonarr";
authoritative = true;
} else {
sonarrMisses++;
warn(`Sonarr returned no language for '${jellyfinItem.Name}' (tvdb=${tvdbId}) — falling back to Jellyfin guess`);
}
}
}
// High confidence requires an authoritative source (Radarr/Sonarr) and
// no conflict. A Jellyfin-only guess is ALWAYS low confidence and gets
// flagged for review — that's how 8 Mile landed as "Turkish": default
// audio was a Turkish dub, Radarr wasn't available or didn't have the
// movie, and the guess got a green 'high' badge it never earned.
let confidence: "high" | "low" = "low";
if (origLang && authoritative && !needsReview) {
confidence = "high";
} else if (origLang && !authoritative) {
// Jellyfin guess only — surface it for manual review.
needsReview = 1;
}
upsertItem.run(
jellyfinItem.Id,
jellyfinItem.Type === "Episode" ? "Episode" : "Movie",
jellyfinItem.Name,
jellyfinItem.SeriesName ?? null,
jellyfinItem.SeriesId ?? null,
jellyfinItem.ParentIndexNumber ?? null,
jellyfinItem.IndexNumber ?? null,
jellyfinItem.ProductionYear ?? null,
jellyfinItem.Path,
jellyfinItem.Size ?? null,
jellyfinItem.Container ?? null,
origLang,
origLangSource,
needsReview,
imdbId,
tmdbId,
tvdbId,
);
const itemRow = getItemByJellyfinId.get(jellyfinItem.Id) as { id: number };
const itemId = itemRow.id;
deleteStreams.run(itemId);
for (const jStream of jellyfinItem.MediaStreams ?? []) {
if (jStream.IsExternal) continue; // skip external subs — not embedded in container
const s = mapStream(jStream);
insertStream.run(
itemId,
s.stream_index,
s.type,
s.codec,
s.language,
s.language_display,
s.title,
s.is_default,
s.is_forced,
s.is_hearing_impaired,
s.channels,
s.channel_layout,
s.bit_rate,
s.sample_rate,
);
}
const streams = getStreamsByItemId.all(itemId) as MediaStream[];
const analysis = analyzeItem(
{ original_language: origLang, needs_review: needsReview, container: jellyfinItem.Container ?? null },
streams,
{ subtitleLanguages, audioLanguages },
);
// Override base confidence with scan-computed value
const finalConfidence = confidence;
upsertPlan.run(
itemId,
analysis.is_noop ? 1 : 0,
finalConfidence,
analysis.apple_compat,
analysis.job_type,
analysis.notes.length > 0 ? analysis.notes.join("\n") : null,
);
const planRow = getPlanByItemId.get(itemId) as { id: number };
for (const dec of analysis.decisions)
upsertDecision.run(planRow.id, dec.stream_id, dec.action, dec.target_index, dec.transcode_codec);
const result = await upsertJellyfinItem(db, jellyfinItem, rescanCfg);
if (result.radarrHit) radarrHits++;
if (result.radarrMiss) radarrMisses++;
if (result.sonarrHit) sonarrHits++;
if (result.sonarrMiss) sonarrMisses++;
if (result.missingProviderId) missingProviderIds++;
emitSse("log", { name: jellyfinItem.Name, type: jellyfinItem.Type, status: "scanned", file: jellyfinItem.Path });
} catch (err) {
errors++;

View File

@@ -51,59 +51,6 @@ export function getDb(): Database {
if (_db) return _db;
_db = new Database(dbPath, { create: true });
_db.exec(SCHEMA);
// Migrations for columns added after initial release
try {
_db.exec("ALTER TABLE stream_decisions ADD COLUMN custom_title TEXT");
} catch {
/* already exists */
}
try {
_db.exec("ALTER TABLE review_plans ADD COLUMN subs_extracted INTEGER NOT NULL DEFAULT 0");
} catch {
/* already exists */
}
try {
_db.exec("ALTER TABLE jobs ADD COLUMN job_type TEXT NOT NULL DEFAULT 'audio'");
} catch {
/* already exists */
}
// Apple compat pipeline columns
try {
_db.exec("ALTER TABLE review_plans ADD COLUMN confidence TEXT NOT NULL DEFAULT 'low'");
} catch {
/* already exists */
}
try {
_db.exec("ALTER TABLE review_plans ADD COLUMN apple_compat TEXT");
} catch {
/* already exists */
}
try {
_db.exec("ALTER TABLE review_plans ADD COLUMN job_type TEXT NOT NULL DEFAULT 'copy'");
} catch {
/* already exists */
}
try {
_db.exec("ALTER TABLE stream_decisions ADD COLUMN transcode_codec TEXT");
} catch {
/* already exists */
}
// Data migration (idempotent): any plan whose original_language came from
// the Jellyfin heuristic is downgraded to low confidence and flagged for
// review. Previous scans marked these 'high' when no other source
// disagreed — but Jellyfin's guess isn't authoritative, so it shouldn't
// have been green in the first place. Only touch pending/error plans so
// already-processed work isn't clobbered.
_db.exec(`
UPDATE media_items SET needs_review = 1
WHERE orig_lang_source = 'jellyfin' AND original_language IS NOT NULL AND needs_review = 0;
UPDATE review_plans SET confidence = 'low'
WHERE confidence = 'high'
AND status IN ('pending', 'error')
AND item_id IN (SELECT id FROM media_items WHERE orig_lang_source = 'jellyfin');
`);
seedDefaults(_db);
return _db;

View File

@@ -12,6 +12,7 @@ CREATE TABLE IF NOT EXISTS media_items (
jellyfin_id TEXT NOT NULL UNIQUE,
type TEXT NOT NULL,
name TEXT NOT NULL,
original_title TEXT,
series_name TEXT,
series_jellyfin_id TEXT,
season_number INTEGER,
@@ -20,15 +21,20 @@ CREATE TABLE IF NOT EXISTS media_items (
file_path TEXT NOT NULL,
file_size INTEGER,
container TEXT,
runtime_ticks INTEGER,
date_last_refreshed TEXT,
original_language TEXT,
orig_lang_source TEXT,
needs_review INTEGER NOT NULL DEFAULT 1,
imdb_id TEXT,
tmdb_id TEXT,
tvdb_id TEXT,
jellyfin_raw TEXT,
external_raw TEXT,
scan_status TEXT NOT NULL DEFAULT 'pending',
scan_error TEXT,
last_scanned_at TEXT,
last_executed_at TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
@@ -38,6 +44,7 @@ CREATE TABLE IF NOT EXISTS media_streams (
stream_index INTEGER NOT NULL,
type TEXT NOT NULL,
codec TEXT,
profile TEXT,
language TEXT,
language_display TEXT,
title TEXT,
@@ -48,26 +55,32 @@ CREATE TABLE IF NOT EXISTS media_streams (
channel_layout TEXT,
bit_rate INTEGER,
sample_rate INTEGER,
bit_depth INTEGER,
UNIQUE(item_id, stream_index)
);
CREATE TABLE IF NOT EXISTS review_plans (
id INTEGER PRIMARY KEY AUTOINCREMENT,
item_id INTEGER NOT NULL UNIQUE REFERENCES media_items(id) ON DELETE CASCADE,
status TEXT NOT NULL DEFAULT 'pending',
is_noop INTEGER NOT NULL DEFAULT 0,
notes TEXT,
reviewed_at TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
id INTEGER PRIMARY KEY AUTOINCREMENT,
item_id INTEGER NOT NULL UNIQUE REFERENCES media_items(id) ON DELETE CASCADE,
status TEXT NOT NULL DEFAULT 'pending',
is_noop INTEGER NOT NULL DEFAULT 0,
confidence TEXT NOT NULL DEFAULT 'low',
apple_compat TEXT,
job_type TEXT NOT NULL DEFAULT 'copy',
subs_extracted INTEGER NOT NULL DEFAULT 0,
notes TEXT,
reviewed_at TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS stream_decisions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
plan_id INTEGER NOT NULL REFERENCES review_plans(id) ON DELETE CASCADE,
stream_id INTEGER NOT NULL REFERENCES media_streams(id) ON DELETE CASCADE,
action TEXT NOT NULL,
target_index INTEGER,
custom_title TEXT,
id INTEGER PRIMARY KEY AUTOINCREMENT,
plan_id INTEGER NOT NULL REFERENCES review_plans(id) ON DELETE CASCADE,
stream_id INTEGER NOT NULL REFERENCES media_streams(id) ON DELETE CASCADE,
action TEXT NOT NULL,
target_index INTEGER,
custom_title TEXT,
transcode_codec TEXT,
UNIQUE(plan_id, stream_id)
);
@@ -87,7 +100,7 @@ CREATE TABLE IF NOT EXISTS jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
item_id INTEGER NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
command TEXT NOT NULL,
job_type TEXT NOT NULL DEFAULT 'audio',
job_type TEXT NOT NULL DEFAULT 'copy',
status TEXT NOT NULL DEFAULT 'pending',
output TEXT,
exit_code INTEGER,

View File

@@ -8,6 +8,7 @@ function stream(o: StreamOverride): MediaStream {
return {
item_id: 1,
codec: null,
profile: null,
language: null,
language_display: null,
title: null,
@@ -18,6 +19,7 @@ function stream(o: StreamOverride): MediaStream {
channel_layout: null,
bit_rate: null,
sample_rate: null,
bit_depth: null,
...o,
};
}
@@ -107,10 +109,10 @@ describe("analyzeItem — audio ordering", () => {
expect(result.is_noop).toBe(false);
});
test("audioOrderChanged is_noop=true when OG audio is already first", () => {
test("audioOrderChanged is_noop=true when OG audio is already first and default", () => {
const streams = [
stream({ id: 1, type: "Video", stream_index: 0, codec: "h264" }),
stream({ id: 2, type: "Audio", stream_index: 1, codec: "aac", language: "eng" }),
stream({ id: 2, type: "Audio", stream_index: 1, codec: "aac", language: "eng", is_default: 1 }),
stream({ id: 3, type: "Audio", stream_index: 2, codec: "aac", language: "deu" }),
];
const result = analyzeItem({ ...ITEM_DEFAULTS, original_language: "eng" }, streams, {
@@ -148,10 +150,10 @@ describe("analyzeItem — subtitles & is_noop", () => {
expect(result.is_noop).toBe(false); // subs present → not noop
});
test("no audio change, no subs → is_noop true", () => {
test("no audio change, no subs, OG already default+canonical → is_noop true", () => {
const streams = [
stream({ id: 1, type: "Video", stream_index: 0, codec: "h264" }),
stream({ id: 2, type: "Audio", stream_index: 1, codec: "aac", language: "eng" }),
stream({ id: 2, type: "Audio", stream_index: 1, codec: "aac", language: "eng", is_default: 1 }),
];
const result = analyzeItem({ ...ITEM_DEFAULTS, original_language: "eng" }, streams, {
subtitleLanguages: [],
@@ -159,6 +161,30 @@ describe("analyzeItem — subtitles & is_noop", () => {
});
expect(result.is_noop).toBe(true);
});
test("OG audio present but not default → is_noop false (pipeline would set default)", () => {
  // The only audio track matches the original language and already carries a
  // canonical tag, but its default flag is cleared.
  const video = stream({ id: 1, type: "Video", stream_index: 0, codec: "h264" });
  const audio = stream({ id: 2, type: "Audio", stream_index: 1, codec: "aac", language: "eng", is_default: 0 });
  const item = { ...ITEM_DEFAULTS, original_language: "eng" };
  const languagePrefs = { subtitleLanguages: [], audioLanguages: [] };

  const { is_noop } = analyzeItem(item, [video, audio], languagePrefs);

  expect(is_noop).toBe(false);
});
test("non-canonical language tag (en instead of eng) → is_noop false", () => {
  // The audio track is already the default, but its tag is the 2-letter "en"
  // rather than the 3-letter "eng".
  const video = stream({ id: 1, type: "Video", stream_index: 0, codec: "h264" });
  const audio = stream({ id: 2, type: "Audio", stream_index: 1, codec: "aac", language: "en", is_default: 1 });
  const item = { ...ITEM_DEFAULTS, original_language: "eng" };
  const languagePrefs = { subtitleLanguages: [], audioLanguages: [] };

  const { is_noop } = analyzeItem(item, [video, audio], languagePrefs);

  expect(is_noop).toBe(false);
});
});
describe("analyzeItem — transcode targets", () => {

View File

@@ -6,6 +6,7 @@ function stream(o: Partial<MediaStream> & Pick<MediaStream, "id" | "type" | "str
return {
item_id: 1,
codec: null,
profile: null,
language: null,
language_display: null,
title: null,
@@ -16,6 +17,7 @@ function stream(o: Partial<MediaStream> & Pick<MediaStream, "id" | "type" | "str
channel_layout: null,
bit_rate: null,
sample_rate: null,
bit_depth: null,
...o,
};
}
@@ -36,6 +38,7 @@ const ITEM: MediaItem = {
jellyfin_id: "x",
type: "Movie",
name: "Test",
original_title: null,
series_name: null,
series_jellyfin_id: null,
season_number: null,
@@ -44,15 +47,20 @@ const ITEM: MediaItem = {
file_path: "/movies/Test.mkv",
file_size: null,
container: "mkv",
runtime_ticks: null,
date_last_refreshed: null,
original_language: "eng",
orig_lang_source: "jellyfin",
needs_review: 0,
imdb_id: null,
tmdb_id: null,
tvdb_id: null,
jellyfin_raw: null,
external_raw: null,
scan_status: "scanned",
scan_error: null,
last_scanned_at: null,
last_executed_at: null,
created_at: "",
};
@@ -155,6 +163,25 @@ describe("buildCommand", () => {
expect(cmd).toContain("'/movies/Test.de.srt'");
});
test("writes canonical iso3 language metadata on every kept audio stream", () => {
  // One video track plus three audio tracks, each with a differently-shaped tag.
  const video = stream({ id: 1, type: "Video", stream_index: 0 });
  const twoLetter = stream({ id: 2, type: "Audio", stream_index: 1, language: "en" }); // expected: eng
  const aliased = stream({ id: 3, type: "Audio", stream_index: 2, language: "ger" }); // expected: deu
  const untagged = stream({ id: 4, type: "Audio", stream_index: 3, language: null }); // expected: und

  // Keep everything; audio target indices follow the stream order above.
  const keepAll = [
    decision({ stream_id: 1, action: "keep", target_index: 0 }),
    decision({ stream_id: 2, action: "keep", target_index: 0 }),
    decision({ stream_id: 3, action: "keep", target_index: 1 }),
    decision({ stream_id: 4, action: "keep", target_index: 2 }),
  ];

  const cmd = buildCommand(ITEM, [video, twoLetter, aliased, untagged], keepAll);

  for (const expectedFlag of [
    "-metadata:s:a:0 language=eng",
    "-metadata:s:a:1 language=deu",
    "-metadata:s:a:2 language=und",
  ]) {
    expect(cmd).toContain(expectedFlag);
  }
});
test("sets first kept audio as default, clears others", () => {
const streams = [
stream({ id: 1, type: "Video", stream_index: 0 }),

View File

@@ -39,22 +39,49 @@ export function analyzeItem(
if (d.action !== "keep") continue;
const stream = streams.find((s) => s.id === d.stream_id);
if (stream && stream.type === "Audio") {
d.transcode_codec = transcodeTarget(stream.codec ?? "", stream.title, item.container);
// Use Profile (DTS-HD MA, etc.) — NOT title — to pick the transcode target.
// Passing title here used to cause lossless DTS-HD MA in MKV to fall back
// to EAC3 instead of the better FLAC path when the title didn't happen to
// contain "MA".
d.transcode_codec = transcodeTarget(stream.codec ?? "", stream.profile, item.container);
}
}
const keptAudioCodecs = decisions
const keptAudioStreams = decisions
.filter((d) => d.action === "keep")
.map((d) => streams.find((s) => s.id === d.stream_id))
.filter((s): s is MediaStream => !!s && s.type === "Audio")
.map((s) => s.codec ?? "");
.filter((s): s is MediaStream => !!s && s.type === "Audio");
const keptAudioCodecs = keptAudioStreams.map((s) => s.codec ?? "");
const needsTranscode = decisions.some((d) => d.transcode_codec != null);
const apple_compat = computeAppleCompat(keptAudioCodecs, item.container);
const job_type = needsTranscode ? ("transcode" as const) : ("copy" as const);
const hasSubs = streams.some((s) => s.type === "Subtitle");
const is_noop = !anyAudioRemoved && !audioOrderChanged && !hasSubs && !needsTranscode;
// Pipeline also sets default disposition on the first kept audio and writes
// canonical iso3 language tags. If either is already wrong in the file,
// running ffmpeg would produce a different output → not a noop.
const keptAudioSorted = [...keptAudioStreams].sort((a, b) => {
const ai = decisions.find((d) => d.stream_id === a.id)?.target_index ?? 0;
const bi = decisions.find((d) => d.stream_id === b.id)?.target_index ?? 0;
return ai - bi;
});
const firstKeptAudio = keptAudioSorted[0];
const defaultMismatch = !!firstKeptAudio && firstKeptAudio.is_default !== 1;
const nonDefaultHasDefault = keptAudioSorted.slice(1).some((s) => s.is_default === 1);
const languageMismatch = keptAudioStreams.some(
(s) => s.language != null && s.language !== normalizeLanguage(s.language),
);
const is_noop =
!anyAudioRemoved &&
!audioOrderChanged &&
!hasSubs &&
!needsTranscode &&
!defaultMismatch &&
!nonDefaultHasDefault &&
!languageMismatch;
if (!origLang && item.needs_review) {
notes.push("Original language unknown — audio tracks not filtered; manual review required");

View File

@@ -259,20 +259,21 @@ function buildMaps(allStreams: MediaStream[], kept: { stream: MediaStream; dec:
* Build disposition and metadata flags for kept audio streams.
* - Marks the first kept audio stream as default, clears all others.
* - Sets harmonized language-name titles on all kept audio streams.
* - Writes canonical 3-letter language tags via normalizeLanguage, using the
* ISO 639-2/T form where /B and /T differ (e.g. "en" → "eng", "ger" → "deu").
* Streams with no language get "und" (ffmpeg convention).
*/
function buildStreamFlags(kept: { stream: MediaStream; dec: StreamDecision }[]): string[] {
const audioKept = kept.filter((k) => k.stream.type === "Audio");
const args: string[] = [];
// Disposition: first audio = default, rest = clear
audioKept.forEach((_, i) => {
args.push(`-disposition:a:${i}`, i === 0 ? "default" : "0");
});
// Titles for audio streams (custom_title overrides generated title)
audioKept.forEach((k, i) => {
args.push(`-disposition:a:${i}`, i === 0 ? "default" : "0");
const title = k.dec.custom_title ?? trackTitle(k.stream);
if (title) args.push(`-metadata:s:a:${i}`, `title=${shellQuote(title)}`);
const lang = k.stream.language ? normalizeLanguage(k.stream.language) : "und";
args.push(`-metadata:s:a:${i}`, `language=${lang}`);
});
return args;

View File

@@ -47,6 +47,8 @@ const ITEM_FIELDS = [
"ProductionYear",
"Size",
"Container",
"RunTimeTicks",
"DateLastRefreshed",
].join(",");
export async function* getAllItems(
@@ -189,13 +191,21 @@ export function extractOriginalLanguage(item: JellyfinItem): string | null {
return pick.Language ? normalizeLanguage(pick.Language) : null;
}
/** Map a Jellyfin MediaStream to our internal MediaStream shape (sans id/item_id). */
/**
* Map a Jellyfin MediaStream to our internal MediaStream shape (sans id/item_id).
*
* NOTE: stores the raw `Language` value from Jellyfin (e.g. "en", "eng", "ger",
* null). We intentionally do NOT normalize here because `is_noop` compares
* raw → normalized to decide whether the pipeline should rewrite the tag to
* canonical iso3. Callers that compare languages must use normalizeLanguage().
*/
export function mapStream(s: JellyfinMediaStream): Omit<MediaStream, "id" | "item_id"> {
return {
stream_index: s.Index,
type: s.Type as MediaStream["type"],
codec: s.Codec ?? null,
language: s.Language ? normalizeLanguage(s.Language) : null,
profile: s.Profile ?? null,
language: s.Language ?? null,
language_display: s.DisplayLanguage ?? null,
title: s.Title ?? null,
is_default: s.IsDefault ? 1 : 0,
@@ -205,9 +215,60 @@ export function mapStream(s: JellyfinMediaStream): Omit<MediaStream, "id" | "ite
channel_layout: s.ChannelLayout ?? null,
bit_rate: s.BitRate ?? null,
sample_rate: s.SampleRate ?? null,
bit_depth: s.BitDepth ?? null,
};
}
// ISO 639-1 (2-letter) → 3-letter canonical form. Where ISO 639-2 defines two
// variants, the /T (terminology) code is used — e.g. de → "deu", fr → "fra",
// nl → "nld" — consistent with LANG_ALIASES resolving both "deu" and "ger"
// to "deu".
// Used by normalizeLanguage so "en" and "eng" both resolve to "eng" and
// the canonical-language check can flag files whose tags are still 2-letter.
// Codes missing from this table are not mapped here.
const ISO_1_TO_2: Record<string, string> = {
en: "eng",
de: "deu",
es: "spa",
fr: "fra",
it: "ita",
pt: "por",
ja: "jpn",
ko: "kor",
zh: "zho",
ar: "ara",
ru: "rus",
nl: "nld",
sv: "swe",
no: "nor",
da: "dan",
fi: "fin",
pl: "pol",
tr: "tur",
th: "tha",
hi: "hin",
hu: "hun",
cs: "ces",
ro: "ron",
el: "ell",
he: "heb",
fa: "fas",
uk: "ukr",
id: "ind",
ca: "cat",
nb: "nob",
nn: "nno",
is: "isl",
hr: "hrv",
sk: "slk",
bg: "bul",
sr: "srp",
sl: "slv",
lv: "lav",
lt: "lit",
et: "est",
vi: "vie",
ms: "msa",
ta: "tam",
te: "tel",
};
// ISO 639-2/B → ISO 639-2/T normalization + common aliases
const LANG_ALIASES: Record<string, string> = {
// German: both /T (deu) and /B (ger) → deu
@@ -254,5 +315,6 @@ const LANG_ALIASES: Record<string, string> = {
/**
 * Normalize a language tag to the canonical 3-letter form used for comparisons
 * and for the tags written to output files.
 *
 * The input is lower-cased and trimmed, then resolved in this order:
 *  1. `ISO_1_TO_2` — 2-letter codes ("en" → "eng").
 *  2. `LANG_ALIASES` — alternate 3-letter codes and aliases ("ger" → "deu").
 *  3. Otherwise the lower-cased, trimmed input is returned unchanged.
 *
 * Both tables are plain objects, so lookups are restricted to their own keys.
 * Without that, a tag such as "constructor" or "__proto__" would resolve to an
 * inherited `Object.prototype` member and a non-string would be returned.
 *
 * @param lang - Raw language tag, any casing, possibly padded with whitespace.
 * @returns The canonical code, or the cleaned input when no mapping exists.
 */
export function normalizeLanguage(lang: string): string {
  const lower = lang.toLowerCase().trim();
  const ownEntry = (table: Record<string, string>, key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  const fromIso1 = ownEntry(ISO_1_TO_2, lower);
  if (fromIso1) return fromIso1;
  return ownEntry(LANG_ALIASES, lower) ?? lower;
}

269
server/services/rescan.ts Normal file
View File

@@ -0,0 +1,269 @@
import type { Database } from "bun:sqlite";
import type { JellyfinItem, MediaStream } from "../types";
import { analyzeItem } from "./analyzer";
import { extractOriginalLanguage, mapStream, normalizeLanguage } from "./jellyfin";
import { type RadarrLibrary, getOriginalLanguage as radarrLang } from "./radarr";
import { type SonarrLibrary, getOriginalLanguage as sonarrLang } from "./sonarr";
/**
 * Settings and preloaded lookup data passed to upsertJellyfinItem. Callers
 * build it once and reuse it for every item they upsert.
 */
export interface RescanConfig {
// Configured subtitle language codes.
// NOTE(review): presumably forwarded to the analyzer — confirm in the rest of upsertJellyfinItem.
subtitleLanguages: string[];
// Configured audio language codes (same caveat as subtitleLanguages).
audioLanguages: string[];
// Radarr connection settings; null disables the Radarr lookup for movies.
radarr: { url: string; apiKey: string } | null;
// Sonarr connection settings; null disables the Sonarr lookup for episodes.
sonarr: { url: string; apiKey: string } | null;
// Preloaded Radarr library (looked up via byTmdbId / byImdbId); a null value also skips the Radarr lookup.
radarrLibrary: RadarrLibrary | null;
// Preloaded Sonarr library (looked up via byTvdbId); a null value also skips the Sonarr lookup.
sonarrLibrary: SonarrLibrary | null;
}
/**
 * Summary returned by upsertJellyfinItem so callers can aggregate counters or
 * emit events. At most one of the hit/miss/missing flags is set per lookup
 * attempt; all stay false when no external lookup applies to the item.
 */
export interface RescanResult {
// Internal media_items id. Starts at -1 inside upsertJellyfinItem.
// NOTE(review): presumably replaced with the real id after the upsert — confirm.
itemId: number;
// Original language finally chosen for the item, or null if unknown.
origLang: string | null;
// Where origLang came from ("jellyfin", "radarr" or "sonarr"), or null.
origLangSource: string | null;
// 1 when the item should be reviewed manually, 0 otherwise.
needsReview: number;
// Confidence in origLang; starts as "low".
confidence: "high" | "low";
// Whether analysis found nothing to change. NOTE(review): assigned outside the visible part of the function — confirm.
isNoop: boolean;
// Radarr was queried and returned a language.
radarrHit: boolean;
// Radarr was queried but returned no language.
radarrMiss: boolean;
// Sonarr was queried and returned a language.
sonarrHit: boolean;
// Sonarr was queried but returned no language.
sonarrMiss: boolean;
// The lookup was skipped because the item has no tmdb/imdb id (movies) or no tvdb id (episodes).
missingProviderId: boolean;
}
/**
 * Upsert a single Jellyfin item (metadata + streams + review_plan + decisions).
 * Shared by the full scan loop and the post-execute refresh.
 *
 * The work happens in two phases:
 *   1. Resolve the original language. Jellyfin's guess is the starting point;
 *      for movies Radarr is consulted, for episodes Sonarr, and a hit from
 *      either overrides the guess. These lookups are awaited before any
 *      database write.
 *   2. Write everything inside a single db.transaction: upsert the media_items
 *      row, replace its media_streams rows, run analyzeItem on the stored
 *      streams, then upsert the review_plans row and its stream_decisions.
 *
 * @param db Open bun:sqlite database handle.
 * @param jellyfinItem Item as returned by Jellyfin; stored verbatim as JSON in jellyfin_raw.
 * @param cfg Language preferences plus Radarr/Sonarr connection and library data.
 * @param opts When `executed` is true, last_executed_at is also stamped with the current time.
 * @returns The internal item id and a summary of what happened so callers can
 *   aggregate counters or emit SSE events.
 * @throws Error when the item has no Name or no Path.
 */
export async function upsertJellyfinItem(
db: Database,
jellyfinItem: JellyfinItem,
cfg: RescanConfig,
opts: { executed?: boolean } = {},
): Promise<RescanResult> {
if (!jellyfinItem.Name || !jellyfinItem.Path) {
throw new Error(`Jellyfin item ${jellyfinItem.Id} missing Name or Path`);
}
const itemName: string = jellyfinItem.Name;
const itemPath: string = jellyfinItem.Path;
const providerIds = jellyfinItem.ProviderIds ?? {};
const imdbId = providerIds.Imdb ?? null;
const tmdbId = providerIds.Tmdb ?? null;
const tvdbId = providerIds.Tvdb ?? null;
// See scan.ts for the "8 Mile got labelled Turkish" rationale. Jellyfin's
// first-audio-track guess is an unverified starting point.
const jellyfinGuess = extractOriginalLanguage(jellyfinItem);
let origLang: string | null = jellyfinGuess;
let origLangSource: string | null = jellyfinGuess ? "jellyfin" : null;
// No guess at all means the item needs a human look.
// NOTE(review): this flag is never reset to 0 below, so an item with no
// Jellyfin guess stays needs_review = 1 / confidence "low" even when
// Radarr/Sonarr supplies a language — confirm that is intended.
let needsReview = origLang ? 0 : 1;
// Becomes true once Radarr or Sonarr supplies the language.
let authoritative = false;
// Matching Radarr movie / Sonarr library entry, persisted as external_raw.
let externalRaw: unknown = null;
// Pessimistic defaults; the language/plan fields are overwritten inside the transaction.
const result: RescanResult = {
itemId: -1,
origLang: null,
origLangSource: null,
needsReview: 1,
confidence: "low",
isNoop: false,
radarrHit: false,
radarrMiss: false,
sonarrHit: false,
sonarrMiss: false,
missingProviderId: false,
};
// Movies: look the item up in Radarr by TMDb id, falling back to IMDb id.
if (jellyfinItem.Type === "Movie" && cfg.radarr && cfg.radarrLibrary) {
if (!tmdbId && !imdbId) {
result.missingProviderId = true;
} else {
const movie = tmdbId ? cfg.radarrLibrary.byTmdbId.get(tmdbId) : undefined;
const movieByImdb = !movie && imdbId ? cfg.radarrLibrary.byImdbId.get(imdbId) : undefined;
externalRaw = movie ?? movieByImdb ?? null;
const lang = await radarrLang(
cfg.radarr,
{ tmdbId: tmdbId ?? undefined, imdbId: imdbId ?? undefined },
cfg.radarrLibrary,
);
if (lang) {
result.radarrHit = true;
// Radarr disagreeing with Jellyfin's guess flags the item for review;
// Radarr's answer is stored either way.
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
origLang = lang;
origLangSource = "radarr";
authoritative = true;
} else {
result.radarrMiss = true;
}
}
}
// Episodes: same procedure against Sonarr, keyed by TVDB id.
if (jellyfinItem.Type === "Episode" && cfg.sonarr && cfg.sonarrLibrary) {
if (!tvdbId) {
result.missingProviderId = true;
} else {
externalRaw = cfg.sonarrLibrary.byTvdbId.get(tvdbId) ?? null;
const lang = await sonarrLang(cfg.sonarr, tvdbId, cfg.sonarrLibrary);
if (lang) {
result.sonarrHit = true;
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
origLang = lang;
origLangSource = "sonarr";
authoritative = true;
} else {
result.sonarrMiss = true;
}
}
}
// "high" requires an authoritative source and no pending review. A language
// that only comes from Jellyfin's guess is always sent to review.
let confidence: "high" | "low" = "low";
if (origLang && authoritative && !needsReview) confidence = "high";
else if (origLang && !authoritative) needsReview = 1;
// Raw payloads are stored as JSON text alongside the derived columns.
const jellyfinRaw = JSON.stringify(jellyfinItem);
const externalRawJson = externalRaw ? JSON.stringify(externalRaw) : null;
// One transaction per item keeps scan throughput high on SQLite — every
// INSERT/UPDATE would otherwise hit WAL independently.
db.transaction(() => {
// last_executed_at is added to the column list, the VALUES list and the
// conflict update only when opts.executed is set; otherwise it is untouched.
const upsertItem = db.prepare(`
INSERT INTO media_items (
jellyfin_id, type, name, original_title, series_name, series_jellyfin_id,
season_number, episode_number, year, file_path, file_size, container,
runtime_ticks, date_last_refreshed,
original_language, orig_lang_source, needs_review,
imdb_id, tmdb_id, tvdb_id,
jellyfin_raw, external_raw,
scan_status, last_scanned_at${opts.executed ? ", last_executed_at" : ""}
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'scanned', datetime('now')${opts.executed ? ", datetime('now')" : ""})
ON CONFLICT(jellyfin_id) DO UPDATE SET
type = excluded.type, name = excluded.name, original_title = excluded.original_title,
series_name = excluded.series_name, series_jellyfin_id = excluded.series_jellyfin_id,
season_number = excluded.season_number, episode_number = excluded.episode_number,
year = excluded.year, file_path = excluded.file_path,
file_size = excluded.file_size, container = excluded.container,
runtime_ticks = excluded.runtime_ticks, date_last_refreshed = excluded.date_last_refreshed,
original_language = excluded.original_language, orig_lang_source = excluded.orig_lang_source,
needs_review = excluded.needs_review, imdb_id = excluded.imdb_id,
tmdb_id = excluded.tmdb_id, tvdb_id = excluded.tvdb_id,
jellyfin_raw = excluded.jellyfin_raw, external_raw = excluded.external_raw,
scan_status = 'scanned', last_scanned_at = datetime('now')
${opts.executed ? ", last_executed_at = datetime('now')" : ""}
`);
// Argument order must match the 22 positional placeholders above.
upsertItem.run(
jellyfinItem.Id,
// Anything that is not an Episode is stored as a Movie.
jellyfinItem.Type === "Episode" ? "Episode" : "Movie",
itemName,
jellyfinItem.OriginalTitle ?? null,
jellyfinItem.SeriesName ?? null,
jellyfinItem.SeriesId ?? null,
jellyfinItem.ParentIndexNumber ?? null,
jellyfinItem.IndexNumber ?? null,
jellyfinItem.ProductionYear ?? null,
itemPath,
jellyfinItem.Size ?? null,
jellyfinItem.Container ?? null,
jellyfinItem.RunTimeTicks ?? null,
jellyfinItem.DateLastRefreshed ?? null,
origLang,
origLangSource,
needsReview,
imdbId,
tmdbId,
tvdbId,
jellyfinRaw,
externalRawJson,
);
// Read the row id back by jellyfin_id; this works for both the insert and the update path.
const itemRow = db.prepare("SELECT id FROM media_items WHERE jellyfin_id = ?").get(jellyfinItem.Id) as {
id: number;
};
const itemId = itemRow.id;
result.itemId = itemId;
// Streams are replaced wholesale rather than diffed.
// NOTE(review): the re-inserted rows presumably get new ids — verify that
// stream_decisions rows pointing at the old ids are removed (e.g. by an
// ON DELETE CASCADE in the schema, which is not visible here).
db.prepare("DELETE FROM media_streams WHERE item_id = ?").run(itemId);
const insertStream = db.prepare(`
INSERT INTO media_streams (
item_id, stream_index, type, codec, profile, language, language_display,
title, is_default, is_forced, is_hearing_impaired,
channels, channel_layout, bit_rate, sample_rate, bit_depth
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
for (const jStream of jellyfinItem.MediaStreams ?? []) {
// Streams flagged IsExternal by Jellyfin are not stored.
if (jStream.IsExternal) continue;
const s = mapStream(jStream);
insertStream.run(
itemId,
s.stream_index,
s.type,
s.codec,
s.profile,
s.language,
s.language_display,
s.title,
s.is_default,
s.is_forced,
s.is_hearing_impaired,
s.channels,
s.channel_layout,
s.bit_rate,
s.sample_rate,
s.bit_depth,
);
}
// Re-read the rows so the analyzer receives them as stored, including their row ids.
const streams = db.prepare("SELECT * FROM media_streams WHERE item_id = ?").all(itemId) as MediaStream[];
const analysis = analyzeItem(
{ original_language: origLang, needs_review: needsReview, container: jellyfinItem.Container ?? null },
streams,
{ subtitleLanguages: cfg.subtitleLanguages, audioLanguages: cfg.audioLanguages },
);
// Status handling for an existing plan: a no-op result forces 'done'; a plan
// that was 'done' or 'error' but now has work goes back to 'pending'; any
// other status is kept. A brand-new plan is always inserted as 'pending',
// even when is_noop is 1.
db
.prepare(`
INSERT INTO review_plans (item_id, status, is_noop, confidence, apple_compat, job_type, notes)
VALUES (?, 'pending', ?, ?, ?, ?, ?)
ON CONFLICT(item_id) DO UPDATE SET
status = CASE
WHEN excluded.is_noop = 1 THEN 'done'
WHEN review_plans.status IN ('done','error') THEN 'pending'
ELSE review_plans.status
END,
is_noop = excluded.is_noop,
confidence = excluded.confidence,
apple_compat = excluded.apple_compat,
job_type = excluded.job_type,
notes = excluded.notes
`)
.run(
itemId,
analysis.is_noop ? 1 : 0,
confidence,
analysis.apple_compat,
analysis.job_type,
// Notes are stored newline-joined, or NULL when there are none.
analysis.notes.length > 0 ? analysis.notes.join("\n") : null,
);
const planRow = db.prepare("SELECT id FROM review_plans WHERE item_id = ?").get(itemId) as { id: number };
// One decision row per (plan, stream); an existing row is overwritten in place.
const upsertDecision = db.prepare(`
INSERT INTO stream_decisions (plan_id, stream_id, action, target_index, transcode_codec)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT(plan_id, stream_id) DO UPDATE SET
action = excluded.action,
target_index = excluded.target_index,
transcode_codec = excluded.transcode_codec
`);
for (const dec of analysis.decisions) {
upsertDecision.run(planRow.id, dec.stream_id, dec.action, dec.target_index, dec.transcode_codec);
}
// Fill in the summary fields last, after every write in this transaction has been issued.
result.origLang = origLang;
result.origLangSource = origLangSource;
result.needsReview = needsReview;
result.confidence = confidence;
result.isNoop = analysis.is_noop;
})();
return result;
}

View File

@@ -5,6 +5,7 @@ export interface MediaItem {
jellyfin_id: string;
type: "Movie" | "Episode";
name: string;
original_title: string | null;
series_name: string | null;
series_jellyfin_id: string | null;
season_number: number | null;
@@ -13,15 +14,20 @@ export interface MediaItem {
file_path: string;
file_size: number | null;
container: string | null;
runtime_ticks: number | null;
date_last_refreshed: string | null;
original_language: string | null;
orig_lang_source: "jellyfin" | "radarr" | "sonarr" | "manual" | null;
needs_review: number;
imdb_id: string | null;
tmdb_id: string | null;
tvdb_id: string | null;
jellyfin_raw: string | null;
external_raw: string | null;
scan_status: "pending" | "scanned" | "error";
scan_error: string | null;
last_scanned_at: string | null;
last_executed_at: string | null;
created_at: string;
}
@@ -31,6 +37,10 @@ export interface MediaStream {
stream_index: number;
type: "Video" | "Audio" | "Subtitle" | "Data" | "EmbeddedImage";
codec: string | null;
profile: string | null;
/** Raw language tag as reported by Jellyfin (e.g. "en", "eng", "ger", null).
* Not normalized on ingest — callers use normalizeLanguage() for comparison
* so we can detect non-canonical tags that the pipeline should rewrite. */
language: string | null;
language_display: string | null;
title: string | null;
@@ -41,6 +51,7 @@ export interface MediaStream {
channel_layout: string | null;
bit_rate: number | null;
sample_rate: number | null;
bit_depth: number | null;
}
export interface ReviewPlan {
@@ -120,6 +131,7 @@ export interface JellyfinMediaStream {
Type: string;
Index: number;
Codec?: string;
Profile?: string;
Language?: string;
DisplayLanguage?: string;
Title?: string;
@@ -131,12 +143,14 @@ export interface JellyfinMediaStream {
ChannelLayout?: string;
BitRate?: number;
SampleRate?: number;
BitDepth?: number;
}
export interface JellyfinItem {
Id: string;
Type: string;
Name: string;
OriginalTitle?: string;
SeriesName?: string;
SeriesId?: string;
ParentIndexNumber?: number;
@@ -145,6 +159,8 @@ export interface JellyfinItem {
Path?: string;
Size?: number;
Container?: string;
RunTimeTicks?: number;
DateLastRefreshed?: string;
MediaStreams?: JellyfinMediaStream[];
ProviderIds?: Record<string, string>;
}