All checks were successful
Build and Push Docker Image / build (push) Successful in 28s
Two bugs compounded: 1. extractOriginalLanguage() in jellyfin.ts picked the FIRST audio stream's language and called it 'original'. Files sourced from non-English regions often have a local dub as track 0, so 8 Mile with a Turkish dub first got labelled Turkish. 2. scan.ts promoted any single-source answer to confidence='high' — even the pure Jellyfin guess, as long as no second source (Radarr/Sonarr) contradicted it. Jellyfin's dub-magnet guess should never be green. Fixes: - extractOriginalLanguage now prefers the IsDefault audio track and skips tracks whose title shouts 'dub' / 'commentary' / 'director'. Still a heuristic, but much less wrong. Fallback to the first track when every candidate looks like a dub so we have *something* to flag. - scan.ts: high confidence requires an authoritative source (Radarr/Sonarr) with no conflict. A Jellyfin-only answer is always low confidence AND gets needs_review=1 so it surfaces in the pipeline for manual override. - Data migration (idempotent): downgrade existing plans backed only by the Jellyfin heuristic to low confidence and mark needs_review=1, so users don't have to rescan to benefit. - New server/services/__tests__/jellyfin.test.ts covers the default-track preference and dub-skip behavior.
354 lines
14 KiB
TypeScript
import { Hono } from "hono";
|
|
import { stream } from "hono/streaming";
|
|
import { getAllConfig, getConfig, getDb, setConfig } from "../db/index";
|
|
import { log, error as logError, warn } from "../lib/log";
|
|
import { analyzeItem } from "../services/analyzer";
|
|
import { extractOriginalLanguage, getAllItems, getDevItems, mapStream, normalizeLanguage } from "../services/jellyfin";
|
|
import { getOriginalLanguage as radarrLang } from "../services/radarr";
|
|
import { getOriginalLanguage as sonarrLang } from "../services/sonarr";
|
|
import type { MediaStream } from "../types";
|
|
|
|
const app = new Hono();

// ─── State ────────────────────────────────────────────────────────────────────

// Abort handle for the currently running scan; null when no scan is in flight.
// Set by runScan(), triggered by POST /stop.
let scanAbort: AbortController | null = null;

// SSE subscribers: each listener receives fully-formatted event-stream frames
// (see emitSse). Listeners are added/removed by the GET /events handler.
const scanListeners = new Set<(data: string) => void>();
|
|
|
|
function emitSse(type: string, data: unknown): void {
|
|
const line = `event: ${type}\ndata: ${JSON.stringify(data)}\n\n`;
|
|
for (const listener of scanListeners) listener(line);
|
|
}
|
|
|
|
function currentScanLimit(): number | null {
|
|
const v = getConfig("scan_limit");
|
|
return v ? Number(v) : null;
|
|
}
|
|
|
|
// ─── Status ───────────────────────────────────────────────────────────────────
|
|
|
|
app.get("/", (c) => {
|
|
const db = getDb();
|
|
const running = getConfig("scan_running") === "1";
|
|
const total = (db.prepare("SELECT COUNT(*) as n FROM media_items").get() as { n: number }).n;
|
|
const scanned = (
|
|
db.prepare("SELECT COUNT(*) as n FROM media_items WHERE scan_status = 'scanned'").get() as { n: number }
|
|
).n;
|
|
const errors = (db.prepare("SELECT COUNT(*) as n FROM media_items WHERE scan_status = 'error'").get() as { n: number })
|
|
.n;
|
|
const recentItems = db
|
|
.prepare("SELECT name, type, scan_status, file_path FROM media_items ORDER BY last_scanned_at DESC LIMIT 50")
|
|
.all() as { name: string; type: string; scan_status: string; file_path: string }[];
|
|
|
|
return c.json({ running, progress: { scanned, total, errors }, recentItems, scanLimit: currentScanLimit() });
|
|
});
|
|
|
|
// ─── Start ────────────────────────────────────────────────────────────────────
|
|
|
|
app.post("/start", async (c) => {
|
|
const db = getDb();
|
|
// Atomic claim: only succeed if scan_running is not already '1'.
|
|
const claim = db.prepare("UPDATE config SET value = '1' WHERE key = 'scan_running' AND value != '1'").run();
|
|
if (claim.changes === 0) {
|
|
return c.json({ ok: false, error: "Scan already running" }, 409);
|
|
}
|
|
|
|
const body = await c.req.json<{ limit?: number }>().catch(() => ({ limit: undefined }));
|
|
const formLimit = body.limit ?? null;
|
|
const envLimit = process.env.SCAN_LIMIT ? Number(process.env.SCAN_LIMIT) : null;
|
|
const limit = formLimit ?? envLimit ?? null;
|
|
setConfig("scan_limit", limit != null ? String(limit) : "");
|
|
|
|
runScan(limit).catch((err) => {
|
|
logError("Scan failed:", err);
|
|
setConfig("scan_running", "0");
|
|
emitSse("error", { message: String(err) });
|
|
});
|
|
|
|
return c.json({ ok: true });
|
|
});
|
|
|
|
// ─── Stop ─────────────────────────────────────────────────────────────────────
|
|
|
|
app.post("/stop", (c) => {
|
|
scanAbort?.abort();
|
|
setConfig("scan_running", "0");
|
|
return c.json({ ok: true });
|
|
});
|
|
|
|
// ─── SSE ──────────────────────────────────────────────────────────────────────
|
|
|
|
// ─── SSE ──────────────────────────────────────────────────────────────────────

// Server-sent events endpoint: relays scan progress frames (emitted via
// emitSse) to each connected client, with a 25s keepalive comment so idle
// connections aren't dropped by proxies.
app.get("/events", (c) => {
  return stream(c, async (s) => {
    c.header("Content-Type", "text/event-stream");
    c.header("Cache-Control", "no-cache");
    c.header("Connection", "keep-alive");

    // Producer/consumer handoff: the listener enqueues frames; `resolve`,
    // when non-null, wakes the writer loop parked below waiting for data.
    const queue: string[] = [];
    let resolve: (() => void) | null = null;

    const listener = (data: string) => {
      queue.push(data);
      resolve?.();
    };

    scanListeners.add(listener);
    s.onAbort(() => {
      scanListeners.delete(listener);
    });

    try {
      while (!s.closed) {
        if (queue.length > 0) {
          // Drain one frame per iteration so backpressure from write()
          // naturally throttles the loop.
          await s.write(queue.shift()!);
        } else {
          // Park until either a new frame arrives (listener calls `resolve`)
          // or 25s elapses. Resolving a settled promise twice is harmless.
          await new Promise<void>((res) => {
            resolve = res;
            setTimeout(res, 25_000);
          });
          resolve = null;
          // Timed out with nothing queued: send an SSE comment as keepalive.
          if (queue.length === 0) await s.write(": keepalive\n\n");
        }
      }
    } finally {
      // onAbort usually removes the listener, but this also covers loop exit
      // via s.closed or a write error mid-stream.
      scanListeners.delete(listener);
    }
  });
});
|
|
|
|
// ─── Core scan logic ──────────────────────────────────────────────────────────
|
|
|
|
async function runScan(limit: number | null = null): Promise<void> {
|
|
log(`Scan started${limit ? ` (limit: ${limit})` : ""}`);
|
|
scanAbort = new AbortController();
|
|
const { signal } = scanAbort;
|
|
const isDev = process.env.NODE_ENV === "development";
|
|
const db = getDb();
|
|
|
|
if (isDev) {
|
|
// Order matters only if foreign keys are enforced without CASCADE; we
|
|
// have ON DELETE CASCADE on media_streams/review_plans/stream_decisions/
|
|
// subtitle_files/jobs, so deleting media_items would be enough. List
|
|
// them explicitly for clarity and to survive future schema drift.
|
|
db.prepare("DELETE FROM jobs").run();
|
|
db.prepare("DELETE FROM subtitle_files").run();
|
|
db.prepare("DELETE FROM stream_decisions").run();
|
|
db.prepare("DELETE FROM review_plans").run();
|
|
db.prepare("DELETE FROM media_streams").run();
|
|
db.prepare("DELETE FROM media_items").run();
|
|
}
|
|
|
|
const cfg = getAllConfig();
|
|
const jellyfinCfg = { url: cfg.jellyfin_url, apiKey: cfg.jellyfin_api_key, userId: cfg.jellyfin_user_id };
|
|
const subtitleLanguages: string[] = JSON.parse(cfg.subtitle_languages ?? '["eng","deu","spa"]');
|
|
const audioLanguages: string[] = JSON.parse(cfg.audio_languages ?? "[]");
|
|
const radarrEnabled = cfg.radarr_enabled === "1";
|
|
const sonarrEnabled = cfg.sonarr_enabled === "1";
|
|
|
|
let processed = 0;
|
|
let errors = 0;
|
|
let total = 0;
|
|
|
|
const upsertItem = db.prepare(`
|
|
INSERT INTO media_items (
|
|
jellyfin_id, type, name, series_name, series_jellyfin_id,
|
|
season_number, episode_number, year, file_path, file_size, container,
|
|
original_language, orig_lang_source, needs_review,
|
|
imdb_id, tmdb_id, tvdb_id,
|
|
scan_status, last_scanned_at
|
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'scanned', datetime('now'))
|
|
ON CONFLICT(jellyfin_id) DO UPDATE SET
|
|
type = excluded.type, name = excluded.name, series_name = excluded.series_name,
|
|
series_jellyfin_id = excluded.series_jellyfin_id, season_number = excluded.season_number,
|
|
episode_number = excluded.episode_number, year = excluded.year, file_path = excluded.file_path,
|
|
file_size = excluded.file_size, container = excluded.container,
|
|
original_language = excluded.original_language, orig_lang_source = excluded.orig_lang_source,
|
|
needs_review = excluded.needs_review, imdb_id = excluded.imdb_id,
|
|
tmdb_id = excluded.tmdb_id, tvdb_id = excluded.tvdb_id,
|
|
scan_status = 'scanned', last_scanned_at = datetime('now')
|
|
`);
|
|
|
|
const deleteStreams = db.prepare("DELETE FROM media_streams WHERE item_id = ?");
|
|
const insertStream = db.prepare(`
|
|
INSERT INTO media_streams (
|
|
item_id, stream_index, type, codec, language, language_display,
|
|
title, is_default, is_forced, is_hearing_impaired,
|
|
channels, channel_layout, bit_rate, sample_rate
|
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
`);
|
|
const upsertPlan = db.prepare(`
|
|
INSERT INTO review_plans (item_id, status, is_noop, confidence, apple_compat, job_type, notes)
|
|
VALUES (?, 'pending', ?, ?, ?, ?, ?)
|
|
ON CONFLICT(item_id) DO UPDATE SET
|
|
status = CASE WHEN review_plans.status IN ('done','error') THEN 'pending' ELSE review_plans.status END,
|
|
is_noop = excluded.is_noop,
|
|
confidence = excluded.confidence,
|
|
apple_compat = excluded.apple_compat,
|
|
job_type = excluded.job_type,
|
|
notes = excluded.notes
|
|
`);
|
|
const upsertDecision = db.prepare(`
|
|
INSERT INTO stream_decisions (plan_id, stream_id, action, target_index, transcode_codec)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
ON CONFLICT(plan_id, stream_id) DO UPDATE SET action = excluded.action, target_index = excluded.target_index, transcode_codec = excluded.transcode_codec
|
|
`);
|
|
const getItemByJellyfinId = db.prepare("SELECT id FROM media_items WHERE jellyfin_id = ?");
|
|
const getPlanByItemId = db.prepare("SELECT id FROM review_plans WHERE item_id = ?");
|
|
const getStreamsByItemId = db.prepare("SELECT * FROM media_streams WHERE item_id = ?");
|
|
|
|
const itemSource = isDev
|
|
? getDevItems(jellyfinCfg)
|
|
: getAllItems(jellyfinCfg, (_fetched, jellyfinTotal) => {
|
|
total = limit != null ? Math.min(limit, jellyfinTotal) : jellyfinTotal;
|
|
});
|
|
for await (const jellyfinItem of itemSource) {
|
|
if (signal.aborted) break;
|
|
if (!isDev && limit != null && processed >= limit) break;
|
|
if (!jellyfinItem.Name || !jellyfinItem.Path) {
|
|
warn(`Skipping item without name/path: id=${jellyfinItem.Id}`);
|
|
continue;
|
|
}
|
|
|
|
processed++;
|
|
emitSse("progress", { scanned: processed, total, current_item: jellyfinItem.Name, errors, running: true });
|
|
|
|
try {
|
|
const providerIds = jellyfinItem.ProviderIds ?? {};
|
|
const imdbId = providerIds.Imdb ?? null;
|
|
const tmdbId = providerIds.Tmdb ?? null;
|
|
const tvdbId = providerIds.Tvdb ?? null;
|
|
|
|
// Jellyfin has no real original_language field; extractOriginalLanguage
|
|
// guesses from the first/default audio stream. That's a DUB MAGNET —
|
|
// files uploaded from non-English regions often have a local dub first,
|
|
// so the "original" comes out as Turkish, German, etc. We record it
|
|
// as a starting point but treat it as unverified.
|
|
const jellyfinGuess = extractOriginalLanguage(jellyfinItem);
|
|
let origLang: string | null = jellyfinGuess;
|
|
let origLangSource = jellyfinGuess ? "jellyfin" : null;
|
|
let needsReview = origLang ? 0 : 1;
|
|
let authoritative = false; // set when Radarr/Sonarr answers
|
|
|
|
if (jellyfinItem.Type === "Movie" && radarrEnabled && (tmdbId || imdbId)) {
|
|
const lang = await radarrLang(
|
|
{ url: cfg.radarr_url, apiKey: cfg.radarr_api_key },
|
|
{ tmdbId: tmdbId ?? undefined, imdbId: imdbId ?? undefined },
|
|
);
|
|
if (lang) {
|
|
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
|
|
origLang = lang;
|
|
origLangSource = "radarr";
|
|
authoritative = true;
|
|
}
|
|
}
|
|
|
|
if (jellyfinItem.Type === "Episode" && sonarrEnabled && tvdbId) {
|
|
const lang = await sonarrLang({ url: cfg.sonarr_url, apiKey: cfg.sonarr_api_key }, tvdbId);
|
|
if (lang) {
|
|
if (origLang && normalizeLanguage(origLang) !== normalizeLanguage(lang)) needsReview = 1;
|
|
origLang = lang;
|
|
origLangSource = "sonarr";
|
|
authoritative = true;
|
|
}
|
|
}
|
|
|
|
// High confidence requires an authoritative source (Radarr/Sonarr) and
|
|
// no conflict. A Jellyfin-only guess is ALWAYS low confidence and gets
|
|
// flagged for review — that's how 8 Mile landed as "Turkish": default
|
|
// audio was a Turkish dub, Radarr wasn't available or didn't have the
|
|
// movie, and the guess got a green 'high' badge it never earned.
|
|
let confidence: "high" | "low" = "low";
|
|
if (origLang && authoritative && !needsReview) {
|
|
confidence = "high";
|
|
} else if (origLang && !authoritative) {
|
|
// Jellyfin guess only — surface it for manual review.
|
|
needsReview = 1;
|
|
}
|
|
|
|
upsertItem.run(
|
|
jellyfinItem.Id,
|
|
jellyfinItem.Type === "Episode" ? "Episode" : "Movie",
|
|
jellyfinItem.Name,
|
|
jellyfinItem.SeriesName ?? null,
|
|
jellyfinItem.SeriesId ?? null,
|
|
jellyfinItem.ParentIndexNumber ?? null,
|
|
jellyfinItem.IndexNumber ?? null,
|
|
jellyfinItem.ProductionYear ?? null,
|
|
jellyfinItem.Path,
|
|
jellyfinItem.Size ?? null,
|
|
jellyfinItem.Container ?? null,
|
|
origLang,
|
|
origLangSource,
|
|
needsReview,
|
|
imdbId,
|
|
tmdbId,
|
|
tvdbId,
|
|
);
|
|
|
|
const itemRow = getItemByJellyfinId.get(jellyfinItem.Id) as { id: number };
|
|
const itemId = itemRow.id;
|
|
|
|
deleteStreams.run(itemId);
|
|
for (const jStream of jellyfinItem.MediaStreams ?? []) {
|
|
if (jStream.IsExternal) continue; // skip external subs — not embedded in container
|
|
const s = mapStream(jStream);
|
|
insertStream.run(
|
|
itemId,
|
|
s.stream_index,
|
|
s.type,
|
|
s.codec,
|
|
s.language,
|
|
s.language_display,
|
|
s.title,
|
|
s.is_default,
|
|
s.is_forced,
|
|
s.is_hearing_impaired,
|
|
s.channels,
|
|
s.channel_layout,
|
|
s.bit_rate,
|
|
s.sample_rate,
|
|
);
|
|
}
|
|
|
|
const streams = getStreamsByItemId.all(itemId) as MediaStream[];
|
|
const analysis = analyzeItem(
|
|
{ original_language: origLang, needs_review: needsReview, container: jellyfinItem.Container ?? null },
|
|
streams,
|
|
{ subtitleLanguages, audioLanguages },
|
|
);
|
|
// Override base confidence with scan-computed value
|
|
const finalConfidence = confidence;
|
|
upsertPlan.run(
|
|
itemId,
|
|
analysis.is_noop ? 1 : 0,
|
|
finalConfidence,
|
|
analysis.apple_compat,
|
|
analysis.job_type,
|
|
analysis.notes.length > 0 ? analysis.notes.join("\n") : null,
|
|
);
|
|
const planRow = getPlanByItemId.get(itemId) as { id: number };
|
|
for (const dec of analysis.decisions)
|
|
upsertDecision.run(planRow.id, dec.stream_id, dec.action, dec.target_index, dec.transcode_codec);
|
|
|
|
emitSse("log", { name: jellyfinItem.Name, type: jellyfinItem.Type, status: "scanned", file: jellyfinItem.Path });
|
|
} catch (err) {
|
|
errors++;
|
|
logError(`Error scanning ${jellyfinItem.Name}:`, err);
|
|
try {
|
|
db
|
|
.prepare("UPDATE media_items SET scan_status = 'error', scan_error = ? WHERE jellyfin_id = ?")
|
|
.run(String(err), jellyfinItem.Id);
|
|
} catch {
|
|
/* ignore */
|
|
}
|
|
emitSse("log", { name: jellyfinItem.Name, type: jellyfinItem.Type, status: "error", file: jellyfinItem.Path });
|
|
}
|
|
}
|
|
|
|
setConfig("scan_running", "0");
|
|
log(`Scan complete: ${processed} scanned, ${errors} errors`);
|
|
emitSse("complete", { scanned: processed, total, errors });
|
|
}
|
|
|
|
export default app;
|