Add catalog link page and Jellyfin dedupe

This commit is contained in:
MiTHRAL 2026-05-15 15:38:20 -04:00
parent 942c8c3a54
commit 4d82e0d55b
5 changed files with 250 additions and 26 deletions

View file

@ -5,6 +5,7 @@ ARCHIVE_STATUS_STATE=state/status-message.json
MEDIA_CATALOG_STATE=state/media-catalog.json
MEDIA_LIBRARY_STATE=state/media-library.json
BOT_SETTINGS_STATE=state/bot-settings.json
PUBLIC_CATALOG_URL=
JELLYFIN_SYNC_INTERVAL_SECONDS=900
CHECK_INTERVAL_SECONDS=60
HTTP_USER_AGENT=ArchiveStatusBot/1.0

View file

@ -5,6 +5,7 @@ ARCHIVE_STATUS_STATE=state/status-message.json
MEDIA_CATALOG_STATE=state/media-catalog.json
MEDIA_LIBRARY_STATE=state/media-library.json
BOT_SETTINGS_STATE=state/bot-settings.json
PUBLIC_CATALOG_URL=
JELLYFIN_SYNC_INTERVAL_SECONDS=900
CHECK_INTERVAL_SECONDS=60
HTTP_USER_AGENT=ArchiveStatusBot/1.0

View file

@ -201,7 +201,9 @@ The editor supports adding, editing, and deleting movie/show rows before saving
Discord publishing uses one message with an attached `media-catalog.md` file so the channel does not get flooded by a long embed wall.
You can also sync directly from Jellyfin instead of using CSVs. In Jellyfin, create an API key from the admin dashboard, then enter the Jellyfin URL and key in the `Media` tab. `Sync now` replaces the editable library with the current Jellyfin movies and shows. `Auto-sync changes` checks Jellyfin periodically and republishes only when the catalog fingerprint changes.
The dashboard also serves a read-only catalog page at `/catalog`. Set the public reverse-proxy URL for that page in the `Media` tab's `Catalog URL` field, or with `PUBLIC_CATALOG_URL`, and Discord posts will include an `Open Catalog` button.
You can also sync directly from Jellyfin instead of using CSVs. In Jellyfin, create an API key from the admin dashboard, then enter the Jellyfin URL and key in the `Media` tab. `Sync now` replaces the editable library with the current Jellyfin movies and shows. `Auto-sync changes` checks Jellyfin periodically and republishes only when the catalog fingerprint changes. Jellyfin results are deduplicated across libraries using provider IDs first, then normalized title and year.
Channel selections are stored in:

View file

@ -588,6 +588,10 @@
<label for="mediaChannelId">Channel ID</label>
<input id="mediaChannelId" inputmode="numeric" placeholder="Discord channel ID">
</div>
<div class="media-field">
<label for="catalogUrl">Catalog URL</label>
<input id="catalogUrl" placeholder="https://archive.example.com/catalog">
</div>
<div class="media-field">
<label for="moviesCsv">Movies.csv</label>
<input id="moviesCsv" type="file" accept=".csv,text/csv">
@ -653,7 +657,7 @@
let services = [];
let results = new Map();
let csrfToken = "";
let channels = { statusChannelId: "", mediaChannelId: "" };
let channels = { statusChannelId: "", mediaChannelId: "", catalogUrl: "" };
let mediaLibrary = { movies: [], shows: [] };
let activeMediaTab = "movies";
let jellyfin = { url: "", configured: false, autoSync: false };
@ -738,7 +742,8 @@
function renderSummary(payload) {
channels = payload.channels || {
statusChannelId: payload.channelId || channels.statusChannelId || "",
mediaChannelId: channels.mediaChannelId || payload.channelId || ""
mediaChannelId: channels.mediaChannelId || payload.channelId || "",
catalogUrl: channels.catalogUrl || ""
};
document.querySelector("#statusChannelId").value = channels.statusChannelId || "";
const online = payload.results.filter((result) => result.ok).length;
@ -851,6 +856,10 @@
function renderMediaStatus(payload) {
channels.mediaChannelId = payload.channelId || channels.mediaChannelId || "";
document.querySelector("#mediaChannelId").value = channels.mediaChannelId;
if (payload.channels) {
channels = payload.channels;
}
document.querySelector("#catalogUrl").value = channels.catalogUrl || "";
if (payload.library) {
mediaLibrary = normalizeMediaLibrary(payload.library);
renderMediaLibrary();
@ -912,6 +921,7 @@
async function syncJellyfin(forcePublish = false) {
setJellyfinMessage(forcePublish ? "Syncing and publishing..." : "Syncing Jellyfin...");
await saveChannelSettings();
const settings = currentJellyfinSettings();
const payload = await api("/api/jellyfin/sync", {
method: "POST",
@ -1029,7 +1039,8 @@
function currentChannelSettings() {
const statusChannelId = document.querySelector("#statusChannelId").value.trim() || channels.statusChannelId || "";
const mediaChannelId = document.querySelector("#mediaChannelId").value.trim() || channels.mediaChannelId || statusChannelId;
return { statusChannelId, mediaChannelId };
const catalogUrl = document.querySelector("#catalogUrl").value.trim() || channels.catalogUrl || "";
return { statusChannelId, mediaChannelId, catalogUrl };
}
async function saveChannelSettings() {
@ -1040,6 +1051,7 @@
channels = payload.channels || currentChannelSettings();
document.querySelector("#statusChannelId").value = channels.statusChannelId || "";
document.querySelector("#mediaChannelId").value = channels.mediaChannelId || "";
document.querySelector("#catalogUrl").value = channels.catalogUrl || "";
return channels;
}
@ -1099,6 +1111,7 @@
method: "POST",
body: JSON.stringify({
channelId: document.querySelector("#mediaChannelId").value.trim() || channels.mediaChannelId,
catalogUrl: document.querySelector("#catalogUrl").value.trim() || channels.catalogUrl,
movies: mediaLibrary.movies,
shows: mediaLibrary.shows
})

View file

@ -8,6 +8,7 @@ import asyncio
import csv
import hashlib
import hmac
import html
import io
import json
import os
@ -590,13 +591,25 @@ def channel_settings(runtime: BotRuntime) -> dict[str, str]:
data = load_state(runtime.settings_path)
status_channel = str(data.get("status_channel_id", "")).strip() or runtime.default_channel_id
media_channel = str(data.get("media_channel_id", "")).strip() or status_channel
catalog_url = str(data.get("catalog_url", "")).strip() or os.getenv("PUBLIC_CATALOG_URL", "").strip()
return {
"statusChannelId": status_channel,
"mediaChannelId": media_channel,
"catalogUrl": catalog_url,
}
def save_channel_settings(runtime: BotRuntime, status_channel_id: str, media_channel_id: str) -> dict[str, str]:
def validate_catalog_url(value: str) -> str:
    """Normalize and validate a catalog URL setting.

    Args:
        value: Raw URL text; surrounding whitespace is ignored.

    Returns:
        The stripped URL, or an empty string when ``value`` is blank.

    Raises:
        ValueError: If the value is not an absolute http(s) URL with a host.
    """
    catalog_url = value.strip()
    if not catalog_url:
        return ""
    try:
        parsed = urllib.parse.urlparse(catalog_url)
    except ValueError as exc:
        # urlparse itself rejects some inputs (e.g. unbalanced IPv6 brackets);
        # report them with the same message as every other validation failure.
        raise ValueError("Catalog URL must be a valid http(s) URL") from exc
    # Check hostname rather than netloc: netloc is non-empty for host-less
    # values such as "http://:8080" or "http://user@".
    if parsed.scheme not in {"http", "https"} or not parsed.hostname:
        raise ValueError("Catalog URL must be a valid http(s) URL")
    return catalog_url
def save_channel_settings(runtime: BotRuntime, status_channel_id: str, media_channel_id: str, catalog_url: str = "") -> dict[str, str]:
status_channel = validate_channel_id(status_channel_id, "Status")
media_channel = validate_channel_id(media_channel_id or status_channel, "Media")
state = load_state(runtime.settings_path)
@ -604,6 +617,7 @@ def save_channel_settings(runtime: BotRuntime, status_channel_id: str, media_cha
{
"status_channel_id": status_channel,
"media_channel_id": media_channel,
"catalog_url": validate_catalog_url(catalog_url),
"updated_at": datetime.now(timezone.utc).isoformat(),
}
)
@ -655,6 +669,14 @@ def save_jellyfin_settings(runtime: BotRuntime, data: dict[str, Any]) -> dict[st
return jellyfin_settings(runtime)
def save_catalog_url_setting(runtime: BotRuntime, catalog_url: str) -> dict[str, str]:
    """Store the catalog URL in the settings state and return channel settings.

    The URL goes through ``validate_catalog_url`` before anything is written,
    so an invalid value raises and the stored settings are left untouched.
    The state's ``updated_at`` field is refreshed with the current UTC time.
    """
    settings_state = load_state(runtime.settings_path)
    validated_url = validate_catalog_url(catalog_url)
    settings_state.update(
        {
            "catalog_url": validated_url,
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
    )
    save_state(runtime.settings_path, settings_state)
    return channel_settings(runtime)
def normalize_csv_key(value: str) -> str:
    """Return ``value`` lowercased with every non-alphanumeric character removed."""
    lowered = value.lower()
    return "".join(filter(str.isalnum, lowered))
@ -1042,7 +1064,7 @@ def fetch_jellyfin_items(settings: dict[str, Any], item_type: str) -> list[dict[
{
"Recursive": "true",
"IncludeItemTypes": item_type,
"Fields": "Genres,Overview,OfficialRating,RecursiveItemCount,ChildCount,PremiereDate,RunTimeTicks",
"Fields": "Genres,Overview,OfficialRating,RecursiveItemCount,ChildCount,PremiereDate,RunTimeTicks,ProviderIds",
"SortBy": "SortName",
"SortOrder": "Ascending",
"EnableImages": "false",
@ -1060,14 +1082,63 @@ def fetch_jellyfin_items(settings: dict[str, Any], item_type: str) -> list[dict[
start_index += limit
def jellyfin_item_dedupe_key(item: dict[str, Any], item_type: str) -> tuple[str, str]:
    """Build the key used to detect duplicate Jellyfin items.

    The first non-empty provider ID among Tmdb, Imdb and Tvdb (in that order)
    wins; otherwise the key falls back to the cleaned title plus year.

    Args:
        item: Raw Jellyfin item dict.
        item_type: Jellyfin item type name, e.g. ``"Movie"`` or ``"Series"``.

    Returns:
        ``(item_type, identity)`` where both parts are case-folded and
        ``identity`` is ``"<provider>:<id>"`` or ``"title:<title>:<year>"``.
    """
    kind = item_type.casefold()
    provider_ids = item.get("ProviderIds")
    if isinstance(provider_ids, dict):
        for provider in ("Tmdb", "Imdb", "Tvdb"):
            raw_id = provider_ids.get(provider)
            if raw_id is None:
                # A null ID must not be stringified to "None": every item
                # with a missing ID would then share one key and be merged.
                continue
            value = str(raw_id).strip().casefold()
            if value:
                return kind, f"{provider.casefold()}:{value}"
    title = clean_media_text(str(item.get("Name", "")), 120) or ""
    year = jellyfin_item_year(item) or ""
    return kind, f"title:{title.casefold()}:{year}"
def dedupe_jellyfin_items(items: list[dict[str, Any]], item_type: str) -> list[dict[str, Any]]:
    """Collapse duplicate Jellyfin items and return them in sort order.

    Deduplication runs in two passes:

    1. Items sharing a ``jellyfin_item_dedupe_key`` are merged, keeping the
       one with the longest overview.
    2. Survivors sharing a cleaned title and year are merged, preferring an
       item that carries provider IDs and, between otherwise equal
       candidates, the one with the longest overview.

    In both passes the earlier item wins a tie, and the outcome of the second
    pass does not depend on the order in which the candidates arrive.

    Args:
        items: Raw Jellyfin item dicts, all of type ``item_type``.
        item_type: Jellyfin item type name passed through to the key builder.

    Returns:
        The surviving items sorted by case-folded ``SortName`` (falling back
        to ``Name``) and then by ``ProductionYear`` as text.
    """

    def _overview_length(candidate: dict[str, Any]) -> int:
        # Treat missing and null overviews alike; str(None) would count as 4.
        text = candidate.get("Overview") or candidate.get("ShortOverview") or ""
        return len(str(text))

    def _title_rank(candidate: dict[str, Any]) -> tuple[bool, int]:
        # Provider IDs outrank overview length, so a richer description never
        # displaces an item that can be matched against external metadata.
        return bool(candidate.get("ProviderIds")), _overview_length(candidate)

    provider_deduped: dict[tuple[str, str], dict[str, Any]] = {}
    for item in items:
        key = jellyfin_item_dedupe_key(item, item_type)
        current = provider_deduped.get(key)
        if current is None or _overview_length(item) > _overview_length(current):
            provider_deduped[key] = item

    title_deduped: dict[tuple[str, str], dict[str, Any]] = {}
    for item in provider_deduped.values():
        title = clean_media_text(str(item.get("Name", "")), 120) or ""
        title_key = (title.casefold(), jellyfin_item_year(item) or "")
        current = title_deduped.get(title_key)
        if current is None or _title_rank(item) > _title_rank(current):
            title_deduped[title_key] = item

    return sorted(
        title_deduped.values(),
        key=lambda item: (
            str(item.get("SortName", item.get("Name", ""))).casefold(),
            str(item.get("ProductionYear", "")),
        ),
    )
def fetch_jellyfin_library(runtime: BotRuntime) -> tuple[list[MediaItem], list[MediaItem]]:
state = load_state(runtime.settings_path)
settings = {
"url": str(state.get("jellyfin_url", "")).strip(),
"apiKey": str(state.get("jellyfin_api_key", "")).strip(),
}
movies = [jellyfin_movie_from_item(item) for item in fetch_jellyfin_items(settings, "Movie")]
shows = [jellyfin_show_from_item(item) for item in fetch_jellyfin_items(settings, "Series")]
movie_items = dedupe_jellyfin_items(fetch_jellyfin_items(settings, "Movie"), "Movie")
show_items = dedupe_jellyfin_items(fetch_jellyfin_items(settings, "Series"), "Series")
movies = [jellyfin_movie_from_item(item) for item in movie_items]
shows = [jellyfin_show_from_item(item) for item in show_items]
return (
sorted(movies, key=lambda item: (item.title.casefold(), item.year or "")),
sorted(shows, key=lambda item: (item.title.casefold(), item.year or "")),
@ -1312,11 +1383,109 @@ def render_media_catalog_markdown(movies: list[MediaItem], shows: list[MediaItem
return "\n".join(lines).strip() + "\n"
def render_catalog_html(runtime: BotRuntime) -> bytes:
    """Render the read-only media catalog page.

    Loads the saved media library, emits one ``<article>`` per movie and show
    (movies first), and embeds a small script that filters the articles by
    type and by a free-text search over title, genres and year.

    Args:
        runtime: Bot runtime whose media library state is rendered.

    Returns:
        The complete HTML document encoded as UTF-8.
    """
    movies, shows = load_media_library(runtime)
    updated_at = load_state(runtime.media_library_path).get("updated_at")
    updated = "Never"
    if updated_at:
        try:
            parsed_update = datetime.fromisoformat(str(updated_at))
        except ValueError:
            # Not ISO formatted: show the stored value verbatim.
            updated = str(updated_at)
        else:
            # The label says "UTC", so convert offset-aware timestamps first.
            # Naive values carry no offset and are shown unchanged.
            if parsed_update.tzinfo is not None:
                parsed_update = parsed_update.astimezone(timezone.utc)
            updated = parsed_update.strftime("%Y-%m-%d %H:%M UTC")

    def item_block(item: MediaItem) -> str:
        """Return the escaped ``<article>`` markup for one library item."""
        meta = []
        if item.year:
            meta.append(item.year)
        if item.genres:
            meta.append(item.genres)
        if item.rating:
            meta.append(item.rating)
        if item.runtime:
            meta.append(item.runtime)
        if item.media_type == "show":
            if item.seasons:
                meta.append(f"{item.seasons} season{'s' if item.seasons != 1 else ''}")
            if item.episodes:
                meta.append(f"{item.episodes} episode{'s' if item.episodes != 1 else ''}")
        summary = f"<p>{html.escape(item.summary)}</p>" if item.summary else ""
        # The page script lower-cases the query with toLowerCase(), so the
        # haystack uses lower() too; casefold() would rewrite characters such
        # as "ß" and make the two sides disagree.
        search_text = (item.title + " " + (item.genres or "") + " " + (item.year or "")).lower()
        return (
            f'<article class="item" data-type="{html.escape(item.media_type)}" '
            f'data-search="{html.escape(search_text)}">'
            f"<h2>{html.escape(item.title)}</h2>"
            f'<div class="meta">{html.escape(" · ".join(meta))}</div>'
            f"{summary}</article>"
        )

    items = "\n".join(item_block(item) for item in [*movies, *shows])
    body = f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>The Mithral Archive Media Catalog</title>
<style>
:root {{ color-scheme: dark; --bg: #111214; --panel: #1b1d21; --line: #30343b; --text: #f1f2f4; --muted: #a2a8b3; --action: #e7e9ed; --action-text: #15171a; }}
* {{ box-sizing: border-box; }}
body {{ margin: 0; background: var(--bg); color: var(--text); font: 14px/1.45 ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Helvetica Neue", sans-serif; }}
header {{ position: sticky; top: 0; z-index: 1; background: #15171a; border-bottom: 1px solid var(--line); padding: 14px 18px; }}
h1 {{ margin: 0 0 10px; font-size: 20px; letter-spacing: 0; }}
.controls {{ display: grid; grid-template-columns: minmax(180px, 1fr) auto auto auto; gap: 8px; align-items: center; }}
input, button {{ border: 1px solid var(--line); border-radius: 6px; background: #14161a; color: var(--text); padding: 8px 9px; font: inherit; }}
button.active {{ background: var(--action); color: var(--action-text); border-color: var(--action); }}
main {{ padding: 18px; display: grid; gap: 1px; background: var(--line); }}
.item {{ background: var(--panel); padding: 13px 14px; }}
.item[hidden] {{ display: none; }}
h2 {{ margin: 0 0 4px; font-size: 16px; }}
.meta {{ color: var(--muted); }}
p {{ margin: 8px 0 0; color: #d5d8de; }}
.stats {{ color: var(--muted); margin-bottom: 8px; }}
@media (max-width: 720px) {{ .controls {{ grid-template-columns: 1fr 1fr; }} .controls input {{ grid-column: 1 / -1; }} }}
</style>
</head>
<body>
<header>
<h1>The Mithral Archive Media Catalog</h1>
<div class="stats">{len(movies)} movies · {len(shows)} shows · Updated {html.escape(updated)}</div>
<div class="controls">
<input id="search" type="search" placeholder="Search title, genre, or year" autocomplete="off">
<button class="active" type="button" data-filter="all">All</button>
<button type="button" data-filter="movie">Movies</button>
<button type="button" data-filter="show">Shows</button>
</div>
</header>
<main id="items">{items}</main>
<script>
const search = document.querySelector("#search");
const buttons = document.querySelectorAll("[data-filter]");
let filter = "all";
function applyFilter() {{
const query = search.value.trim().toLowerCase();
document.querySelectorAll(".item").forEach((item) => {{
const typeMatch = filter === "all" || item.dataset.type === filter;
const searchMatch = !query || item.dataset.search.includes(query);
item.hidden = !(typeMatch && searchMatch);
}});
}}
search.addEventListener("input", applyFilter);
buttons.forEach((button) => button.addEventListener("click", () => {{
filter = button.dataset.filter;
buttons.forEach((item) => item.classList.toggle("active", item === button));
applyFilter();
}}));
</script>
</body>
</html>
"""
    return body.encode("utf-8")
def publish_media_markdown_message(
token: str,
channel_id: str,
movies: list[MediaItem],
shows: list[MediaItem],
catalog_url: str = "",
) -> str:
markdown = render_media_catalog_markdown(movies, shows)
payload = {
@ -1327,6 +1496,20 @@ def publish_media_markdown_message(
),
"allowed_mentions": {"parse": []},
}
if catalog_url:
payload["components"] = [
{
"type": 1,
"components": [
{
"type": 2,
"style": 5,
"label": "Open Catalog",
"url": catalog_url,
}
],
}
]
message = discord_multipart_request(
"POST",
token,
@ -1368,7 +1551,13 @@ def publish_media_items(
for old_id in existing_ids:
discord_delete_message(runtime.token, delete_channel, old_id)
message_id = publish_media_markdown_message(runtime.token, channel, movies_all, shows_all)
message_id = publish_media_markdown_message(
runtime.token,
channel,
movies_all,
shows_all,
catalog_url=settings.get("catalogUrl", ""),
)
save_state(
runtime.media_state_path,
@ -1421,6 +1610,7 @@ def media_catalog_status(runtime: BotRuntime) -> dict[str, Any]:
movies, shows = load_media_library(runtime)
return {
"channelId": str(state.get("channel_id", "")).strip() or settings["mediaChannelId"],
"channels": settings,
"messageIds": state.get("message_ids", []) if isinstance(state.get("message_ids"), list) else [],
"movieCount": state.get("movie_count"),
"showCount": state.get("show_count"),
@ -1666,14 +1856,18 @@ def make_dashboard_handler(runtime: BotRuntime, auth: DashboardAuth | None) -> t
print(f"[dashboard] {self.address_string()} - {format % args}", flush=True)
def do_GET(self) -> None:
if self.path in {"/", "/dashboard"}:
path = urllib.parse.urlparse(self.path).path
if path in {"/", "/dashboard"}:
self.send_dashboard()
return
if self.path == "/favicon.ico":
if path == "/catalog":
self.send_catalog()
return
if path == "/favicon.ico":
self.send_response(HTTPStatus.NO_CONTENT)
self.end_headers()
return
if self.path == "/api/session":
if path == "/api/session":
session = self.require_auth()
if session is None:
return
@ -1686,22 +1880,22 @@ def make_dashboard_handler(runtime: BotRuntime, auth: DashboardAuth | None) -> t
},
)
return
if self.path == "/api/status":
if path == "/api/status":
if self.require_auth() is None:
return
self.send_json(HTTPStatus.OK, runtime_status(runtime))
return
if self.path == "/api/media":
if path == "/api/media":
if self.require_auth() is None:
return
self.send_json(HTTPStatus.OK, media_catalog_status(runtime))
return
if self.path == "/api/settings":
if path == "/api/settings":
if self.require_auth() is None:
return
self.send_json(HTTPStatus.OK, {"channels": channel_settings(runtime)})
return
if self.path == "/api/jellyfin":
if path == "/api/jellyfin":
if self.require_auth() is None:
return
self.send_json(HTTPStatus.OK, {"jellyfin": jellyfin_settings(runtime)})
@ -1709,37 +1903,38 @@ def make_dashboard_handler(runtime: BotRuntime, auth: DashboardAuth | None) -> t
self.send_error(HTTPStatus.NOT_FOUND)
def do_POST(self) -> None:
if self.path == "/api/login":
path = urllib.parse.urlparse(self.path).path
if path == "/api/login":
self.handle_login()
return
session = self.require_auth(require_csrf=True)
if session is None:
return
if self.path == "/api/logout":
if path == "/api/logout":
self.handle_logout(session[0])
return
if self.path == "/api/check":
if path == "/api/check":
self.handle_check()
return
if self.path == "/api/services":
if path == "/api/services":
self.handle_services()
return
if self.path == "/api/media":
if path == "/api/media":
self.handle_media_catalog()
return
if self.path == "/api/media/import":
if path == "/api/media/import":
self.handle_media_import()
return
if self.path == "/api/media/library":
if path == "/api/media/library":
self.handle_media_library()
return
if self.path == "/api/settings":
if path == "/api/settings":
self.handle_settings()
return
if self.path == "/api/jellyfin/settings":
if path == "/api/jellyfin/settings":
self.handle_jellyfin_settings()
return
if self.path == "/api/jellyfin/sync":
if path == "/api/jellyfin/sync":
self.handle_jellyfin_sync()
return
self.send_error(HTTPStatus.NOT_FOUND)
@ -1798,6 +1993,15 @@ def make_dashboard_handler(runtime: BotRuntime, auth: DashboardAuth | None) -> t
self.end_headers()
self.wfile.write(body)
def send_catalog(self) -> None:
    """Write the rendered catalog page as a 200 response that is never cached."""
    payload = render_catalog_html(runtime)
    self.send_response(HTTPStatus.OK)
    response_headers = (
        ("Content-Type", "text/html; charset=utf-8"),
        ("Cache-Control", "no-store"),
        ("Content-Length", str(len(payload))),
    )
    for header_name, header_value in response_headers:
        self.send_header(header_name, header_value)
    self.end_headers()
    self.wfile.write(payload)
def read_json(self) -> dict[str, Any]:
raw_length = self.headers.get("Content-Length", "0")
try:
@ -1918,6 +2122,8 @@ def make_dashboard_handler(runtime: BotRuntime, auth: DashboardAuth | None) -> t
try:
data = self.read_json()
if "movies" in data or "shows" in data:
if "catalogUrl" in data:
save_catalog_url_setting(runtime, str(data.get("catalogUrl", "")))
movies = media_items_from_data(data.get("movies", []), "movie")
shows = media_items_from_data(data.get("shows", []), "show")
save_media_library(runtime, movies, shows)
@ -1992,6 +2198,7 @@ def make_dashboard_handler(runtime: BotRuntime, auth: DashboardAuth | None) -> t
runtime,
str(channels.get("statusChannelId", "")),
str(channels.get("mediaChannelId", "")),
str(channels.get("catalogUrl", "")),
)
except Exception as exc:
self.send_json(HTTPStatus.BAD_REQUEST, {"error": str(exc)})