From 3554b1612fe2fbb993811d3c6b7465c3ea114a8e Mon Sep 17 00:00:00 2001 From: rambros Date: Fri, 27 Mar 2026 13:00:03 +0530 Subject: [PATCH] unify content_type & mime_type --- src/core/backup_database.py | 41 +++++++++++++++++++++++++++++------- src/core/backup_reader.py | 12 +++++++---- src/core/exporter.py | 14 ++++++------ src/fluxer/writer.py | 10 ++++----- src/stoat/migrate_message.py | 12 +++++++++-- src/stoat/writer.py | 5 ++++- 6 files changed, 67 insertions(+), 27 deletions(-) diff --git a/src/core/backup_database.py b/src/core/backup_database.py index f081bcc..ecbd1b8 100644 --- a/src/core/backup_database.py +++ b/src/core/backup_database.py @@ -31,8 +31,33 @@ class BackupDatabase: self._conn.execute("PRAGMA journal_mode=WAL") self._conn.execute("PRAGMA synchronous=NORMAL") self._conn.execute("PRAGMA cache_size=-32000") # 32 MB page cache + self._migrate_db() # Run migrations on existing DBs self._init_db() + # A temporary function to handle backward compatibility for the content_type (mime_type will be deprecated) + # It will be removed in the future + def _migrate_db(self): + """Handles backward compatibility by renaming columns in existing databases.""" + with self._lock: + # Check 'media_pool' table + res = self._conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='media_pool'").fetchone() + if res[0] > 0: + cols = self._conn.execute("PRAGMA table_info(media_pool)").fetchall() + col_names = [c["name"] for c in cols] + if "mime_type" in col_names and "content_type" not in col_names: + logger.info("Migrating media_pool: renaming 'mime_type' to 'content_type'") + self._conn.execute("ALTER TABLE media_pool RENAME COLUMN mime_type TO content_type") + + # Check 'server_assets' table + res = self._conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='server_assets'").fetchone() + if res[0] > 0: + cols = self._conn.execute("PRAGMA table_info(server_assets)").fetchall() + col_names = [c["name"] for c in cols] + if "mime_type" in col_names and "content_type" not in col_names: + logger.info("Migrating server_assets: renaming 'mime_type' to 'content_type'") + self._conn.execute("ALTER TABLE server_assets RENAME COLUMN mime_type TO content_type") + self._conn.commit() + def _init_db(self): """Initializes the database schema.""" with self._lock: @@ -224,7 +249,7 @@ class BackupDatabase: hash TEXT PRIMARY KEY, local_path TEXT, size INTEGER, - mime_type TEXT, + content_type TEXT, first_seen_url TEXT ) """) @@ -237,7 +262,7 @@ class BackupDatabase: type TEXT, filename TEXT, url TEXT, - mime_type TEXT + content_type TEXT ) """) @@ -327,13 +352,13 @@ class BackupDatabase: "type": a.get("type"), "filename": a.get("filename"), "url": a.get("url"), - "mime_type": a.get("mime_type") + "content_type": a.get("content_type") } for a in assets ] self._conn.executemany(""" - INSERT OR REPLACE INTO server_assets (id, name, type, filename, url, mime_type) - VALUES (:id, :name, :type, :filename, :url, :mime_type) + INSERT OR REPLACE INTO server_assets (id, name, type, filename, url, content_type) + VALUES (:id, :name, :type, :filename, :url, :content_type) """, formatted) self._conn.commit() @@ -457,13 +482,13 @@ class BackupDatabase: row = self._conn.execute("SELECT * FROM media_pool WHERE first_seen_url = ?", (url,)).fetchone() return dict(row) if row else None - def add_media_to_pool(self, file_hash: str, local_path: str, size: int, mime_type: str, url: str): + def add_media_to_pool(self, file_hash: str, local_path: str, size: int, content_type: str, url: str): # NOTE: No commit here — caller (save_messages_batch) commits at end of batch with self._lock: self._conn.execute(""" - INSERT OR REPLACE INTO media_pool (hash, local_path, size, mime_type, first_seen_url) + INSERT OR REPLACE INTO media_pool (hash, local_path, size, content_type, first_seen_url) VALUES (?, ?, ?, ?, ?) - """, (file_hash, str(local_path), size, mime_type, url)) + """, (file_hash, str(local_path), size, content_type, url)) def get_stats_by_channel(self) -> Dict[int, Dict[str, Any]]: """Returns aggregate stats for all channels with backups.""" with self._lock: diff --git a/src/core/backup_reader.py b/src/core/backup_reader.py index c6463e8..408e16e 100644 --- a/src/core/backup_reader.py +++ b/src/core/backup_reader.py @@ -444,7 +444,7 @@ class BackupMember: class BackupAttachment: """Minimal stand-in for discord.Attachment.""" - __slots__ = ("id", "filename", "size", "url", "proxy_url", "_backup_root", "local_hash") + __slots__ = ("id", "filename", "size", "url", "proxy_url", "_backup_root", "local_hash", "content_type") def __init__(self, data: dict, backup_root: Path | None = None, media_pool: dict | None = None): if not isinstance(data, dict): @@ -458,10 +458,14 @@ class BackupAttachment: self.proxy_url = self.url self._backup_root = backup_root self.local_hash = data.get("local_hash") + self.content_type = data.get("content_type") - # Resolve local path via media pool if possible + # Resolve local path and ensure content_type via media pool if possible if media_pool and self.local_hash in media_pool: - self.url = media_pool[self.local_hash]["local_path"] + pool_entry = media_pool[self.local_hash] + self.url = pool_entry["local_path"] + if not self.content_type: + self.content_type = pool_entry.get("content_type") elif self.local_hash: # Fallback conjecture if pool didn't have it (e.g. ad-hoc load) pass @@ -493,7 +497,7 @@ class BackupEmoji: return self.id = parse_snowflake(data["id"]) self.name = data["name"] - self.animated = data.get("mime_type") == "image/gif" + self.animated = data.get("content_type") == "image/gif" filename = data.get("filename", "") self._file_path = media_dir / filename if media_dir and filename else None # Use the local path if available, else original URL diff --git a/src/core/exporter.py b/src/core/exporter.py index 1ca5740..e60aaf7 100644 --- a/src/core/exporter.py +++ b/src/core/exporter.py @@ -173,7 +173,7 @@ class DiscordExporter: "type": "emoji", "filename": filename, "url": str(e.url), - "mime_type": "image/gif" if e.animated else "image/png" + "content_type": "image/gif" if e.animated else "image/png" }) except Exception as ex: logger.error(f"Failed to download emoji {e.name}: {ex}") @@ -191,10 +191,10 @@ class DiscordExporter: with open(sticker_path, "wb") as f: f.write(data) - mime_type = "image/png" - if ext == "json": mime_type = "application/json" - elif ext == "gif": mime_type = "image/gif" - elif ext == "webp": mime_type = "image/webp" + content_type = "image/png" + if ext == "json": content_type = "application/json" + elif ext == "gif": content_type = "image/gif" + elif ext == "webp": content_type = "image/webp" sticker_data.append({ "id": str(s.id), @@ -202,7 +202,7 @@ class DiscordExporter: "type": "sticker", "filename": filename, "url": str(s.url), - "mime_type": mime_type + "content_type": content_type }) except Exception as ex: logger.error(f"Failed to download sticker {getattr(s, 'name', 'unknown')}: {ex}") @@ -631,7 +631,7 @@ class DiscordExporter: "filename": filename, "size": existing["size"], "url": str(url), - "content_type": existing["mime_type"], + "content_type": existing["content_type"], "local_hash": existing["hash"] } diff --git a/src/fluxer/writer.py b/src/fluxer/writer.py index beb74de..eb5c21b 100644 --- a/src/fluxer/writer.py +++ b/src/fluxer/writer.py @@ -454,14 +454,14 @@ class FluxerWriter: import base64 image_data = base64.b64encode(banner).decode("ascii") if banner.startswith(b"\x89PNG"): - mime_type = "image/png" + content_type = "image/png" elif banner.startswith(b"\xff\xd8\xff"): - mime_type = "image/jpeg" + content_type = "image/jpeg" elif banner.startswith(b"GIF89a") or banner.startswith(b"GIF87a"): - mime_type = "image/gif" + content_type = "image/gif" else: - mime_type = "image/png" - kwargs["banner"] = f"data:{mime_type};base64,{image_data}" + content_type = "image/png" + kwargs["banner"] = f"data:{content_type};base64,{image_data}" try: await self.client.modify_guild( diff --git a/src/stoat/migrate_message.py b/src/stoat/migrate_message.py index c872fde..c0861bd 100644 --- a/src/stoat/migrate_message.py +++ b/src/stoat/migrate_message.py @@ -440,7 +440,11 @@ async def migrate_messages( for att in attachments_to_process: try: att_data = await context.discord_reader.download_attachment(att) - files.append({"filename": att.filename, "data": att_data}) + files.append({ + "filename": att.filename, + "data": att_data, + "content_type": getattr(att, "content_type", None) + }) stats["attachments"] += 1 except Exception as e: logger.error(f"Failed to download attachment {att.filename}: {e}") @@ -528,7 +532,11 @@ async def migrate_messages( # Keep original apng as fallback filename = f"sticker_{s.name}_{s.id}.{ext}" - files.append({"filename": filename, "data": sticker_data}) + files.append({ + "filename": filename, + "data": sticker_data, + "content_type": f"image/{ext}" if ext != "json" else "application/json" + }) stats["attachments"] += 1 logger.debug(f"Added sticker {s.name} as attachment (extension: {ext})") except Exception as e: diff --git a/src/stoat/writer.py b/src/stoat/writer.py index 75e57e5..77ed74e 100644 --- a/src/stoat/writer.py +++ b/src/stoat/writer.py @@ -341,7 +341,10 @@ class StoatWriter: if files: attachments = [] for f in files: - attachments.append((f["filename"], f["data"])) + if f.get("content_type"): + attachments.append((f["filename"], f["data"], f["content_type"])) + else: + attachments.append((f["filename"], f["data"])) try: # Stoat requires SendableEmbed objects, not raw dicts