unify content_type & mime_type

This commit is contained in:
rambros 2026-03-27 13:00:03 +05:30
parent 93c54c4d0f
commit 3554b1612f
6 changed files with 67 additions and 27 deletions

View file

@ -31,8 +31,33 @@ class BackupDatabase:
self._conn.execute("PRAGMA journal_mode=WAL") self._conn.execute("PRAGMA journal_mode=WAL")
self._conn.execute("PRAGMA synchronous=NORMAL") self._conn.execute("PRAGMA synchronous=NORMAL")
self._conn.execute("PRAGMA cache_size=-32000") # 32 MB page cache self._conn.execute("PRAGMA cache_size=-32000") # 32 MB page cache
self._migrate_db() # Run migrations on existing DBs
self._init_db() self._init_db()
# Temporary backward-compatibility shim: existing databases may still use the deprecated
# 'mime_type' column name, which is renamed to 'content_type'. It will be removed in the future.
def _migrate_db(self):
    """Rename legacy ``mime_type`` columns to ``content_type`` in existing databases.

    For each of the ``media_pool`` and ``server_assets`` tables: if the table
    exists and has a ``mime_type`` column but no ``content_type`` column, the
    column is renamed in place. A table that does not exist, or that already
    has ``content_type``, is left untouched, so running this again after a
    successful migration changes nothing.

    The whole check-and-rename sequence runs while holding ``self._lock`` and
    ends with a commit on ``self._conn``.

    NOTE: ``ALTER TABLE ... RENAME COLUMN`` needs SQLite 3.25.0 or newer; on an
    older library the statement raises and the error propagates to the caller.
    """
    with self._lock:
        # Both tables get the identical treatment, so handle them in one loop.
        for table in ("media_pool", "server_assets"):
            table_count = self._conn.execute(
                "SELECT count(*) FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            ).fetchone()[0]
            if not table_count:
                # Table is absent, so there is nothing to rename.
                continue

            # Rows are read by column name; this assumes the connection's
            # row_factory supports name lookup (e.g. sqlite3.Row), which is
            # configured outside this method -- TODO confirm.
            # `table` comes from the fixed tuple above, never from user input,
            # so interpolating it into the statement text is safe.
            columns = self._conn.execute(f"PRAGMA table_info({table})").fetchall()
            col_names = {c["name"] for c in columns}

            if "mime_type" in col_names and "content_type" not in col_names:
                logger.info(f"Migrating {table}: renaming 'mime_type' to 'content_type'")
                self._conn.execute(f"ALTER TABLE {table} RENAME COLUMN mime_type TO content_type")

        self._conn.commit()
def _init_db(self): def _init_db(self):
"""Initializes the database schema.""" """Initializes the database schema."""
with self._lock: with self._lock:
@ -224,7 +249,7 @@ class BackupDatabase:
hash TEXT PRIMARY KEY, hash TEXT PRIMARY KEY,
local_path TEXT, local_path TEXT,
size INTEGER, size INTEGER,
mime_type TEXT, content_type TEXT,
first_seen_url TEXT first_seen_url TEXT
) )
""") """)
@ -237,7 +262,7 @@ class BackupDatabase:
type TEXT, type TEXT,
filename TEXT, filename TEXT,
url TEXT, url TEXT,
mime_type TEXT content_type TEXT
) )
""") """)
@ -327,13 +352,13 @@ class BackupDatabase:
"type": a.get("type"), "type": a.get("type"),
"filename": a.get("filename"), "filename": a.get("filename"),
"url": a.get("url"), "url": a.get("url"),
"mime_type": a.get("mime_type") "content_type": a.get("content_type")
} }
for a in assets for a in assets
] ]
self._conn.executemany(""" self._conn.executemany("""
INSERT OR REPLACE INTO server_assets (id, name, type, filename, url, mime_type) INSERT OR REPLACE INTO server_assets (id, name, type, filename, url, content_type)
VALUES (:id, :name, :type, :filename, :url, :mime_type) VALUES (:id, :name, :type, :filename, :url, :content_type)
""", formatted) """, formatted)
self._conn.commit() self._conn.commit()
@ -457,13 +482,13 @@ class BackupDatabase:
row = self._conn.execute("SELECT * FROM media_pool WHERE first_seen_url = ?", (url,)).fetchone() row = self._conn.execute("SELECT * FROM media_pool WHERE first_seen_url = ?", (url,)).fetchone()
return dict(row) if row else None return dict(row) if row else None
def add_media_to_pool(self, file_hash: str, local_path: str, size: int, mime_type: str, url: str): def add_media_to_pool(self, file_hash: str, local_path: str, size: int, content_type: str, url: str):
# NOTE: No commit here — caller (save_messages_batch) commits at end of batch # NOTE: No commit here — caller (save_messages_batch) commits at end of batch
with self._lock: with self._lock:
self._conn.execute(""" self._conn.execute("""
INSERT OR REPLACE INTO media_pool (hash, local_path, size, mime_type, first_seen_url) INSERT OR REPLACE INTO media_pool (hash, local_path, size, content_type, first_seen_url)
VALUES (?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?)
""", (file_hash, str(local_path), size, mime_type, url)) """, (file_hash, str(local_path), size, content_type, url))
def get_stats_by_channel(self) -> Dict[int, Dict[str, Any]]: def get_stats_by_channel(self) -> Dict[int, Dict[str, Any]]:
"""Returns aggregate stats for all channels with backups.""" """Returns aggregate stats for all channels with backups."""
with self._lock: with self._lock:

View file

@ -444,7 +444,7 @@ class BackupMember:
class BackupAttachment: class BackupAttachment:
"""Minimal stand-in for discord.Attachment.""" """Minimal stand-in for discord.Attachment."""
__slots__ = ("id", "filename", "size", "url", "proxy_url", "_backup_root", "local_hash") __slots__ = ("id", "filename", "size", "url", "proxy_url", "_backup_root", "local_hash", "content_type")
def __init__(self, data: dict, backup_root: Path | None = None, media_pool: dict | None = None): def __init__(self, data: dict, backup_root: Path | None = None, media_pool: dict | None = None):
if not isinstance(data, dict): if not isinstance(data, dict):
@ -458,10 +458,14 @@ class BackupAttachment:
self.proxy_url = self.url self.proxy_url = self.url
self._backup_root = backup_root self._backup_root = backup_root
self.local_hash = data.get("local_hash") self.local_hash = data.get("local_hash")
self.content_type = data.get("content_type")
# Resolve local path via media pool if possible # Resolve local path and ensure content_type via media pool if possible
if media_pool and self.local_hash in media_pool: if media_pool and self.local_hash in media_pool:
self.url = media_pool[self.local_hash]["local_path"] pool_entry = media_pool[self.local_hash]
self.url = pool_entry["local_path"]
if not self.content_type:
self.content_type = pool_entry.get("content_type")
elif self.local_hash: elif self.local_hash:
# Fallback conjecture if pool didn't have it (e.g. ad-hoc load) # Fallback conjecture if pool didn't have it (e.g. ad-hoc load)
pass pass
@ -493,7 +497,7 @@ class BackupEmoji:
return return
self.id = parse_snowflake(data["id"]) self.id = parse_snowflake(data["id"])
self.name = data["name"] self.name = data["name"]
self.animated = data.get("mime_type") == "image/gif" self.animated = data.get("content_type") == "image/gif"
filename = data.get("filename", "") filename = data.get("filename", "")
self._file_path = media_dir / filename if media_dir and filename else None self._file_path = media_dir / filename if media_dir and filename else None
# Use the local path if available, else original URL # Use the local path if available, else original URL

View file

@ -173,7 +173,7 @@ class DiscordExporter:
"type": "emoji", "type": "emoji",
"filename": filename, "filename": filename,
"url": str(e.url), "url": str(e.url),
"mime_type": "image/gif" if e.animated else "image/png" "content_type": "image/gif" if e.animated else "image/png"
}) })
except Exception as ex: except Exception as ex:
logger.error(f"Failed to download emoji {e.name}: {ex}") logger.error(f"Failed to download emoji {e.name}: {ex}")
@ -191,10 +191,10 @@ class DiscordExporter:
with open(sticker_path, "wb") as f: with open(sticker_path, "wb") as f:
f.write(data) f.write(data)
mime_type = "image/png" content_type = "image/png"
if ext == "json": mime_type = "application/json" if ext == "json": content_type = "application/json"
elif ext == "gif": mime_type = "image/gif" elif ext == "gif": content_type = "image/gif"
elif ext == "webp": mime_type = "image/webp" elif ext == "webp": content_type = "image/webp"
sticker_data.append({ sticker_data.append({
"id": str(s.id), "id": str(s.id),
@ -202,7 +202,7 @@ class DiscordExporter:
"type": "sticker", "type": "sticker",
"filename": filename, "filename": filename,
"url": str(s.url), "url": str(s.url),
"mime_type": mime_type "content_type": content_type
}) })
except Exception as ex: except Exception as ex:
logger.error(f"Failed to download sticker {getattr(s, 'name', 'unknown')}: {ex}") logger.error(f"Failed to download sticker {getattr(s, 'name', 'unknown')}: {ex}")
@ -631,7 +631,7 @@ class DiscordExporter:
"filename": filename, "filename": filename,
"size": existing["size"], "size": existing["size"],
"url": str(url), "url": str(url),
"content_type": existing["mime_type"], "content_type": existing["content_type"],
"local_hash": existing["hash"] "local_hash": existing["hash"]
} }

View file

@ -454,14 +454,14 @@ class FluxerWriter:
import base64 import base64
image_data = base64.b64encode(banner).decode("ascii") image_data = base64.b64encode(banner).decode("ascii")
if banner.startswith(b"\x89PNG"): if banner.startswith(b"\x89PNG"):
mime_type = "image/png" content_type = "image/png"
elif banner.startswith(b"\xff\xd8\xff"): elif banner.startswith(b"\xff\xd8\xff"):
mime_type = "image/jpeg" content_type = "image/jpeg"
elif banner.startswith(b"GIF89a") or banner.startswith(b"GIF87a"): elif banner.startswith(b"GIF89a") or banner.startswith(b"GIF87a"):
mime_type = "image/gif" content_type = "image/gif"
else: else:
mime_type = "image/png" content_type = "image/png"
kwargs["banner"] = f"data:{mime_type};base64,{image_data}" kwargs["banner"] = f"data:{content_type};base64,{image_data}"
try: try:
await self.client.modify_guild( await self.client.modify_guild(

View file

@ -440,7 +440,11 @@ async def migrate_messages(
for att in attachments_to_process: for att in attachments_to_process:
try: try:
att_data = await context.discord_reader.download_attachment(att) att_data = await context.discord_reader.download_attachment(att)
files.append({"filename": att.filename, "data": att_data}) files.append({
"filename": att.filename,
"data": att_data,
"content_type": getattr(att, "content_type", None)
})
stats["attachments"] += 1 stats["attachments"] += 1
except Exception as e: except Exception as e:
logger.error(f"Failed to download attachment {att.filename}: {e}") logger.error(f"Failed to download attachment {att.filename}: {e}")
@ -528,7 +532,11 @@ async def migrate_messages(
# Keep original apng as fallback # Keep original apng as fallback
filename = f"sticker_{s.name}_{s.id}.{ext}" filename = f"sticker_{s.name}_{s.id}.{ext}"
files.append({"filename": filename, "data": sticker_data}) files.append({
"filename": filename,
"data": sticker_data,
"content_type": f"image/{ext}" if ext != "json" else "application/json"
})
stats["attachments"] += 1 stats["attachments"] += 1
logger.debug(f"Added sticker {s.name} as attachment (extension: {ext})") logger.debug(f"Added sticker {s.name} as attachment (extension: {ext})")
except Exception as e: except Exception as e:

View file

@ -341,6 +341,9 @@ class StoatWriter:
if files: if files:
attachments = [] attachments = []
for f in files: for f in files:
if f.get("content_type"):
attachments.append((f["filename"], f["data"], f["content_type"]))
else:
attachments.append((f["filename"], f["data"])) attachments.append((f["filename"], f["data"]))
try: try: