unify content_type & mime_type

This commit is contained in:
rambros 2026-03-27 13:00:03 +05:30
parent 93c54c4d0f
commit 3554b1612f
6 changed files with 67 additions and 27 deletions

View file

@ -31,8 +31,33 @@ class BackupDatabase:
self._conn.execute("PRAGMA journal_mode=WAL")
self._conn.execute("PRAGMA synchronous=NORMAL")
self._conn.execute("PRAGMA cache_size=-32000") # 32 MB page cache
self._migrate_db() # Run migrations on existing DBs
self._init_db()
# Temporary backward-compatibility shim: renames the legacy 'mime_type' column to 'content_type'.
# 'mime_type' is deprecated; this function will be removed in the future.
def _migrate_db(self):
    """Rename the legacy ``mime_type`` column to ``content_type`` in existing databases.

    For each of the ``media_pool`` and ``server_assets`` tables that is
    already present, the column is renamed only when ``mime_type`` exists
    and ``content_type`` does not. Missing tables and already-migrated
    tables are left untouched. The connection is committed once at the end.
    """
    legacy_column = "mime_type"
    current_column = "content_type"

    with self._lock:
        for table in ("media_pool", "server_assets"):
            found = self._conn.execute(
                "SELECT count(*) FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            ).fetchone()
            if found[0] == 0:
                # Fresh database (or table not created yet): nothing to migrate.
                continue

            # Identifiers cannot be bound as SQL parameters; the table names
            # come from the fixed tuple above, so interpolating them is safe.
            # NOTE(review): name-based row access assumes the connection's
            # row_factory supports it (e.g. sqlite3.Row) - confirm in __init__.
            columns = {
                row["name"]
                for row in self._conn.execute(f"PRAGMA table_info({table})").fetchall()
            }
            if legacy_column in columns and current_column not in columns:
                logger.info(
                    "Migrating %s: renaming '%s' to '%s'",
                    table,
                    legacy_column,
                    current_column,
                )
                self._conn.execute(
                    f"ALTER TABLE {table} RENAME COLUMN {legacy_column} TO {current_column}"
                )
        self._conn.commit()
def _init_db(self):
"""Initializes the database schema."""
with self._lock:
@ -224,7 +249,7 @@ class BackupDatabase:
hash TEXT PRIMARY KEY,
local_path TEXT,
size INTEGER,
mime_type TEXT,
content_type TEXT,
first_seen_url TEXT
)
""")
@ -237,7 +262,7 @@ class BackupDatabase:
type TEXT,
filename TEXT,
url TEXT,
mime_type TEXT
content_type TEXT
)
""")
@ -327,13 +352,13 @@ class BackupDatabase:
"type": a.get("type"),
"filename": a.get("filename"),
"url": a.get("url"),
"mime_type": a.get("mime_type")
"content_type": a.get("content_type")
}
for a in assets
]
self._conn.executemany("""
INSERT OR REPLACE INTO server_assets (id, name, type, filename, url, mime_type)
VALUES (:id, :name, :type, :filename, :url, :mime_type)
INSERT OR REPLACE INTO server_assets (id, name, type, filename, url, content_type)
VALUES (:id, :name, :type, :filename, :url, :content_type)
""", formatted)
self._conn.commit()
@ -457,13 +482,13 @@ class BackupDatabase:
row = self._conn.execute("SELECT * FROM media_pool WHERE first_seen_url = ?", (url,)).fetchone()
return dict(row) if row else None
def add_media_to_pool(self, file_hash: str, local_path: str, size: int, mime_type: str, url: str):
def add_media_to_pool(self, file_hash: str, local_path: str, size: int, content_type: str, url: str):
# NOTE: No commit here — caller (save_messages_batch) commits at end of batch
with self._lock:
self._conn.execute("""
INSERT OR REPLACE INTO media_pool (hash, local_path, size, mime_type, first_seen_url)
INSERT OR REPLACE INTO media_pool (hash, local_path, size, content_type, first_seen_url)
VALUES (?, ?, ?, ?, ?)
""", (file_hash, str(local_path), size, mime_type, url))
""", (file_hash, str(local_path), size, content_type, url))
def get_stats_by_channel(self) -> Dict[int, Dict[str, Any]]:
"""Returns aggregate stats for all channels with backups."""
with self._lock:

View file

@ -444,7 +444,7 @@ class BackupMember:
class BackupAttachment:
"""Minimal stand-in for discord.Attachment."""
__slots__ = ("id", "filename", "size", "url", "proxy_url", "_backup_root", "local_hash")
__slots__ = ("id", "filename", "size", "url", "proxy_url", "_backup_root", "local_hash", "content_type")
def __init__(self, data: dict, backup_root: Path | None = None, media_pool: dict | None = None):
if not isinstance(data, dict):
@ -458,10 +458,14 @@ class BackupAttachment:
self.proxy_url = self.url
self._backup_root = backup_root
self.local_hash = data.get("local_hash")
self.content_type = data.get("content_type")
# Resolve local path via media pool if possible
# Resolve local path and ensure content_type via media pool if possible
if media_pool and self.local_hash in media_pool:
self.url = media_pool[self.local_hash]["local_path"]
pool_entry = media_pool[self.local_hash]
self.url = pool_entry["local_path"]
if not self.content_type:
self.content_type = pool_entry.get("content_type")
elif self.local_hash:
# Fallback conjecture if pool didn't have it (e.g. ad-hoc load)
pass
@ -493,7 +497,7 @@ class BackupEmoji:
return
self.id = parse_snowflake(data["id"])
self.name = data["name"]
self.animated = data.get("mime_type") == "image/gif"
self.animated = data.get("content_type") == "image/gif"
filename = data.get("filename", "")
self._file_path = media_dir / filename if media_dir and filename else None
# Use the local path if available, else original URL

View file

@ -173,7 +173,7 @@ class DiscordExporter:
"type": "emoji",
"filename": filename,
"url": str(e.url),
"mime_type": "image/gif" if e.animated else "image/png"
"content_type": "image/gif" if e.animated else "image/png"
})
except Exception as ex:
logger.error(f"Failed to download emoji {e.name}: {ex}")
@ -191,10 +191,10 @@ class DiscordExporter:
with open(sticker_path, "wb") as f:
f.write(data)
mime_type = "image/png"
if ext == "json": mime_type = "application/json"
elif ext == "gif": mime_type = "image/gif"
elif ext == "webp": mime_type = "image/webp"
content_type = "image/png"
if ext == "json": content_type = "application/json"
elif ext == "gif": content_type = "image/gif"
elif ext == "webp": content_type = "image/webp"
sticker_data.append({
"id": str(s.id),
@ -202,7 +202,7 @@ class DiscordExporter:
"type": "sticker",
"filename": filename,
"url": str(s.url),
"mime_type": mime_type
"content_type": content_type
})
except Exception as ex:
logger.error(f"Failed to download sticker {getattr(s, 'name', 'unknown')}: {ex}")
@ -631,7 +631,7 @@ class DiscordExporter:
"filename": filename,
"size": existing["size"],
"url": str(url),
"content_type": existing["mime_type"],
"content_type": existing["content_type"],
"local_hash": existing["hash"]
}

View file

@ -454,14 +454,14 @@ class FluxerWriter:
import base64
image_data = base64.b64encode(banner).decode("ascii")
if banner.startswith(b"\x89PNG"):
mime_type = "image/png"
content_type = "image/png"
elif banner.startswith(b"\xff\xd8\xff"):
mime_type = "image/jpeg"
content_type = "image/jpeg"
elif banner.startswith(b"GIF89a") or banner.startswith(b"GIF87a"):
mime_type = "image/gif"
content_type = "image/gif"
else:
mime_type = "image/png"
kwargs["banner"] = f"data:{mime_type};base64,{image_data}"
content_type = "image/png"
kwargs["banner"] = f"data:{content_type};base64,{image_data}"
try:
await self.client.modify_guild(

View file

@ -440,7 +440,11 @@ async def migrate_messages(
for att in attachments_to_process:
try:
att_data = await context.discord_reader.download_attachment(att)
files.append({"filename": att.filename, "data": att_data})
files.append({
"filename": att.filename,
"data": att_data,
"content_type": getattr(att, "content_type", None)
})
stats["attachments"] += 1
except Exception as e:
logger.error(f"Failed to download attachment {att.filename}: {e}")
@ -528,7 +532,11 @@ async def migrate_messages(
# Keep original apng as fallback
filename = f"sticker_{s.name}_{s.id}.{ext}"
files.append({"filename": filename, "data": sticker_data})
files.append({
"filename": filename,
"data": sticker_data,
"content_type": f"image/{ext}" if ext != "json" else "application/json"
})
stats["attachments"] += 1
logger.debug(f"Added sticker {s.name} as attachment (extension: {ext})")
except Exception as e:

View file

@ -341,7 +341,10 @@ class StoatWriter:
if files:
attachments = []
for f in files:
attachments.append((f["filename"], f["data"]))
if f.get("content_type"):
attachments.append((f["filename"], f["data"], f["content_type"]))
else:
attachments.append((f["filename"], f["data"]))
try:
# Stoat requires SendableEmbed objects, not raw dicts