use int for discord snowflake ids

This commit is contained in:
rambros 2026-03-28 02:46:19 +05:30
parent 0111bc0eae
commit 5750133b3a
4 changed files with 255 additions and 138 deletions

View file

@ -4,20 +4,11 @@ import json
import threading import threading
from pathlib import Path from pathlib import Path
from typing import Dict, Any, List, Optional, Union from typing import Dict, Any, List, Optional, Union
from src.core.utils import parse_snowflake
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def parse_snowflake(value: Any) -> Optional[int]:
    """Parse a Discord ID (snowflake) from arbitrary input.

    The value is converted with ``str()`` and stripped of surrounding
    whitespace. It is accepted only if the remaining text consists solely of
    the ASCII digits 0-9.

    Returns:
        The ID as an ``int``, or ``None`` when ``value`` is ``None``, empty,
        a placeholder such as ``"None"`` / ``"NULL"``, or anything else that
        is not a plain run of digits.
    """
    if value is None:
        return None

    text = str(value).strip()

    # int() on its own is too lenient for IDs: it also accepts "1_000", "-5",
    # "+5" and non-ASCII digits, which would be stored as bogus snowflakes.
    # Requiring plain digits also rejects "", "None" and "NULL".
    if not text or any(ch not in "0123456789" for ch in text):
        return None

    try:
        return int(text)
    except ValueError:
        # Recent Python versions refuse to convert extremely long digit strings.
        return None
class BackupDatabase: class BackupDatabase:
"""Manages the SQLite database for local Discord backups.""" """Manages the SQLite database for local Discord backups."""
@ -39,47 +30,101 @@ class BackupDatabase:
def _migrate_db(self):
    """Bring a database created by an older version up to the current schema.

    Two migrations run while holding ``self._lock``:

    1. ``media_pool`` / ``server_assets``: a legacy ``mime_type`` column is
       renamed to ``content_type``.
    2. Snowflake columns: any listed table that still declares one of its ID
       columns as ``TEXT`` is rebuilt with ``INTEGER`` ID columns, and the
       columns common to the old and new table are copied across.

    Tables that do not exist are skipped. All changes are made in a single
    transaction: on any error the transaction is rolled back, so a table is
    never left renamed to ``<table>_old``, and the error is re-raised.
    """
    # table -> (ID columns that must be INTEGER, column definitions used when
    # the table is rebuilt). The definitions have to stay in step with the
    # CREATE TABLE statements in _init_db.
    rebuild_specs = {
        "guild_profile": (
            ("id", "owner_id"),
            "id INTEGER PRIMARY KEY, name TEXT, description TEXT, icon_file TEXT, "
            "icon_url TEXT, banner_file TEXT, banner_url TEXT, owner_id INTEGER, "
            "last_backup TEXT, ignore_channels TEXT",
        ),
        "roles": (
            ("id", "permissions"),
            "id INTEGER PRIMARY KEY, name TEXT, color INTEGER, position INTEGER, "
            "permissions INTEGER, hoist INTEGER, mentionable INTEGER",
        ),
        "channels": (
            ("id", "category_id"),
            "id INTEGER PRIMARY KEY, name TEXT, type INTEGER, position INTEGER, "
            "category_id INTEGER, topic TEXT, nsfw INTEGER, bitrate INTEGER, "
            "slowmode_delay INTEGER",
        ),
        "permissions": (
            ("channel_id", "target_id"),
            "id INTEGER PRIMARY KEY AUTOINCREMENT, channel_id INTEGER, "
            "target_id INTEGER, target_type TEXT, allow INTEGER, deny INTEGER",
        ),
        "users": (
            ("id",),
            "id INTEGER PRIMARY KEY, username TEXT, display_name TEXT, "
            "avatar_file TEXT, avatar_url TEXT, roles TEXT",
        ),
        "messages": (
            ("id", "channel_id", "author_id", "message_reference"),
            "id INTEGER PRIMARY KEY, channel_id INTEGER, author_id INTEGER, "
            "content TEXT, timestamp TEXT, type INTEGER, "
            "message_reference INTEGER, is_pinned INTEGER, extra_data TEXT",
        ),
        "attachments": (
            ("id", "message_id"),
            "id INTEGER PRIMARY KEY, message_id INTEGER, filename TEXT, "
            "size INTEGER, url TEXT, content_type TEXT, local_hash TEXT",
        ),
        "embeds": (
            ("message_id",),
            "id INTEGER PRIMARY KEY AUTOINCREMENT, message_id INTEGER, title TEXT, "
            "description TEXT, url TEXT, color INTEGER, timestamp TEXT, "
            "thumbnail_url TEXT, image_url TEXT, author_name TEXT, author_url TEXT, "
            "author_icon_url TEXT, footer_text TEXT, footer_icon_url TEXT, "
            "fields TEXT",
        ),
        "reactions": (
            ("message_id", "emoji_id"),
            "id INTEGER PRIMARY KEY AUTOINCREMENT, message_id INTEGER, "
            "emoji_id INTEGER, emoji_name TEXT, count INTEGER",
        ),
        "message_stickers": (
            ("message_id", "sticker_id"),
            "message_id INTEGER, sticker_id INTEGER, name TEXT, url TEXT, "
            "format_type INTEGER, local_hash TEXT, "
            "PRIMARY KEY (message_id, sticker_id)",
        ),
        "threads": (
            ("id", "parent_id"),
            "id INTEGER PRIMARY KEY, name TEXT, type INTEGER, parent_id INTEGER, "
            "message_count INTEGER, member_count INTEGER, archived INTEGER, "
            "archive_timestamp TEXT, auto_archive_duration INTEGER, "
            "locked INTEGER, applied_tags TEXT",
        ),
        "forum_tags": (
            ("id", "forum_id", "emoji_id"),
            "id INTEGER PRIMARY KEY, forum_id INTEGER, name TEXT, "
            "moderated INTEGER, emoji_id INTEGER, emoji_name TEXT",
        ),
        "server_assets": (
            ("id",),
            # content_type holds the renamed mime_type text, so it is TEXT
            # (the previous rebuild declared it INTEGER by mistake).
            "id INTEGER PRIMARY KEY, name TEXT, type TEXT, filename TEXT, "
            "url TEXT, content_type TEXT",
        ),
    }

    with self._lock:
        conn = self._conn

        def table_exists(name):
            """Return True if a table called *name* exists in the database."""
            found = conn.execute(
                "SELECT count(*) FROM sqlite_master WHERE type='table' AND name=?",
                (name,),
            ).fetchone()
            return bool(found and found[0] > 0)

        def column_info(name):
            """Return the PRAGMA table_info rows (index 1 = name, 2 = type)."""
            return conn.execute(f"PRAGMA table_info({name})").fetchall()

        # Schema changes are transactional in SQLite; run everything in one
        # transaction so a failure cannot strand data in a *_old table.
        if not conn.in_transaction:
            conn.execute("BEGIN")
        try:
            # 1. MIME type -> content type column rename.
            for table in ("media_pool", "server_assets"):
                if not table_exists(table):
                    continue
                names = [col[1] for col in column_info(table)]
                if "mime_type" in names and "content_type" not in names:
                    logger.info("Migrating %s: renaming 'mime_type' to 'content_type'", table)
                    conn.execute(f"ALTER TABLE {table} RENAME COLUMN mime_type TO content_type")

            # 2. Universal ID migration (TEXT -> INTEGER).
            for table, (id_cols, column_defs) in rebuild_specs.items():
                if not table_exists(table):
                    continue
                if not any(col[1] in id_cols and col[2] == "TEXT" for col in column_info(table)):
                    continue

                logger.info("Migrating %s: converting ID columns to INTEGER", table)
                old_table = f"{table}_old"
                conn.execute(f"ALTER TABLE {table} RENAME TO {old_table}")
                conn.execute(f"CREATE TABLE {table} ({column_defs})")

                # Copy only the columns both versions share. Digit-only text
                # is stored as an integer once it lands in an INTEGER column.
                # NOTE(review): legacy rows may hold the literal text 'None'
                # (older writers appear to have used str(value)). In an
                # INTEGER PRIMARY KEY column that makes the copy fail and the
                # migration roll back - confirm whether such rows exist.
                new_names = {col[1] for col in column_info(table)}
                shared = [col[1] for col in column_info(old_table) if col[1] in new_names]
                col_str = ", ".join(f'"{name}"' for name in shared)
                conn.execute(
                    f"INSERT INTO {table} ({col_str}) SELECT {col_str} FROM {old_table}"
                )
                conn.execute(f"DROP TABLE {old_table}")

            conn.commit()
        except Exception:
            # Undo every partial change, then let the caller see the failure.
            conn.rollback()
            raise
def _init_db(self): def _init_db(self):
"""Initializes the database schema.""" """Initializes the database schema."""
@ -89,14 +134,14 @@ class BackupDatabase:
# Guild Profile # Guild Profile
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS guild_profile ( CREATE TABLE IF NOT EXISTS guild_profile (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
name TEXT, name TEXT,
description TEXT, description TEXT,
icon_file TEXT, icon_file TEXT,
icon_url TEXT, icon_url TEXT,
banner_file TEXT, banner_file TEXT,
banner_url TEXT, banner_url TEXT,
owner_id TEXT, owner_id INTEGER,
last_backup TEXT, last_backup TEXT,
ignore_channels TEXT ignore_channels TEXT
) )
@ -105,11 +150,11 @@ class BackupDatabase:
# Roles # Roles
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS roles ( CREATE TABLE IF NOT EXISTS roles (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
name TEXT, name TEXT,
color INTEGER, color INTEGER,
position INTEGER, position INTEGER,
permissions TEXT, permissions INTEGER,
hoist INTEGER, hoist INTEGER,
mentionable INTEGER mentionable INTEGER
) )
@ -118,11 +163,11 @@ class BackupDatabase:
# Channels # Channels
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS channels ( CREATE TABLE IF NOT EXISTS channels (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
name TEXT, name TEXT,
type INTEGER, type INTEGER,
position INTEGER, position INTEGER,
category_id TEXT, category_id INTEGER,
topic TEXT, topic TEXT,
nsfw INTEGER, nsfw INTEGER,
bitrate INTEGER, bitrate INTEGER,
@ -134,8 +179,8 @@ class BackupDatabase:
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS permissions ( CREATE TABLE IF NOT EXISTS permissions (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
channel_id TEXT, channel_id INTEGER,
target_id TEXT, target_id INTEGER,
target_type TEXT, target_type TEXT,
allow INTEGER, allow INTEGER,
deny INTEGER deny INTEGER
@ -146,7 +191,7 @@ class BackupDatabase:
# Users (Author cache) # Users (Author cache)
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS users ( CREATE TABLE IF NOT EXISTS users (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
username TEXT, username TEXT,
display_name TEXT, display_name TEXT,
avatar_file TEXT, avatar_file TEXT,
@ -159,12 +204,12 @@ class BackupDatabase:
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS messages ( CREATE TABLE IF NOT EXISTS messages (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
channel_id TEXT, channel_id INTEGER,
author_id TEXT, author_id INTEGER,
content TEXT, content TEXT,
timestamp TEXT, timestamp TEXT,
type INTEGER, type INTEGER,
message_reference TEXT, message_reference INTEGER,
is_pinned INTEGER, is_pinned INTEGER,
extra_data TEXT extra_data TEXT
) )
@ -175,8 +220,8 @@ class BackupDatabase:
# Attachments # Attachments
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS attachments ( CREATE TABLE IF NOT EXISTS attachments (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
message_id TEXT, message_id INTEGER,
filename TEXT, filename TEXT,
size INTEGER, size INTEGER,
url TEXT, url TEXT,
@ -190,7 +235,7 @@ class BackupDatabase:
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS embeds ( CREATE TABLE IF NOT EXISTS embeds (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
message_id TEXT, message_id INTEGER,
title TEXT, title TEXT,
description TEXT, description TEXT,
url TEXT, url TEXT,
@ -212,8 +257,8 @@ class BackupDatabase:
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS reactions ( CREATE TABLE IF NOT EXISTS reactions (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
message_id TEXT, message_id INTEGER,
emoji_id TEXT, emoji_id INTEGER,
emoji_name TEXT, emoji_name TEXT,
count INTEGER count INTEGER
) )
@ -223,8 +268,8 @@ class BackupDatabase:
# Message Stickers # Message Stickers
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS message_stickers ( CREATE TABLE IF NOT EXISTS message_stickers (
message_id TEXT, message_id INTEGER,
sticker_id TEXT, sticker_id INTEGER,
name TEXT, name TEXT,
url TEXT, url TEXT,
format_type INTEGER, format_type INTEGER,
@ -237,10 +282,10 @@ class BackupDatabase:
# Threads # Threads
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS threads ( CREATE TABLE IF NOT EXISTS threads (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
name TEXT, name TEXT,
type INTEGER, type INTEGER,
parent_id TEXT, parent_id INTEGER,
message_count INTEGER, message_count INTEGER,
member_count INTEGER, member_count INTEGER,
archived INTEGER, archived INTEGER,
@ -255,11 +300,11 @@ class BackupDatabase:
# Forum Tags (Definitions for a forum channel) # Forum Tags (Definitions for a forum channel)
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS forum_tags ( CREATE TABLE IF NOT EXISTS forum_tags (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
forum_id TEXT, forum_id INTEGER,
name TEXT, name TEXT,
moderated INTEGER, moderated INTEGER,
emoji_id TEXT, emoji_id INTEGER,
emoji_name TEXT emoji_name TEXT
) )
""") """)
@ -280,7 +325,7 @@ class BackupDatabase:
# Server Assets (Emojis, Stickers, etc.) # Server Assets (Emojis, Stickers, etc.)
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS server_assets ( CREATE TABLE IF NOT EXISTS server_assets (
id TEXT PRIMARY KEY, id INTEGER PRIMARY KEY,
name TEXT, name TEXT,
type TEXT, type TEXT,
filename TEXT, filename TEXT,
@ -299,10 +344,10 @@ class BackupDatabase:
INSERT OR REPLACE INTO guild_profile (id, name, description, icon_file, icon_url, banner_file, banner_url, owner_id, last_backup, ignore_channels) INSERT OR REPLACE INTO guild_profile (id, name, description, icon_file, icon_url, banner_file, banner_url, owner_id, last_backup, ignore_channels)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", ( """, (
str(data.get("id")), data.get("name"), data.get("description"), parse_snowflake(data.get("id")), data.get("name"), data.get("description"),
data.get("icon_file"), data.get("icon_url"), data.get("icon_file"), data.get("icon_url"),
data.get("banner_file"), data.get("banner_url"), data.get("banner_file"), data.get("banner_url"),
str(data.get("owner_id")), parse_snowflake(data.get("owner_id")),
data.get("last_backup"), json.dumps(data.get("ignore_channels", [])) data.get("last_backup"), json.dumps(data.get("ignore_channels", []))
)) ))
self._conn.commit() self._conn.commit()
@ -323,7 +368,7 @@ class BackupDatabase:
with self._lock: with self._lock:
formatted = [ formatted = [
{ {
"id": str(r["id"]), "id": parse_snowflake(r["id"]),
"name": r["name"], "name": r["name"],
"color": r["color"], "color": r["color"],
"position": r["position"], "position": r["position"],
@ -370,7 +415,7 @@ class BackupDatabase:
with self._lock: with self._lock:
formatted = [ formatted = [
{ {
"id": str(a["id"]), "id": parse_snowflake(a["id"]),
"name": a.get("name"), "name": a.get("name"),
"type": a.get("type"), "type": a.get("type"),
"filename": a.get("filename"), "filename": a.get("filename"),
@ -430,7 +475,7 @@ class BackupDatabase:
for rea in msg["reactions"]: for rea in msg["reactions"]:
all_reactions.append({ all_reactions.append({
"message_id": msg["id"], "message_id": msg["id"],
"emoji_id": str(rea["emoji_id"]) if rea.get("emoji_id") else None, "emoji_id": parse_snowflake(rea["emoji_id"]) if rea.get("emoji_id") else None,
"emoji_name": rea.get("emoji_name"), "emoji_name": rea.get("emoji_name"),
"count": rea.get("count", 0) "count": rea.get("count", 0)
}) })
@ -440,7 +485,7 @@ class BackupDatabase:
for st in msg["stickers"]: for st in msg["stickers"]:
all_stickers.append({ all_stickers.append({
"message_id": msg["id"], "message_id": msg["id"],
"sticker_id": str(st["id"]), "sticker_id": parse_snowflake(st["id"]),
"name": st.get("name"), "name": st.get("name"),
"url": st.get("url"), "url": st.get("url"),
"format_type": st.get("format_type"), "format_type": st.get("format_type"),
@ -492,7 +537,7 @@ class BackupDatabase:
def get_last_message_id(self, channel_id: Union[str, int]) -> Optional[int]:
    """Return the highest stored message ID for a channel.

    Args:
        channel_id: Channel snowflake as a string or an int; it is normalised
            with ``parse_snowflake`` before the lookup.

    Returns:
        The ``id`` of the channel's newest stored message (the row with the
        largest ``id``), or ``None`` if no message is stored for it.
    """
    with self._lock:
        row = self._conn.execute(
            "SELECT id FROM messages WHERE channel_id = ? ORDER BY id DESC LIMIT 1",
            (parse_snowflake(channel_id),),
        ).fetchone()
        return row["id"] if row else None
def get_media_by_hash(self, file_hash: str) -> Optional[Dict[str, Any]]: def get_media_by_hash(self, file_hash: str) -> Optional[Dict[str, Any]]:
@ -623,7 +668,7 @@ class BackupDatabase:
"""Returns forum tag definitions.""" """Returns forum tag definitions."""
with self._lock: with self._lock:
if forum_id: if forum_id:
rows = self._conn.execute("SELECT * FROM forum_tags WHERE forum_id = ?", (str(forum_id),)).fetchall() rows = self._conn.execute("SELECT * FROM forum_tags WHERE forum_id = ?", (parse_snowflake(forum_id),)).fetchall()
else: else:
rows = self._conn.execute("SELECT * FROM forum_tags").fetchall() rows = self._conn.execute("SELECT * FROM forum_tags").fetchall()
return [dict(r) for r in rows] return [dict(r) for r in rows]
@ -631,13 +676,13 @@ class BackupDatabase:
def get_threads_by_parent(self, parent_id: Union[str, int]) -> List[Dict[str, Any]]:
    """Return all threads belonging to a parent channel.

    Args:
        parent_id: Parent channel snowflake as a string or an int; it is
            normalised with ``parse_snowflake`` before the lookup.

    Returns:
        One dict per matching ``threads`` row; an empty list if none match.
    """
    with self._lock:
        rows = self._conn.execute(
            "SELECT * FROM threads WHERE parent_id = ?",
            (parse_snowflake(parent_id),),
        ).fetchall()
        return [dict(r) for r in rows]
def get_thread(self, thread_id: Union[str, int]) -> Optional[Dict[str, Any]]:
    """Retrieve a single thread's metadata.

    Args:
        thread_id: Thread snowflake as a string or an int; it is normalised
            with ``parse_snowflake`` before the lookup.

    Returns:
        The matching ``threads`` row as a dict, or ``None`` if there is none.
    """
    with self._lock:
        row = self._conn.execute(
            "SELECT * FROM threads WHERE id = ?",
            (parse_snowflake(thread_id),),
        ).fetchone()
        return dict(row) if row else None
def get_all_users(self) -> List[Dict[str, Any]]: def get_all_users(self) -> List[Dict[str, Any]]:
@ -647,7 +692,7 @@ class BackupDatabase:
def get_user(self, user_id: str) -> Optional[Dict[str, Any]]: def get_user(self, user_id: str) -> Optional[Dict[str, Any]]:
with self._lock: with self._lock:
row = self._conn.execute("SELECT * FROM users WHERE id = ?", (str(user_id),)).fetchone() row = self._conn.execute("SELECT * FROM users WHERE id = ?", (parse_snowflake(user_id),)).fetchone()
if row: if row:
data = dict(row) data = dict(row)
if data.get("roles"): if data.get("roles"):
@ -673,11 +718,11 @@ class BackupDatabase:
def get_messages_paged(self, channel_id: str, limit: int = 100, offset: int = 0, after_id: Optional[str] = None) -> List[Dict[str, Any]]: def get_messages_paged(self, channel_id: str, limit: int = 100, offset: int = 0, after_id: Optional[str] = None) -> List[Dict[str, Any]]:
with self._lock: with self._lock:
query = "SELECT * FROM messages WHERE channel_id = ?" query = "SELECT * FROM messages WHERE channel_id = ?"
params = [str(channel_id)] params = [parse_snowflake(channel_id)]
if after_id: if after_id:
query += " AND id > ?" query += " AND id > ?"
params.append(str(after_id)) params.append(parse_snowflake(after_id))
query += " ORDER BY id ASC LIMIT ? OFFSET ?" query += " ORDER BY id ASC LIMIT ? OFFSET ?"
params.extend([limit, offset]) params.extend([limit, offset])
@ -748,7 +793,7 @@ class BackupDatabase:
def delete_channel_messages(self, channel_id: Union[str, int]): def delete_channel_messages(self, channel_id: Union[str, int]):
"""Deletes all messages and related metadata for a specific channel and its threads.""" """Deletes all messages and related metadata for a specific channel and its threads."""
cid = str(channel_id) cid = parse_snowflake(channel_id)
with self._lock: with self._lock:
# 1. Identify all channel IDs involved (parent + all threads) # 1. Identify all channel IDs involved (parent + all threads)
target_ids = [cid] target_ids = [cid]

View file

@ -6,6 +6,7 @@ from pathlib import Path
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
import threading import threading
import sys import sys
from src.core.utils import parse_snowflake
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -27,16 +28,72 @@ class MigrationDatabase:
return self._local.conn return self._local.conn
def _init_db(self): def _init_db(self):
"""Initialize tables if they don't exist.""" """Initialize tables if they don't exist and handle migrations."""
conn = sqlite3.connect(self.db_path) conn = sqlite3.connect(self.db_path)
cursor = conn.cursor() cursor = conn.cursor()
# 1. MIME Type to Content Type Migrations (if applicable - not in this class usually)
# 2. Universal ID Migration (TEXT -> INTEGER)
# Mapping of table names to columns that must be INTEGER (Snowflakes)
id_migrations = {
"message_mappings": ["channel_id", "source_msg_id", "target_msg_id"],
"thread_mappings": ["channel_id", "thread_id", "source_msg_id", "target_msg_id"],
"channel_tracking": ["channel_id", "last_msg_id"],
"thread_tracking": ["channel_id", "thread_id", "last_msg_id"],
"server_mappings": ["source_id", "target_id"],
"asset_mappings": ["source_id", "target_id"],
"user_alias": ["user_id"]
}
for table, id_cols in id_migrations.items():
cursor.execute(f"SELECT count(*) FROM sqlite_master WHERE type='table' AND name='{table}'")
res = cursor.fetchone()
if not res or res[0] == 0:
continue
cursor.execute(f"PRAGMA table_info({table})")
cols = cursor.fetchall()
needs_migration = False
for col in cols:
if col[1] in id_cols and col[2] == "TEXT":
needs_migration = True
break
if needs_migration:
logger.info(f"MigrationDatabase: Migrating {table}: converting ID columns to INTEGER")
cursor.execute(f"ALTER TABLE {table} RENAME TO {table}_old")
if table == "message_mappings":
cursor.execute("CREATE TABLE message_mappings (channel_id INTEGER, source_msg_id INTEGER, target_msg_id INTEGER, timestamp TEXT, PRIMARY KEY (channel_id, source_msg_id))")
elif table == "thread_mappings":
cursor.execute("CREATE TABLE thread_mappings (channel_id INTEGER, thread_id INTEGER, source_msg_id INTEGER, target_msg_id INTEGER, timestamp TEXT, PRIMARY KEY (channel_id, thread_id, source_msg_id))")
elif table == "channel_tracking":
cursor.execute("CREATE TABLE channel_tracking (channel_id INTEGER PRIMARY KEY, last_msg_id INTEGER, last_msg_ts TEXT, msg_count INTEGER DEFAULT 0, file_count INTEGER DEFAULT 0)")
elif table == "thread_tracking":
cursor.execute("CREATE TABLE thread_tracking (channel_id INTEGER, thread_id INTEGER, last_msg_id INTEGER, last_msg_ts TEXT, msg_count INTEGER DEFAULT 0, file_count INTEGER DEFAULT 0, completed INTEGER DEFAULT 0, PRIMARY KEY (channel_id, thread_id))")
elif table == "server_mappings":
cursor.execute("CREATE TABLE server_mappings (category TEXT, source_id INTEGER, target_id INTEGER, PRIMARY KEY (category, source_id))")
elif table == "asset_mappings":
cursor.execute("CREATE TABLE asset_mappings (category TEXT, source_id INTEGER, target_id INTEGER, PRIMARY KEY (category, source_id))")
elif table == "user_alias":
cursor.execute("CREATE TABLE user_alias (user_id INTEGER PRIMARY KEY, alias TEXT UNIQUE)")
old_cols = [c[1] for c in cursor.execute(f"PRAGMA table_info({table}_old)").fetchall()]
new_cols = [c[1] for c in cursor.execute(f"PRAGMA table_info({table})").fetchall()]
common_cols = [c for c in old_cols if c in new_cols]
col_str = ", ".join(common_cols)
cursor.execute(f"INSERT OR IGNORE INTO {table} ({col_str}) SELECT {col_str} FROM {table}_old")
cursor.execute(f"DROP TABLE {table}_old")
# Initial Creation / Ensure Schema Correctness
# Table for message mappings: SourceID -> TargetID # Table for message mappings: SourceID -> TargetID
cursor.execute(""" cursor.execute("""
CREATE TABLE IF NOT EXISTS message_mappings ( CREATE TABLE IF NOT EXISTS message_mappings (
channel_id TEXT, channel_id INTEGER,
source_msg_id TEXT, source_msg_id INTEGER,
target_msg_id TEXT, target_msg_id INTEGER,
timestamp TEXT, timestamp TEXT,
PRIMARY KEY (channel_id, source_msg_id) PRIMARY KEY (channel_id, source_msg_id)
) )
@ -45,10 +102,10 @@ class MigrationDatabase:
# Table for thread mappings # Table for thread mappings
cursor.execute(""" cursor.execute("""
CREATE TABLE IF NOT EXISTS thread_mappings ( CREATE TABLE IF NOT EXISTS thread_mappings (
channel_id TEXT, channel_id INTEGER,
thread_id TEXT, thread_id INTEGER,
source_msg_id TEXT, source_msg_id INTEGER,
target_msg_id TEXT, target_msg_id INTEGER,
timestamp TEXT, timestamp TEXT,
PRIMARY KEY (channel_id, thread_id, source_msg_id) PRIMARY KEY (channel_id, thread_id, source_msg_id)
) )
@ -57,8 +114,8 @@ class MigrationDatabase:
# Table for per-channel stats and tracking # Table for per-channel stats and tracking
cursor.execute(""" cursor.execute("""
CREATE TABLE IF NOT EXISTS channel_tracking ( CREATE TABLE IF NOT EXISTS channel_tracking (
channel_id TEXT PRIMARY KEY, channel_id INTEGER PRIMARY KEY,
last_msg_id TEXT, last_msg_id INTEGER,
last_msg_ts TEXT, last_msg_ts TEXT,
msg_count INTEGER DEFAULT 0, msg_count INTEGER DEFAULT 0,
file_count INTEGER DEFAULT 0 file_count INTEGER DEFAULT 0
@ -68,9 +125,9 @@ class MigrationDatabase:
# Table for per-thread stats # Table for per-thread stats
cursor.execute(""" cursor.execute("""
CREATE TABLE IF NOT EXISTS thread_tracking ( CREATE TABLE IF NOT EXISTS thread_tracking (
channel_id TEXT, channel_id INTEGER,
thread_id TEXT, thread_id INTEGER,
last_msg_id TEXT, last_msg_id INTEGER,
last_msg_ts TEXT, last_msg_ts TEXT,
msg_count INTEGER DEFAULT 0, msg_count INTEGER DEFAULT 0,
file_count INTEGER DEFAULT 0, file_count INTEGER DEFAULT 0,
@ -89,8 +146,8 @@ class MigrationDatabase:
cursor.execute(""" cursor.execute("""
CREATE TABLE IF NOT EXISTS server_mappings ( CREATE TABLE IF NOT EXISTS server_mappings (
category TEXT, category TEXT,
source_id TEXT, source_id INTEGER,
target_id TEXT, target_id INTEGER,
PRIMARY KEY (category, source_id) PRIMARY KEY (category, source_id)
) )
""") """)
@ -99,8 +156,8 @@ class MigrationDatabase:
cursor.execute(""" cursor.execute("""
CREATE TABLE IF NOT EXISTS asset_mappings ( CREATE TABLE IF NOT EXISTS asset_mappings (
category TEXT, category TEXT,
source_id TEXT, source_id INTEGER,
target_id TEXT, target_id INTEGER,
PRIMARY KEY (category, source_id) PRIMARY KEY (category, source_id)
) )
""") """)
@ -136,7 +193,7 @@ class MigrationDatabase:
# Table for auto-generated user aliases (user_id -> alias) # Table for auto-generated user aliases (user_id -> alias)
cursor.execute(""" cursor.execute("""
CREATE TABLE IF NOT EXISTS user_alias ( CREATE TABLE IF NOT EXISTS user_alias (
user_id TEXT PRIMARY KEY, user_id INTEGER PRIMARY KEY,
alias TEXT UNIQUE alias TEXT UNIQUE
) )
""") """)
@ -152,7 +209,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
conn.execute( conn.execute(
"INSERT OR REPLACE INTO message_mappings (channel_id, source_msg_id, target_msg_id, timestamp) VALUES (?, ?, ?, ?)", "INSERT OR REPLACE INTO message_mappings (channel_id, source_msg_id, target_msg_id, timestamp) VALUES (?, ?, ?, ?)",
(channel_id, source_id, target_id, timestamp) (parse_snowflake(channel_id), parse_snowflake(source_id), parse_snowflake(target_id), timestamp)
) )
conn.commit() conn.commit()
@ -160,7 +217,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
row = conn.execute( row = conn.execute(
"SELECT target_msg_id FROM message_mappings WHERE channel_id = ? AND source_msg_id = ?", "SELECT target_msg_id FROM message_mappings WHERE channel_id = ? AND source_msg_id = ?",
(channel_id, source_id) (parse_snowflake(channel_id), parse_snowflake(source_id))
).fetchone() ).fetchone()
return row["target_msg_id"] if row else None return row["target_msg_id"] if row else None
@ -205,7 +262,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
# Check for existing alias # Check for existing alias
row = conn.execute("SELECT alias FROM user_alias WHERE user_id = ?", (str(user_id),)).fetchone() row = conn.execute("SELECT alias FROM user_alias WHERE user_id = ?", (parse_snowflake(user_id),)).fetchone()
if row: if row:
return row["alias"] return row["alias"]
@ -215,7 +272,7 @@ class MigrationDatabase:
new_alias = self._generate_alias() new_alias = self._generate_alias()
conn.execute( conn.execute(
"INSERT INTO user_alias (user_id, alias) VALUES (?, ?)", "INSERT INTO user_alias (user_id, alias) VALUES (?, ?)",
(str(user_id), new_alias) (parse_snowflake(user_id), new_alias)
) )
conn.commit() conn.commit()
return new_alias return new_alias
@ -239,7 +296,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
conn.execute( conn.execute(
"INSERT OR REPLACE INTO server_mappings (category, source_id, target_id) VALUES (?, ?, ?)", "INSERT OR REPLACE INTO server_mappings (category, source_id, target_id) VALUES (?, ?, ?)",
(category, str(source_id), str(target_id)) (category, parse_snowflake(source_id), parse_snowflake(target_id))
) )
conn.commit() conn.commit()
@ -247,7 +304,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
row = conn.execute( row = conn.execute(
"SELECT target_id FROM server_mappings WHERE category = ? AND source_id = ?", "SELECT target_id FROM server_mappings WHERE category = ? AND source_id = ?",
(category, str(source_id)) (category, parse_snowflake(source_id))
).fetchone() ).fetchone()
return row["target_id"] if row else None return row["target_id"] if row else None
@ -263,7 +320,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
conn.execute( conn.execute(
"DELETE FROM server_mappings WHERE category = ? AND source_id = ?", "DELETE FROM server_mappings WHERE category = ? AND source_id = ?",
(category, str(source_id)) (category, parse_snowflake(source_id))
) )
conn.commit() conn.commit()
@ -281,7 +338,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
conn.execute( conn.execute(
"INSERT OR REPLACE INTO asset_mappings (category, source_id, target_id) VALUES (?, ?, ?)", "INSERT OR REPLACE INTO asset_mappings (category, source_id, target_id) VALUES (?, ?, ?)",
(category, str(source_id), str(target_id)) (category, parse_snowflake(source_id), parse_snowflake(target_id))
) )
conn.commit() conn.commit()
@ -289,7 +346,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
row = conn.execute( row = conn.execute(
"SELECT target_id FROM asset_mappings WHERE category = ? AND source_id = ?", "SELECT target_id FROM asset_mappings WHERE category = ? AND source_id = ?",
(category, str(source_id)) (category, parse_snowflake(source_id))
).fetchone() ).fetchone()
return row["target_id"] if row else None return row["target_id"] if row else None
@ -305,7 +362,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
conn.execute( conn.execute(
"DELETE FROM asset_mappings WHERE category = ? AND source_id = ?", "DELETE FROM asset_mappings WHERE category = ? AND source_id = ?",
(category, str(source_id)) (category, parse_snowflake(source_id))
) )
conn.commit() conn.commit()
@ -332,23 +389,23 @@ class MigrationDatabase:
def update_channel_tracking(
    self,
    channel_id: str,
    last_msg_id: str = None,
    last_msg_ts: str = None,
    msg_inc: int = 0,
    file_inc: int = 0,
):
    """Create or update the ``channel_tracking`` row for one channel.

    Args:
        channel_id: Channel ID; normalized with ``parse_snowflake``.
        last_msg_id: When truthy and parseable as an ID, written to the
            ``last_msg_id`` column.
        last_msg_ts: When truthy, written unchanged to ``last_msg_ts``.
        msg_inc: Amount added to the stored ``msg_count``.
        file_inc: Amount added to the stored ``file_count``.
    """
    chan = parse_snowflake(channel_id)
    if chan is None:
        # A NULL key can never match the WHERE clauses below, so inserting
        # it would only leave an unreachable row behind.
        logger.warning("Ignoring channel tracking update for invalid channel id: %r", channel_id)
        return

    conn = self._get_conn()
    # Initialize if missing
    conn.execute("INSERT OR IGNORE INTO channel_tracking (channel_id) VALUES (?)", (chan,))

    # Parse before writing: a truthy placeholder such as "None" must not
    # overwrite an existing checkpoint with NULL.
    msg_id = parse_snowflake(last_msg_id) if last_msg_id else None
    if msg_id is not None:
        conn.execute("UPDATE channel_tracking SET last_msg_id = ? WHERE channel_id = ?", (msg_id, chan))
    if last_msg_ts:
        conn.execute("UPDATE channel_tracking SET last_msg_ts = ? WHERE channel_id = ?", (last_msg_ts, chan))
    if msg_inc != 0 or file_inc != 0:
        conn.execute(
            "UPDATE channel_tracking SET msg_count = msg_count + ?, file_count = file_count + ? WHERE channel_id = ?",
            (msg_inc, file_inc, chan),
        )
    conn.commit()
def get_channel_tracking(self, channel_id: str) -> Dict[str, Any]:
    """Return the tracking record for a channel.

    Args:
        channel_id: Channel ID; normalized with ``parse_snowflake`` before
            the lookup.

    Returns:
        The matching ``channel_tracking`` row as a dict (all columns), or a
        dict of empty defaults when no row exists.
    """
    conn = self._get_conn()
    row = conn.execute(
        "SELECT * FROM channel_tracking WHERE channel_id = ?",
        (parse_snowflake(channel_id),),
    ).fetchone()
    if not row:
        # Nothing recorded yet: report an empty progress record.
        return {"last_msg_id": None, "last_msg_ts": None, "msg_count": 0, "file_count": 0}
    return dict(row)
@ -358,7 +415,7 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
conn.execute( conn.execute(
"INSERT OR REPLACE INTO thread_mappings (channel_id, thread_id, source_msg_id, target_msg_id, timestamp) VALUES (?, ?, ?, ?, ?)", "INSERT OR REPLACE INTO thread_mappings (channel_id, thread_id, source_msg_id, target_msg_id, timestamp) VALUES (?, ?, ?, ?, ?)",
(channel_id, thread_id, source_id, target_id, timestamp) (parse_snowflake(channel_id), parse_snowflake(thread_id), parse_snowflake(source_id), parse_snowflake(target_id), timestamp)
) )
conn.commit() conn.commit()
@ -366,31 +423,31 @@ class MigrationDatabase:
conn = self._get_conn() conn = self._get_conn()
row = conn.execute( row = conn.execute(
"SELECT target_msg_id FROM thread_mappings WHERE channel_id = ? AND thread_id = ? AND source_msg_id = ?", "SELECT target_msg_id FROM thread_mappings WHERE channel_id = ? AND thread_id = ? AND source_msg_id = ?",
(channel_id, thread_id, source_id) (parse_snowflake(channel_id), parse_snowflake(thread_id), parse_snowflake(source_id))
).fetchone() ).fetchone()
return row["target_msg_id"] if row else None return row["target_msg_id"] if row else None
def update_thread_tracking(
    self,
    channel_id: str,
    thread_id: str,
    last_msg_id: str = None,
    last_msg_ts: str = None,
    msg_inc: int = 0,
    file_inc: int = 0,
    completed: int = None,
):
    """Create or update the ``thread_tracking`` row for one thread.

    Args:
        channel_id: Channel ID; normalized with ``parse_snowflake``.
        thread_id: Thread ID; normalized with ``parse_snowflake``.
        last_msg_id: When truthy and parseable as an ID, written to the
            ``last_msg_id`` column.
        last_msg_ts: When truthy, written unchanged to ``last_msg_ts``.
        msg_inc: Amount added to the stored ``msg_count``.
        file_inc: Amount added to the stored ``file_count``.
        completed: When not ``None``, written to the ``completed`` column.
    """
    chan = parse_snowflake(channel_id)
    thread = parse_snowflake(thread_id)
    if chan is None or thread is None:
        # A NULL key part can never match the WHERE clauses below, so
        # inserting it would only leave an unreachable row behind.
        logger.warning(
            "Ignoring thread tracking update for invalid ids: channel=%r thread=%r",
            channel_id,
            thread_id,
        )
        return

    key = (chan, thread)
    conn = self._get_conn()
    conn.execute("INSERT OR IGNORE INTO thread_tracking (channel_id, thread_id) VALUES (?, ?)", key)

    # Parse before writing: a truthy placeholder such as "None" must not
    # overwrite an existing checkpoint with NULL.
    msg_id = parse_snowflake(last_msg_id) if last_msg_id else None
    if msg_id is not None:
        conn.execute("UPDATE thread_tracking SET last_msg_id = ? WHERE channel_id = ? AND thread_id = ?", (msg_id, *key))
    if last_msg_ts:
        conn.execute("UPDATE thread_tracking SET last_msg_ts = ? WHERE channel_id = ? AND thread_id = ?", (last_msg_ts, *key))
    if completed is not None:
        conn.execute("UPDATE thread_tracking SET completed = ? WHERE channel_id = ? AND thread_id = ?", (completed, *key))
    if msg_inc != 0 or file_inc != 0:
        conn.execute(
            "UPDATE thread_tracking SET msg_count = msg_count + ?, file_count = file_count + ? WHERE channel_id = ? AND thread_id = ?",
            (msg_inc, file_inc, *key),
        )
    conn.commit()
def get_thread_tracking(self, channel_id: str, thread_id: str) -> Dict[str, Any]:
    """Return the tracking record for a thread.

    Args:
        channel_id: Channel ID; normalized with ``parse_snowflake``.
        thread_id: Thread ID; normalized with ``parse_snowflake``.

    Returns:
        The matching ``thread_tracking`` row as a dict (all columns), or a
        dict of empty defaults when no row exists.
    """
    conn = self._get_conn()
    row = conn.execute(
        "SELECT * FROM thread_tracking WHERE channel_id = ? AND thread_id = ?",
        (parse_snowflake(channel_id), parse_snowflake(thread_id)),
    ).fetchone()
    if not row:
        # NOTE(review): stored rows also carry a ``completed`` column that
        # this fallback omits -- confirm callers use .get() for it.
        return {"last_msg_id": None, "last_msg_ts": None, "msg_count": 0, "file_count": 0}
    return dict(row)
@ -398,10 +455,10 @@ class MigrationDatabase:
def clear_channel_data(self, channel_id: str):
    """Purge all mappings and tracking data for a specific channel and its threads.

    Rows are deleted from ``message_mappings``, ``thread_mappings``,
    ``channel_tracking`` and ``thread_tracking`` and committed together.

    Args:
        channel_id: Channel ID; normalized with ``parse_snowflake``.
    """
    chan = parse_snowflake(channel_id)
    if chan is None:
        # ``channel_id = NULL`` matches no rows; avoid reporting a purge
        # that did not happen.
        logger.warning("Cannot clear channel data for invalid channel id: %r", channel_id)
        return

    conn = self._get_conn()
    # Table names are fixed constants, so interpolating them is safe; the
    # ID itself is still bound as a parameter.
    for table in ("message_mappings", "thread_mappings", "channel_tracking", "thread_tracking"):
        conn.execute(f"DELETE FROM {table} WHERE channel_id = ?", (chan,))
    conn.commit()
    logger.info("Cleared all tracking and mapping data for channel: %s", channel_id)

View file

@ -109,7 +109,7 @@ class DiscordExporter:
"name": r.name, "name": r.name,
"color": r.color.value, "color": r.color.value,
"position": r.position, "position": r.position,
"permissions": str(r.permissions.value), "permissions": r.permissions.value,
"hoist": 1 if r.hoist else 0, "hoist": 1 if r.hoist else 0,
"mentionable": 1 if r.mentionable else 0 "mentionable": 1 if r.mentionable else 0
}) })
@ -563,9 +563,10 @@ class DiscordExporter:
}) })
# 5. Message data # 5. Message data
from src.core.utils import parse_snowflake
message_reference = None message_reference = None
if msg.reference and msg.reference.message_id: if msg.reference and msg.reference.message_id:
message_reference = str(msg.reference.message_id) message_reference = parse_snowflake(msg.reference.message_id)
# 5.5 Forwarded snapshots # 5.5 Forwarded snapshots
content = msg.content or "" content = msg.content or ""

View file

@ -1,5 +1,19 @@
from typing import Any, Optional
import re import re
import logging import logging
def parse_snowflake(value: Any) -> Optional[int]:
    """Safely parse a Discord ID (snowflake) from arbitrary input.

    Args:
        value: Anything that may carry an ID: an ``int``, a numeric string,
            ``None``, or a placeholder string such as ``"None"`` / ``"NULL"``.

    Returns:
        The ID as an ``int``, or ``None`` when the input is missing, is a
        placeholder, or is not a plain run of ASCII decimal digits.
    """
    if value is None:
        return None

    text = str(value).strip()
    # Placeholder spellings for a missing ID, matched case-insensitively.
    if not text or text.lower() in ("none", "null"):
        return None

    # ``int()`` on its own also accepts signs, digit-group underscores
    # ("1_000") and non-ASCII digits; none of those are valid snowflakes.
    if not (text.isascii() and text.isdigit()):
        return None
    return int(text)
from src.core.state import MigrationState from src.core.state import MigrationState
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)