Merge pull request #14 from rambros3d/tupperware

Backup discord masquerade messages
This commit is contained in:
RamBros 2026-04-03 08:32:20 +05:30 committed by GitHub
commit 9f27af971c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 131 additions and 23 deletions

View file

@ -98,9 +98,9 @@ class BackupDatabase:
elif table == "permissions": elif table == "permissions":
conn.execute("CREATE TABLE permissions (id INTEGER PRIMARY KEY AUTOINCREMENT, channel_id INTEGER, target_id INTEGER, target_type TEXT, allow INTEGER, deny INTEGER)") conn.execute("CREATE TABLE permissions (id INTEGER PRIMARY KEY AUTOINCREMENT, channel_id INTEGER, target_id INTEGER, target_type TEXT, allow INTEGER, deny INTEGER)")
elif table == "users": elif table == "users":
conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT, display_name TEXT, avatar_file TEXT, avatar_url TEXT, roles TEXT)") conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT, display_name TEXT, avatar_file TEXT, avatar_url TEXT, roles TEXT, type INTEGER DEFAULT 0)")
elif table == "messages": elif table == "messages":
conn.execute("CREATE TABLE messages (id INTEGER PRIMARY KEY, channel_id INTEGER, author_id INTEGER, content TEXT, timestamp TEXT, type INTEGER, message_reference INTEGER, is_pinned INTEGER, extra_data TEXT)") conn.execute("CREATE TABLE messages (id INTEGER PRIMARY KEY, channel_id INTEGER, author_id INTEGER, content TEXT, timestamp TEXT, type INTEGER, message_reference INTEGER, is_pinned INTEGER, extra_data TEXT, custom_display_name TEXT, custom_avatar_url TEXT)")
elif table == "attachments": elif table == "attachments":
conn.execute("CREATE TABLE attachments (id INTEGER PRIMARY KEY, message_id INTEGER, filename TEXT, size INTEGER, url TEXT, content_type TEXT, local_hash TEXT)") conn.execute("CREATE TABLE attachments (id INTEGER PRIMARY KEY, message_id INTEGER, filename TEXT, size INTEGER, url TEXT, content_type TEXT, local_hash TEXT)")
elif table == "embeds": elif table == "embeds":
@ -123,6 +123,25 @@ class BackupDatabase:
conn.execute(f"INSERT INTO {table} ({col_str}) SELECT {col_str} FROM {table}_old") conn.execute(f"INSERT INTO {table} ({col_str}) SELECT {col_str} FROM {table}_old")
conn.execute(f"DROP TABLE {table}_old") conn.execute(f"DROP TABLE {table}_old")
# 3. Custom Author Profile Migration
res = conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='messages'").fetchone()
if res and res[0] > 0:
cols = conn.execute("PRAGMA table_info(messages)").fetchall()
col_names = [c["name"] for c in cols]
if "custom_display_name" not in col_names:
logger.info("Migrating messages: adding custom author profile columns")
conn.execute("ALTER TABLE messages ADD COLUMN custom_display_name TEXT")
conn.execute("ALTER TABLE messages ADD COLUMN custom_avatar_url TEXT")
# 4. User Type Categorization Migration
res = conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='users'").fetchone()
if res and res[0] > 0:
cols = conn.execute("PRAGMA table_info(users)").fetchall()
col_names = [c["name"] for c in cols]
if "type" not in col_names:
logger.info("Migrating users: adding type column")
conn.execute("ALTER TABLE users ADD COLUMN type INTEGER DEFAULT 0")
conn.commit() conn.commit()
@ -196,7 +215,8 @@ class BackupDatabase:
display_name TEXT, display_name TEXT,
avatar_file TEXT, avatar_file TEXT,
avatar_url TEXT, avatar_url TEXT,
roles TEXT roles TEXT,
type INTEGER DEFAULT 0
) )
""") """)
@ -211,7 +231,9 @@ class BackupDatabase:
type INTEGER, type INTEGER,
message_reference INTEGER, message_reference INTEGER,
is_pinned INTEGER, is_pinned INTEGER,
extra_data TEXT extra_data TEXT,
custom_display_name TEXT,
custom_avatar_url TEXT
) )
""") """)
conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_channel ON messages(channel_id)") conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_channel ON messages(channel_id)")
@ -405,8 +427,8 @@ class BackupDatabase:
"""Saves users to the author cache.""" """Saves users to the author cache."""
with self._lock: with self._lock:
self._conn.executemany(""" self._conn.executemany("""
INSERT OR REPLACE INTO users (id, username, display_name, avatar_file, avatar_url, roles) INSERT OR REPLACE INTO users (id, username, display_name, avatar_file, avatar_url, roles, type)
VALUES (:id, :username, :display_name, :avatar_file, :avatar_url, :roles) VALUES (:id, :username, :display_name, :avatar_file, :avatar_url, :roles, :type)
""", users) """, users)
self._conn.commit() self._conn.commit()
@ -454,8 +476,8 @@ class BackupDatabase:
conn = self._conn conn = self._conn
# Insert messages # Insert messages
conn.executemany(""" conn.executemany("""
INSERT OR REPLACE INTO messages (id, channel_id, author_id, content, timestamp, type, message_reference, is_pinned, extra_data) INSERT OR REPLACE INTO messages (id, channel_id, author_id, content, timestamp, type, message_reference, is_pinned, extra_data, custom_display_name, custom_avatar_url)
VALUES (:id, :channel_id, :author_id, :content, :timestamp, :type, :message_reference, :is_pinned, :extra_data) VALUES (:id, :channel_id, :author_id, :content, :timestamp, :type, :message_reference, :is_pinned, :extra_data, :custom_display_name, :custom_avatar_url)
""", messages) """, messages)
# Extract attachments, reactions, and stickers # Extract attachments, reactions, and stickers

View file

@ -1339,6 +1339,18 @@ class BackupReader:
user_id = parse_snowflake(msg_data.get("author_id", 0)) or 0 user_id = parse_snowflake(msg_data.get("author_id", 0)) or 0
author = self._resolve_author(user_id) author = self._resolve_author(user_id)
# Check for custom author profile (Webhooks / Masquerade)
over_name = msg_data.get("custom_display_name")
over_avatar = msg_data.get("custom_avatar_url")
if over_name:
# Create an ephemeral author object for this message
author = BackupMember({
"id": str(user_id),
"username": over_name,
"display_name": over_name,
"avatar_url": over_avatar
})
self._ensure_media_pool_loaded() self._ensure_media_pool_loaded()
channel_id = parse_snowflake(msg_data["channel_id"]) channel_id = parse_snowflake(msg_data["channel_id"])

View file

@ -18,6 +18,7 @@ class DiscordExporter:
self.server_name = "" self.server_name = ""
self.server_id = "" self.server_id = ""
self.user_cache = {} self.user_cache = {}
self.member_cache: Dict[int, Any] = {} # Pre-fetched member objects (id -> Member)
self.base_dir = Path(base_dir) if base_dir else Path(".") self.base_dir = Path(base_dir) if base_dir else Path(".")
self.is_running = True self.is_running = True
self.db: Optional[BackupDatabase] = None self.db: Optional[BackupDatabase] = None
@ -62,6 +63,20 @@ class DiscordExporter:
hash_sha256.update(chunk) hash_sha256.update(chunk)
return hash_sha256.hexdigest() return hash_sha256.hexdigest()
async def prefetch_members(self):
    """Load every guild member into ``self.member_cache``, keyed by member id.

    The cache lets message export look up a member's roles locally instead
    of issuing one API call per message (``msg.author`` is a plain
    ``discord.User`` and carries no roles).

    This is best-effort: if fetching or indexing fails for any reason, a
    warning is logged and the cache is reset to an empty dict.
    """
    try:
        fetched = await self.reader.get_members()
        by_id = {}
        for member in fetched:
            by_id[member.id] = member
        self.member_cache = by_id
        logger.info(f"Pre-fetched {len(self.member_cache)} members for role resolution.")
    except Exception as e:
        # Deliberately broad: a failed prefetch must not abort the backup.
        logger.warning(f"Could not pre-fetch members (roles will be empty): {e}")
        self.member_cache = {}
async def export_metadata(self): async def export_metadata(self):
"""Saves server metadata to the SQLite database.""" """Saves server metadata to the SQLite database."""
metadata = await self.reader.get_server_metadata() metadata = await self.reader.get_server_metadata()
@ -439,37 +454,65 @@ class DiscordExporter:
return accumulated_count, accumulated_threads, accumulated_files return accumulated_count, accumulated_threads, accumulated_files
async def _format_user(self, user, is_webhook=False):
    """Build the user-cache record for a message author or a mentioned user.

    Avatar downloads are intentionally deferred to keep this off the hot
    message-formatting path: a user with an avatar whose target file does
    not exist yet is queued on ``self._pending_avatars`` instead of being
    downloaded here.

    For webhooks the stored "base" profile is generic: the username is the
    webhook id, the display name is the current name, and the avatar is the
    default Discord avatar derived from the id. No avatar is queued for
    download.

    Args:
        user: Object exposing ``id`` and ``name``; for non-webhooks also
            ``avatar`` and ``display_avatar``. ``display_name``, ``roles``,
            ``bot`` and ``system`` are read when present.
        is_webhook: Treat ``user`` as a webhook author.

    Returns:
        The new record (dict with keys ``id``, ``username``,
        ``display_name``, ``avatar_file``, ``avatar_url``, ``roles``,
        ``type``), or ``None`` when the user is already in
        ``self.user_cache``.
    """
    user_id_int = int(user.id)
    user_id = str(user_id_int)

    if user_id in self.user_cache:
        return None

    if is_webhook:
        # For webhooks, we use the ID as the username for technical clarity,
        # and the current name as the display name.
        username = user_id
        display_name = user.name
        # ID-based default avatar index is (id >> 22) % 6; the "% 5" form
        # belongs to the legacy discriminator formula and would never
        # select the sixth default avatar.
        default_index = (user_id_int >> 22) % 6
        avatar_url = f"https://cdn.discordapp.com/embed/avatars/{default_index}.png"
        avatar = None  # Don't download character avatar as the "base" webhook avatar
    else:
        username = user.name
        display_name = getattr(user, "display_name", user.name)
        avatar = user.avatar
        avatar_url = str(user.display_avatar.url) if user.display_avatar else None

    # New user discovered — schedule avatar download but don't block here
    avatar_file = None
    if avatar:
        av_name = f"{user_id}.png"
        av_target = self.users_path / av_name
        avatar_file = f"users/{av_name}"
        if not av_target.exists():
            # Queue for deferred download
            self._pending_avatars.append((user_id, avatar, av_target))

    # Only objects that expose roles contribute any; the default role is skipped.
    roles = []
    if hasattr(user, "roles"):
        roles = [str(r.id) for r in user.roles if not r.is_default()]

    # Determine user type
    # 0: Regular User, 1: Bot, 2: Webhook, 3: System
    u_type = 0
    if is_webhook:
        u_type = 2
    elif getattr(user, "system", False):
        u_type = 3
    elif getattr(user, "bot", False):
        u_type = 1

    user_data = {
        "id": user_id,
        "username": username,
        "display_name": display_name,
        "avatar_file": avatar_file,
        "avatar_url": avatar_url,
        "roles": json.dumps(roles),
        "type": u_type,
    }
    self.user_cache[user_id] = user_data
    return user_data
@ -494,13 +537,21 @@ class DiscordExporter:
new_users = [] new_users = []
# 1. Author handling # 1. Author handling
u_data = await self._format_user(msg.author) is_webhook = bool(getattr(msg, "webhook_id", None))
author = msg.author
# msg.author is discord.User (no roles). Resolve to Member for role data.
if not is_webhook:
member = self.member_cache.get(msg.author.id)
if member:
author = member
u_data = await self._format_user(author, is_webhook=is_webhook)
if u_data: new_users.append(u_data) if u_data: new_users.append(u_data)
# 1.5 Mentions handling (ensure all mentioned users are saved) # 1.5 Mentions handling (ensure all mentioned users are saved)
if msg.mentions: if msg.mentions:
for mention in msg.mentions: for mention in msg.mentions:
u_ment = await self._format_user(mention) # Mentions can be Member objects already, so roles work naturally
u_ment = await self._format_user(mention, is_webhook=False)
if u_ment: new_users.append(u_ment) if u_ment: new_users.append(u_ment)
# 2. Attachments handling (Content-Addressable Storage) # 2. Attachments handling (Content-Addressable Storage)
@ -603,6 +654,15 @@ class DiscordExporter:
for s_emb in snapshot.embeds: for s_emb in snapshot.embeds:
embeds.append(s_emb.to_dict()) embeds.append(s_emb.to_dict())
# 5.6 Author Overrides (Webhooks / Masquerade)
custom_display_name = None
custom_avatar_url = None
# Webhooks or bots with masquerade often use per-message names/avatars
if getattr(msg, "webhook_id", None) or (msg.author and msg.author.bot):
custom_display_name = msg.author.name
custom_avatar_url = str(msg.author.display_avatar.url) if msg.author.display_avatar else None
m_data = { m_data = {
"id": str(msg.id), "id": str(msg.id),
"channel_id": str(msg.channel.id), "channel_id": str(msg.channel.id),
@ -616,7 +676,9 @@ class DiscordExporter:
"stickers": stickers, "stickers": stickers,
"embeds": embeds, "embeds": embeds,
"reactions": reactions, "reactions": reactions,
"extra_data": None "extra_data": None,
"custom_display_name": custom_display_name,
"custom_avatar_url": custom_avatar_url
} }
return m_data, new_users return m_data, new_users

View file

@ -329,8 +329,13 @@ class OperationPane(Container):
for pne in self.query("#op_target_pane"): pne.display = False for pne in self.query("#op_target_pane"): pne.display = False
enabled = (v.get("discord_token") and v.get("discord_server") and not d_missing) enabled = (v.get("discord_token") and v.get("discord_server") and not d_missing)
for bid in ("#op_backup_msgs", "#op_backup_sync", "#op_autotest"): for btn in self.query("#op_backup_msgs"):
for btn in self.query(bid): btn.disabled = not enabled btn.disabled = not enabled
for btn in self.query("#op_backup_sync"):
btn.display = self.has_backup
btn.disabled = not (enabled and self.has_backup)
for btn in self.query("#op_autotest"):
btn.disabled = not enabled
for btn in self.query("#op_backup_stats"): for btn in self.query("#op_backup_stats"):
btn.display = self.has_backup btn.display = self.has_backup
@ -2348,6 +2353,9 @@ class OperationPane(Container):
modal_confirm.dismiss() modal_confirm.dismiss()
return return
modal_confirm.cancel_callback = lambda: setattr(self.exporter, "is_running", False)
modal_confirm.phase_progress()
await self._logic_full_backup( await self._logic_full_backup(
modal=modal_confirm, modal=modal_confirm,
selected_channels=selected_channels, selected_channels=selected_channels,
@ -2376,6 +2384,10 @@ class OperationPane(Container):
await self.exporter.export_roles() await self.exporter.export_roles()
await self.exporter.export_assets() await self.exporter.export_assets()
# Pre-fetch all members once for role resolution during message export
modal.set_status("Pre-fetching server members...")
await self.exporter.prefetch_members()
# 2. Channel Messages # 2. Channel Messages
total_chans = len(selected_channels) total_chans = len(selected_channels)
modal.write(f"\n[bold cyan]Backing up {total_chans} channels...[/bold cyan]") modal.write(f"\n[bold cyan]Backing up {total_chans} channels...[/bold cyan]")