Merge pull request #14 from rambros3d/tupperware

Back up Discord masquerade messages
This commit is contained in:
RamBros 2026-04-03 08:32:20 +05:30 committed by GitHub
commit 9f27af971c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 131 additions and 23 deletions

View file

@ -98,9 +98,9 @@ class BackupDatabase:
elif table == "permissions":
conn.execute("CREATE TABLE permissions (id INTEGER PRIMARY KEY AUTOINCREMENT, channel_id INTEGER, target_id INTEGER, target_type TEXT, allow INTEGER, deny INTEGER)")
elif table == "users":
conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT, display_name TEXT, avatar_file TEXT, avatar_url TEXT, roles TEXT)")
conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT, display_name TEXT, avatar_file TEXT, avatar_url TEXT, roles TEXT, type INTEGER DEFAULT 0)")
elif table == "messages":
conn.execute("CREATE TABLE messages (id INTEGER PRIMARY KEY, channel_id INTEGER, author_id INTEGER, content TEXT, timestamp TEXT, type INTEGER, message_reference INTEGER, is_pinned INTEGER, extra_data TEXT)")
conn.execute("CREATE TABLE messages (id INTEGER PRIMARY KEY, channel_id INTEGER, author_id INTEGER, content TEXT, timestamp TEXT, type INTEGER, message_reference INTEGER, is_pinned INTEGER, extra_data TEXT, custom_display_name TEXT, custom_avatar_url TEXT)")
elif table == "attachments":
conn.execute("CREATE TABLE attachments (id INTEGER PRIMARY KEY, message_id INTEGER, filename TEXT, size INTEGER, url TEXT, content_type TEXT, local_hash TEXT)")
elif table == "embeds":
@ -124,6 +124,25 @@ class BackupDatabase:
conn.execute(f"INSERT INTO {table} ({col_str}) SELECT {col_str} FROM {table}_old")
conn.execute(f"DROP TABLE {table}_old")
# 3. Custom Author Profile Migration
res = conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='messages'").fetchone()
if res and res[0] > 0:
cols = conn.execute("PRAGMA table_info(messages)").fetchall()
col_names = [c["name"] for c in cols]
if "custom_display_name" not in col_names:
logger.info("Migrating messages: adding custom author profile columns")
conn.execute("ALTER TABLE messages ADD COLUMN custom_display_name TEXT")
conn.execute("ALTER TABLE messages ADD COLUMN custom_avatar_url TEXT")
# 4. User Type Categorization Migration
res = conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='users'").fetchone()
if res and res[0] > 0:
cols = conn.execute("PRAGMA table_info(users)").fetchall()
col_names = [c["name"] for c in cols]
if "type" not in col_names:
logger.info("Migrating users: adding type column")
conn.execute("ALTER TABLE users ADD COLUMN type INTEGER DEFAULT 0")
conn.commit()
def _init_db(self):
@ -196,7 +215,8 @@ class BackupDatabase:
display_name TEXT,
avatar_file TEXT,
avatar_url TEXT,
roles TEXT
roles TEXT,
type INTEGER DEFAULT 0
)
""")
@ -211,7 +231,9 @@ class BackupDatabase:
type INTEGER,
message_reference INTEGER,
is_pinned INTEGER,
extra_data TEXT
extra_data TEXT,
custom_display_name TEXT,
custom_avatar_url TEXT
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_channel ON messages(channel_id)")
@ -405,8 +427,8 @@ class BackupDatabase:
"""Saves users to the author cache."""
with self._lock:
self._conn.executemany("""
INSERT OR REPLACE INTO users (id, username, display_name, avatar_file, avatar_url, roles)
VALUES (:id, :username, :display_name, :avatar_file, :avatar_url, :roles)
INSERT OR REPLACE INTO users (id, username, display_name, avatar_file, avatar_url, roles, type)
VALUES (:id, :username, :display_name, :avatar_file, :avatar_url, :roles, :type)
""", users)
self._conn.commit()
@ -454,8 +476,8 @@ class BackupDatabase:
conn = self._conn
# Insert messages
conn.executemany("""
INSERT OR REPLACE INTO messages (id, channel_id, author_id, content, timestamp, type, message_reference, is_pinned, extra_data)
VALUES (:id, :channel_id, :author_id, :content, :timestamp, :type, :message_reference, :is_pinned, :extra_data)
INSERT OR REPLACE INTO messages (id, channel_id, author_id, content, timestamp, type, message_reference, is_pinned, extra_data, custom_display_name, custom_avatar_url)
VALUES (:id, :channel_id, :author_id, :content, :timestamp, :type, :message_reference, :is_pinned, :extra_data, :custom_display_name, :custom_avatar_url)
""", messages)
# Extract attachments, reactions, and stickers

View file

@ -1339,6 +1339,18 @@ class BackupReader:
user_id = parse_snowflake(msg_data.get("author_id", 0)) or 0
author = self._resolve_author(user_id)
# Check for custom author profile (Webhooks / Masquerade)
over_name = msg_data.get("custom_display_name")
over_avatar = msg_data.get("custom_avatar_url")
if over_name:
# Create an ephemeral author object for this message
author = BackupMember({
"id": str(user_id),
"username": over_name,
"display_name": over_name,
"avatar_url": over_avatar
})
self._ensure_media_pool_loaded()
channel_id = parse_snowflake(msg_data["channel_id"])

View file

@ -18,6 +18,7 @@ class DiscordExporter:
self.server_name = ""
self.server_id = ""
self.user_cache = {}
self.member_cache: Dict[int, Any] = {} # Pre-fetched member objects (id -> Member)
self.base_dir = Path(base_dir) if base_dir else Path(".")
self.is_running = True
self.db: Optional[BackupDatabase] = None
@ -62,6 +63,20 @@ class DiscordExporter:
hash_sha256.update(chunk)
return hash_sha256.hexdigest()
async def prefetch_members(self):
    """Load every guild member into ``self.member_cache``, keyed by member ID.

    msg.author is a discord.User, which carries no roles. Looking authors up
    in this cache lets message export resolve roles without an API call per
    message.

    This is best effort: if anything goes wrong, a warning is logged and the
    cache is left empty.
    """
    try:
        by_id = {}
        for member in await self.reader.get_members():
            by_id[member.id] = member
        self.member_cache = by_id
        logger.info(f"Pre-fetched {len(self.member_cache)} members for role resolution.")
    except Exception as e:
        # Deliberate catch-all: a failed prefetch must not abort the backup.
        logger.warning(f"Could not pre-fetch members (roles will be empty): {e}")
        self.member_cache = {}
async def export_metadata(self):
"""Saves server metadata to the SQLite database."""
metadata = await self.reader.get_server_metadata()
@ -439,37 +454,65 @@ class DiscordExporter:
return accumulated_count, accumulated_threads, accumulated_files
async def _format_user(self, user, is_webhook=False):
    """Build the cache record for a message author or a mentioned user.

    Avatar downloads are intentionally deferred to keep this off the hot
    message-formatting path: a user with an avatar is only queued on
    ``self._pending_avatars``. Call _flush_pending_avatars() after each batch.

    For webhooks the stored base profile is neutral: the ID doubles as the
    username, the current name becomes the display name, the avatar URL
    points at one of Discord's default avatars, and no download is queued.

    Args:
        user: Object exposing ``id``, ``name``, ``avatar`` and
            ``display_avatar``; ``display_name``, ``roles``, ``bot`` and
            ``system`` are read when present.
        is_webhook: Treat ``user`` as a webhook author.

    Returns:
        The new user record (a dict, also stored in ``self.user_cache``), or
        ``None`` when this user ID is already cached.
    """
    user_id_int = int(user.id)
    user_id = str(user_id_int)
    if user_id in self.user_cache:
        return None

    username = user.name
    display_name = getattr(user, "display_name", user.name)
    avatar = user.avatar
    avatar_url = str(user.display_avatar.url) if user.display_avatar else None

    if is_webhook:
        # For webhooks, we use the ID as the username for technical clarity,
        # and the current name as the display name.
        username = user_id
        display_name = user.name
        # Default avatar index derived from the ID: (ID >> 22) % 5
        # NOTE(review): Discord's docs give (id >> 22) % 6 for accounts
        # without a discriminator - confirm % 5 is intended for webhooks.
        default_index = (user_id_int >> 22) % 5
        avatar_url = f"https://cdn.discordapp.com/embed/avatars/{default_index}.png"
        avatar = None  # Don't download character avatar as the "base" webhook avatar

    # New user discovered - schedule avatar download but don't block here
    avatar_file = None
    if avatar:
        av_name = f"{user_id}.png"
        av_target = self.users_path / av_name
        avatar_file = f"users/{av_name}"
        if not av_target.exists():
            # Queue for deferred download
            self._pending_avatars.append((user_id, avatar, av_target))

    roles = []
    if hasattr(user, "roles"):
        roles = [str(r.id) for r in user.roles if not r.is_default()]

    # Determine user type
    # 0: Regular User, 1: Bot, 2: Webhook, 3: System
    u_type = 0
    if is_webhook:
        u_type = 2
    elif getattr(user, "system", False):
        u_type = 3
    elif getattr(user, "bot", False):
        u_type = 1

    user_data = {
        "id": user_id,
        "username": username,
        "display_name": display_name,
        "avatar_file": avatar_file,
        "avatar_url": avatar_url,
        "roles": json.dumps(roles),
        "type": u_type,
    }

    self.user_cache[user_id] = user_data
    return user_data
@ -494,13 +537,21 @@ class DiscordExporter:
new_users = []
# 1. Author handling
u_data = await self._format_user(msg.author)
is_webhook = bool(getattr(msg, "webhook_id", None))
author = msg.author
# msg.author is discord.User (no roles). Resolve to Member for role data.
if not is_webhook:
member = self.member_cache.get(msg.author.id)
if member:
author = member
u_data = await self._format_user(author, is_webhook=is_webhook)
if u_data: new_users.append(u_data)
# 1.5 Mentions handling (ensure all mentioned users are saved)
if msg.mentions:
for mention in msg.mentions:
u_ment = await self._format_user(mention)
# Mentions can be Member objects already, so roles work naturally
u_ment = await self._format_user(mention, is_webhook=False)
if u_ment: new_users.append(u_ment)
# 2. Attachments handling (Content-Addressable Storage)
@ -603,6 +654,15 @@ class DiscordExporter:
for s_emb in snapshot.embeds:
embeds.append(s_emb.to_dict())
# 5.6 Author Overrides (Webhooks / Masquerade)
custom_display_name = None
custom_avatar_url = None
# Webhooks or bots with masquerade often use per-message names/avatars
if getattr(msg, "webhook_id", None) or (msg.author and msg.author.bot):
custom_display_name = msg.author.name
custom_avatar_url = str(msg.author.display_avatar.url) if msg.author.display_avatar else None
m_data = {
"id": str(msg.id),
"channel_id": str(msg.channel.id),
@ -616,7 +676,9 @@ class DiscordExporter:
"stickers": stickers,
"embeds": embeds,
"reactions": reactions,
"extra_data": None
"extra_data": None,
"custom_display_name": custom_display_name,
"custom_avatar_url": custom_avatar_url
}
return m_data, new_users

View file

@ -329,8 +329,13 @@ class OperationPane(Container):
for pne in self.query("#op_target_pane"): pne.display = False
enabled = (v.get("discord_token") and v.get("discord_server") and not d_missing)
for bid in ("#op_backup_msgs", "#op_backup_sync", "#op_autotest"):
for btn in self.query(bid): btn.disabled = not enabled
for btn in self.query("#op_backup_msgs"):
btn.disabled = not enabled
for btn in self.query("#op_backup_sync"):
btn.display = self.has_backup
btn.disabled = not (enabled and self.has_backup)
for btn in self.query("#op_autotest"):
btn.disabled = not enabled
for btn in self.query("#op_backup_stats"):
btn.display = self.has_backup
@ -2348,6 +2353,9 @@ class OperationPane(Container):
modal_confirm.dismiss()
return
modal_confirm.cancel_callback = lambda: setattr(self.exporter, "is_running", False)
modal_confirm.phase_progress()
await self._logic_full_backup(
modal=modal_confirm,
selected_channels=selected_channels,
@ -2376,6 +2384,10 @@ class OperationPane(Container):
await self.exporter.export_roles()
await self.exporter.export_assets()
# Pre-fetch all members once for role resolution during message export
modal.set_status("Pre-fetching server members...")
await self.exporter.prefetch_members()
# 2. Channel Messages
total_chans = len(selected_channels)
modal.write(f"\n[bold cyan]Backing up {total_chans} channels...[/bold cyan]")