NOTE(review): this patch was recovered from a whitespace-flattened copy. Context-line
indentation and blank lines are a best-effort reconstruction (hunk line counts were used
as a cross-check) -- verify against the repository before applying.
Review fixes folded in (src/core/exporter.py, _format_user):
  * the `roles` computation is kept: the flattened patch deleted it while
    `json.dumps(roles)` still referenced it, which raises NameError;
  * the default-avatar index uses `% 6`, matching Discord's documented formula;
  * the docstring now describes what the webhook branch actually stores.
Hunk headers for exporter.py were adjusted accordingly (+4 new-side lines).

diff --git a/src/core/backup_database.py b/src/core/backup_database.py
index 381cdb7..70d0847 100644
--- a/src/core/backup_database.py
+++ b/src/core/backup_database.py
@@ -98,9 +98,9 @@ class BackupDatabase:
                     elif table == "permissions":
                         conn.execute("CREATE TABLE permissions (id INTEGER PRIMARY KEY AUTOINCREMENT, channel_id INTEGER, target_id INTEGER, target_type TEXT, allow INTEGER, deny INTEGER)")
                     elif table == "users":
-                        conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT, display_name TEXT, avatar_file TEXT, avatar_url TEXT, roles TEXT)")
+                        conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, username TEXT, display_name TEXT, avatar_file TEXT, avatar_url TEXT, roles TEXT, type INTEGER DEFAULT 0)")
                     elif table == "messages":
-                        conn.execute("CREATE TABLE messages (id INTEGER PRIMARY KEY, channel_id INTEGER, author_id INTEGER, content TEXT, timestamp TEXT, type INTEGER, message_reference INTEGER, is_pinned INTEGER, extra_data TEXT)")
+                        conn.execute("CREATE TABLE messages (id INTEGER PRIMARY KEY, channel_id INTEGER, author_id INTEGER, content TEXT, timestamp TEXT, type INTEGER, message_reference INTEGER, is_pinned INTEGER, extra_data TEXT, custom_display_name TEXT, custom_avatar_url TEXT)")
                     elif table == "attachments":
                         conn.execute("CREATE TABLE attachments (id INTEGER PRIMARY KEY, message_id INTEGER, filename TEXT, size INTEGER, url TEXT, content_type TEXT, local_hash TEXT)")
                     elif table == "embeds":
@@ -123,6 +123,25 @@ class BackupDatabase:
 
                     conn.execute(f"INSERT INTO {table} ({col_str}) SELECT {col_str} FROM {table}_old")
                     conn.execute(f"DROP TABLE {table}_old")
+
+            # 3. Custom Author Profile Migration
+            res = conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='messages'").fetchone()
+            if res and res[0] > 0:
+                cols = conn.execute("PRAGMA table_info(messages)").fetchall()
+                col_names = [c["name"] for c in cols]
+                if "custom_display_name" not in col_names:
+                    logger.info("Migrating messages: adding custom author profile columns")
+                    conn.execute("ALTER TABLE messages ADD COLUMN custom_display_name TEXT")
+                    conn.execute("ALTER TABLE messages ADD COLUMN custom_avatar_url TEXT")
+
+            # 4. User Type Categorization Migration
+            res = conn.execute("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='users'").fetchone()
+            if res and res[0] > 0:
+                cols = conn.execute("PRAGMA table_info(users)").fetchall()
+                col_names = [c["name"] for c in cols]
+                if "type" not in col_names:
+                    logger.info("Migrating users: adding type column")
+                    conn.execute("ALTER TABLE users ADD COLUMN type INTEGER DEFAULT 0")
 
             conn.commit()
 
@@ -196,7 +215,8 @@ class BackupDatabase:
                 display_name TEXT,
                 avatar_file TEXT,
                 avatar_url TEXT,
-                roles TEXT
+                roles TEXT,
+                type INTEGER DEFAULT 0
             )
         """)
 
@@ -211,7 +231,9 @@ class BackupDatabase:
                 type INTEGER,
                 message_reference INTEGER,
                 is_pinned INTEGER,
-                extra_data TEXT
+                extra_data TEXT,
+                custom_display_name TEXT,
+                custom_avatar_url TEXT
             )
         """)
         conn.execute("CREATE INDEX IF NOT EXISTS idx_messages_channel ON messages(channel_id)")
@@ -405,8 +427,8 @@ class BackupDatabase:
         """Saves users to the author cache."""
         with self._lock:
             self._conn.executemany("""
-                INSERT OR REPLACE INTO users (id, username, display_name, avatar_file, avatar_url, roles)
-                VALUES (:id, :username, :display_name, :avatar_file, :avatar_url, :roles)
+                INSERT OR REPLACE INTO users (id, username, display_name, avatar_file, avatar_url, roles, type)
+                VALUES (:id, :username, :display_name, :avatar_file, :avatar_url, :roles, :type)
             """, users)
             self._conn.commit()
 
@@ -454,8 +476,8 @@ class BackupDatabase:
             conn = self._conn
             # Insert messages
             conn.executemany("""
-                INSERT OR REPLACE INTO messages (id, channel_id, author_id, content, timestamp, type, message_reference, is_pinned, extra_data)
-                VALUES (:id, :channel_id, :author_id, :content, :timestamp, :type, :message_reference, :is_pinned, :extra_data)
+                INSERT OR REPLACE INTO messages (id, channel_id, author_id, content, timestamp, type, message_reference, is_pinned, extra_data, custom_display_name, custom_avatar_url)
+                VALUES (:id, :channel_id, :author_id, :content, :timestamp, :type, :message_reference, :is_pinned, :extra_data, :custom_display_name, :custom_avatar_url)
             """, messages)
 
             # Extract attachments, reactions, and stickers
diff --git a/src/core/backup_reader.py b/src/core/backup_reader.py
index aa8f66c..d4c6b36 100644
--- a/src/core/backup_reader.py
+++ b/src/core/backup_reader.py
@@ -1339,6 +1339,18 @@ class BackupReader:
         user_id = parse_snowflake(msg_data.get("author_id", 0)) or 0
         author = self._resolve_author(user_id)
 
+        # Check for custom author profile (Webhooks / Masquerade)
+        over_name = msg_data.get("custom_display_name")
+        over_avatar = msg_data.get("custom_avatar_url")
+        if over_name:
+            # Create an ephemeral author object for this message
+            author = BackupMember({
+                "id": str(user_id),
+                "username": over_name,
+                "display_name": over_name,
+                "avatar_url": over_avatar
+            })
+
         self._ensure_media_pool_loaded()
         channel_id = parse_snowflake(msg_data["channel_id"])
 
diff --git a/src/core/exporter.py b/src/core/exporter.py
index 7ad8926..c62cc28 100644
--- a/src/core/exporter.py
+++ b/src/core/exporter.py
@@ -439,37 +439,65 @@ class DiscordExporter:
 
         return accumulated_count, accumulated_threads, accumulated_files
 
-    async def _format_user(self, user):
+    async def _format_user(self, user, is_webhook=False):
         """Formats user data for the author or a mention.
 
-        Avatar downloads are intentionally deferred to keep this off the hot
-        message-formatting path. Call _flush_pending_avatars() after each batch.
+        For webhooks, the cached base profile uses the ID as the username and a
+        default Discord avatar instead of the per-message one.
         """
-        user_id = str(user.id)
+        user_id_int = int(user.id)
+        user_id = str(user_id_int)
+
         if user_id in self.user_cache:
             return None
 
+        username = user.name
+        display_name = getattr(user, "display_name", user.name)
+        avatar = user.avatar
+        avatar_url = str(user.display_avatar.url) if user.display_avatar else None
+
+        if is_webhook:
+            # For webhooks, we use the ID as the username for technical clarity,
+            # and the current name as the display name.
+            username = user_id
+            display_name = user.name
+            # Discord default avatar index for accounts without a discriminator: (ID >> 22) % 6
+            default_index = (user_id_int >> 22) % 6
+            avatar_url = f"https://cdn.discordapp.com/embed/avatars/{default_index}.png"
+            avatar = None  # Don't download character avatar as the "base" webhook avatar
+
         # New user discovered — schedule avatar download but don't block here
         avatar_file = None
-        if user.avatar:
+        if avatar:
             av_name = f"{user_id}.png"
             av_target = self.users_path / av_name
             avatar_file = f"users/{av_name}"
             if not av_target.exists():
                 # Queue for deferred download
-                self._pending_avatars.append((user_id, user.avatar, av_target))
+                self._pending_avatars.append((user_id, avatar, av_target))
 
         roles = []
         if hasattr(user, "roles"):
             roles = [str(r.id) for r in user.roles if not r.is_default()]
+
+        # Determine user type
+        # 0: Regular User, 1: Bot, 2: Webhook, 3: System
+        u_type = 0
+        if is_webhook:
+            u_type = 2
+        elif getattr(user, "system", False):
+            u_type = 3
+        elif getattr(user, "bot", False):
+            u_type = 1
 
         user_data = {
             "id": user_id,
-            "username": user.name,
-            "display_name": getattr(user, "display_name", user.name),
+            "username": username,
+            "display_name": display_name,
             "avatar_file": avatar_file,
-            "avatar_url": str(user.display_avatar.url) if user.avatar else None,
-            "roles": json.dumps(roles)
+            "avatar_url": avatar_url,
+            "roles": json.dumps(roles),
+            "type": u_type
         }
         self.user_cache[user_id] = user_data
         return user_data
@@ -494,13 +522,14 @@ class DiscordExporter:
         new_users = []
 
         # 1. Author handling
-        u_data = await self._format_user(msg.author)
+        is_webhook = bool(getattr(msg, "webhook_id", None))
+        u_data = await self._format_user(msg.author, is_webhook=is_webhook)
         if u_data: new_users.append(u_data)
 
         # 1.5 Mentions handling (ensure all mentioned users are saved)
         if msg.mentions:
             for mention in msg.mentions:
-                u_ment = await self._format_user(mention)
+                u_ment = await self._format_user(mention, is_webhook=False)
                 if u_ment: new_users.append(u_ment)
 
         # 2. Attachments handling (Content-Addressable Storage)
@@ -603,6 +632,15 @@ class DiscordExporter:
                 for s_emb in snapshot.embeds:
                     embeds.append(s_emb.to_dict())
 
+        # 5.6 Author Overrides (Webhooks / Masquerade)
+        custom_display_name = None
+        custom_avatar_url = None
+
+        # Webhooks or bots with masquerade often use per-message names/avatars
+        if getattr(msg, "webhook_id", None) or (msg.author and msg.author.bot):
+            custom_display_name = msg.author.name
+            custom_avatar_url = str(msg.author.display_avatar.url) if msg.author.display_avatar else None
+
         m_data = {
             "id": str(msg.id),
             "channel_id": str(msg.channel.id),
@@ -616,7 +654,9 @@ class DiscordExporter:
             "stickers": stickers,
             "embeds": embeds,
             "reactions": reactions,
-            "extra_data": None
+            "extra_data": None,
+            "custom_display_name": custom_display_name,
+            "custom_avatar_url": custom_avatar_url
         }
 
         return m_data, new_users