From d37441dc4b8a7b25be22b0c587e8cae9eb7e2f54 Mon Sep 17 00:00:00 2001 From: rambros Date: Thu, 19 Mar 2026 11:04:42 +0530 Subject: [PATCH] improve fallbacks for message and channel references --- src/core/database.py | 4 +++ src/core/utils.py | 8 +++-- src/fluxer/migrate_message.py | 65 ++++++++++++++++++++++++++++------- src/stoat/migrate_message.py | 64 +++++++++++++++++++++++++++------- 4 files changed, 115 insertions(+), 26 deletions(-) diff --git a/src/core/database.py b/src/core/database.py index 801da00..fdb7256 100644 --- a/src/core/database.py +++ b/src/core/database.py @@ -101,6 +101,10 @@ class MigrationDatabase: ) """) + # Indexes for fast lookup by source message ID + cursor.execute("CREATE INDEX IF NOT EXISTS idx_message_mappings_source ON message_mappings (source_msg_id)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_thread_mappings_source ON thread_mappings (source_msg_id)") + conn.commit() conn.close() diff --git a/src/core/utils.py b/src/core/utils.py index bf94677..5bb8a62 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -24,7 +24,8 @@ def resolve_discord_links(content: str, state: MigrationState, platform: str, ta start_idx = match.start() if start_idx > 2: prev_chars = content[max(0, start_idx-3):start_idx] - if prev_chars.endswith("](") or prev_chars.endswith("]<"): + if prev_chars.endswith("](") or prev_chars.endswith("](<"): + logger.debug(f"resolve_discord_links: Skipping already-wrapped link: {full_url[:60]}") return full_url guild_id = match.group(1) @@ -32,10 +33,12 @@ def resolve_discord_links(content: str, state: MigrationState, platform: str, ta message_id = match.group(3) target_cid = state.get_target_channel_id(channel_id) or state.get_target_category_id(channel_id) + logger.debug(f"resolve_discord_links: guild={guild_id} channel={channel_id} msg={message_id} target_cid={target_cid}") if message_id: # Message link resolution t_cid, t_mid = state.find_message_mapping(message_id) + logger.debug(f"resolve_discord_links: find_message_mapping({message_id}) -> t_cid={t_cid}, t_mid={t_mid}") if t_mid: # Use found channel ID if available, otherwise fallback to channel_id mapping final_cid = t_cid or target_cid @@ -55,9 +58,10 @@ def resolve_discord_links(content: str, state: MigrationState, platform: str, ta else: # Fluxer return f"https://fluxer.app/channels/{target_server_id}/{target_cid}" - # Fallback for unmigrated channel + # Fallback for unmapped channel return f"[`discord-channel`](<{full_url}>)" + logger.debug(f"resolve_discord_links: Processing content (len {len(content)}): {content[:100]!r}") result = discord_link_re.sub(replace_link, content) if result != content: diff --git a/src/fluxer/migrate_message.py b/src/fluxer/migrate_message.py index a7802d3..00a1ca7 100644 --- a/src/fluxer/migrate_message.py +++ b/src/fluxer/migrate_message.py @@ -17,7 +17,7 @@ from src.core.utils import resolve_discord_links logger = logging.getLogger(__name__) -def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, emoji_map=None, channel_map=None, state=None, target_server_id=None) -> str: +def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, channel_mentions=None, emoji_map=None, channel_map=None, state=None, target_server_id=None, channel_names=None) -> str: if content is None: return "" if not content or not guild: @@ -39,10 +39,16 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, user = guild.client.get_user(uid) if user: return f"`@{user.name}`" - return match.group(0) + return f"`@Unknown User`" def replace_role(match): rid = int(match.group(1)) + # 0. Try native mapping first + if state: + target_role_id = state.get_target_role_id(str(rid)) + if target_role_id: + return f"<@&{target_role_id}>" + # 1. Try provided guild cache/list role = guild.get_role(rid) or next((r for r in guild.roles if r.id == rid), None) # 2. Try message's role_mentions @@ -58,7 +64,7 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, if role and role.name: return f"`@{role.name}`" - return match.group(0) + return f"`@Unknown Role`" def replace_channel(match): cid = int(match.group(1)) @@ -67,9 +73,23 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, if channel_map and str(cid) in channel_map: return f"<#{channel_map[str(cid)]}>" - # 2. Fallback to name in backticks - channel = guild.get_channel(cid) - return f"`#{channel.name}`" if channel else f"<#{cid}>" + # 2. Try to resolve channel name from pre-fetched names + name = None + if channel_names and str(cid) in channel_names: + name = channel_names[str(cid)] + + # 3. Try live lookup (fallback) + if not name: + channel = guild.get_channel(cid) or guild.get_thread(cid) + if not channel and channel_mentions: + channel = next((c for c in channel_mentions if c.id == cid), None) + if channel: + name = channel.name + + if name: + return f"`#{name}`" + + return f"<#{cid}>" def replace_emoji(match): animated = match.group(1) == "a" @@ -96,9 +116,6 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, return content - return content - - async def get_channel_threads(reader: Any, channel_id: int) -> List[Any]: """Helper to fetch all threads (active and archived) for a channel from Live or Backup.""" threads = [] @@ -233,8 +250,28 @@ async def migrate_messages( logger.info(f"Starting message migration: Discord #{source_channel_id} -> Fluxer #{target_channel_id}") if after_message_id: - logger.info(f"Resuming migration from after message ID: {after_message_id}") + logger.info(f"Starting migration of {source_channel_id} (inclusive={inclusive})...") + + # Pre-fetch channel and thread names for better mention resolution + if not hasattr(context, 'channel_names'): + context.channel_names = {} + try: + logger.debug(f"Pre-fetching channel and thread names for guild {context.discord_reader.guild.id}...") + # fetch_channels usually includes all non-thread channels + all_channels = await context.discord_reader.guild.fetch_channels() + for c in all_channels: + context.channel_names[str(c.id)] = c.name + + # active_threads helps find threads that might be mentioned + threads = await context.discord_reader.guild.active_threads() + for t in threads: + context.channel_names[str(t.id)] = t.name + + logger.debug(f"Pre-fetched {len(context.channel_names)} names.") + except Exception as e: + logger.debug(f"Failed to pre-fetch channel names: {e}") + # Process missed threads first if resuming if processed_threads is None: processed_threads = set() @@ -353,10 +390,12 @@ async def migrate_messages( msg.guild, msg.mentions, msg.role_mentions, + msg.channel_mentions, context.state.emoji_map, context.state.channel_map, state=context.state, - target_server_id=context.fluxer_writer.community_id + target_server_id=context.fluxer_writer.community_id, + channel_names=context.channel_names if hasattr(context, 'channel_names') else None ) logger.debug(f"Message {msg.id} cleaned content length: {len(content) if content else 0}") @@ -385,10 +424,12 @@ async def migrate_messages( msg.guild, snapshot.mentions if hasattr(snapshot, 'mentions') else None, snapshot.role_mentions if hasattr(snapshot, 'role_mentions') else None, + snapshot.channel_mentions if hasattr(snapshot, 'channel_mentions') else None, # Changed this line context.state.emoji_map, context.state.channel_map, state=context.state, - target_server_id=context.fluxer_writer.community_id + target_server_id=context.fluxer_writer.community_id, + channel_names=context.channel_names if hasattr(context, 'channel_names') else None ) # Add snapshot attachments to the list to process attachments_to_process.extend(snapshot.attachments) diff --git a/src/stoat/migrate_message.py b/src/stoat/migrate_message.py index b2f5ea3..2bdd195 100644 --- a/src/stoat/migrate_message.py +++ b/src/stoat/migrate_message.py @@ -17,7 +17,7 @@ from src.core.utils import resolve_discord_links logger = logging.getLogger(__name__) -def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, emoji_map=None, channel_map=None, state=None, target_server_id=None) -> str: +def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, channel_mentions=None, emoji_map=None, channel_map=None, state=None, target_server_id=None, channel_names=None) -> str: if content is None: return "" if not content or not guild: @@ -39,10 +39,16 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, user = guild.client.get_user(uid) if user: return f"`@{user.name}`" - return match.group(0) + return f"`@Unknown User`" def replace_role(match): rid = int(match.group(1)) + # 0. Try native mapping first + if state: + target_role_id = state.get_target_role_id(str(rid)) + if target_role_id: + return f"<@&{target_role_id}>" + # 1. Try provided guild cache/list role = guild.get_role(rid) or next((r for r in guild.roles if r.id == rid), None) # 2. Try message's role_mentions @@ -58,7 +64,7 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, if role and role.name: return f"`@{role.name}`" - return match.group(0) + return f"`@Unknown Role`" def replace_channel(match): cid = int(match.group(1)) @@ -67,9 +73,23 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, if channel_map and str(cid) in channel_map: return f"<#{channel_map[str(cid)]}>" - # 2. Fallback to name in backticks - channel = guild.get_channel(cid) - return f"`#{channel.name}`" if channel else f"<#{cid}>" + # 2. Try to resolve channel name from pre-fetched names + name = None + if channel_names and str(cid) in channel_names: + name = channel_names[str(cid)] + + # 3. Try live lookup (fallback) + if not name: + channel = guild.get_channel(cid) or guild.get_thread(cid) + if not channel and channel_mentions: + channel = next((c for c in channel_mentions if c.id == cid), None) + if channel: + name = channel.name + + if name: + return f"`#{name}`" + + return f"<#{cid}>" def replace_emoji(match): animated = match.group(1) == "a" @@ -78,8 +98,6 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, if emoji_map and eid in emoji_map: target_eid = emoji_map[eid] - prefix = "a" if animated else "" - #return f"<{prefix}:{name}:{target_eid}>" name not require for stoat return f":{target_eid}:" return f":{name}:" @@ -236,8 +254,26 @@ async def migrate_messages( logger.info(f"Starting message migration: Discord #{source_channel_id} -> Stoat #{target_channel_id}") if after_message_id: - logger.info(f"Resuming migration from after message ID: {after_message_id}") - + logger.info(f"Starting migration of {source_channel_id} (inclusive={inclusive})...") + + # Pre-fetch channel and thread names for better mention resolution + if not hasattr(context, 'channel_names'): + context.channel_names = {} + try: + logger.debug(f"Pre-fetching channel and thread names for guild {context.discord_reader.guild.id}...") + all_channels = await context.discord_reader.guild.fetch_channels() + for c in all_channels: + context.channel_names[str(c.id)] = c.name + + threads = await context.discord_reader.guild.active_threads() + for t in threads: + context.channel_names[str(t.id)] = t.name + + logger.debug(f"Pre-fetched {len(context.channel_names)} names.") + except Exception as e: + logger.debug(f"Failed to pre-fetch channel names: {e}") + + # Process missed threads first if resuming if processed_threads is None: processed_threads = set() @@ -357,10 +393,12 @@ async def migrate_messages( msg.guild, msg.mentions, msg.role_mentions, + msg.channel_mentions, context.state.emoji_map, context.state.channel_map, state=context.state, - target_server_id=context.stoat_writer.community_id + target_server_id=context.stoat_writer.community_id, + channel_names=context.channel_names if hasattr(context, 'channel_names') else None ) logger.debug(f"Message {msg.id} cleaned content length: {len(content) if content else 0}") @@ -385,10 +423,12 @@ async def migrate_messages( msg.guild, snapshot.mentions if hasattr(snapshot, 'mentions') else None, snapshot.role_mentions if hasattr(snapshot, 'role_mentions') else None, + snapshot.channel_mentions if hasattr(snapshot, 'channel_mentions') else None, context.state.emoji_map, context.state.channel_map, state=context.state, - target_server_id=context.stoat_writer.community_id + target_server_id=context.stoat_writer.community_id, + channel_names=context.channel_names if hasattr(context, 'channel_names') else None ) attachments_to_process.extend(snapshot.attachments) logger.debug(f"Found forwarded snapshot content: {content[:50]}... and {len(snapshot.attachments)} attachments")