From 478c0257546620f817de817a985d0aaa79b7c8a8 Mon Sep 17 00:00:00 2001 From: rambros Date: Mon, 9 Mar 2026 13:32:34 +0530 Subject: [PATCH] preserve discord links to messages & channels in migraions --- src/core/state.py | 18 +++++++++++ src/core/utils.py | 61 +++++++++++++++++++++++++++++++++++ src/fluxer/migrate_message.py | 16 +++++++-- src/stoat/migrate_message.py | 16 +++++++-- 4 files changed, 105 insertions(+), 6 deletions(-) create mode 100644 src/core/utils.py diff --git a/src/core/state.py b/src/core/state.py index 53a49e7..0923180 100644 --- a/src/core/state.py +++ b/src/core/state.py @@ -278,6 +278,24 @@ class MigrationState: if str(target_channel_id) in self.channel_messages: return self.channel_messages[str(target_channel_id)]["message_map"].get(str(discord_id)) return None + + def find_message_mapping(self, discord_id: str) -> tuple[str, str] | tuple[None, None]: + """ + Searches for a message mapping across all tracked channels. + Returns (target_channel_id, target_message_id) or (None, None). + """ + d_id = str(discord_id) + for t_cid, data in self.channel_messages.items(): + # Check main message map + if d_id in data.get("message_map", {}): + return str(t_cid), str(data["message_map"][d_id]) + # Check threads + for t_tid, t_data in data.get("threads", {}).items(): + if d_id in t_data.get("thread_map", {}): + # For thread links, the target_channel_id is technically the thread ID in some contexts, + # but usually for the URL it's the thread ID itself. 
# Matches https://discord.com/channels/{guild}/{channel}[/{message}], including
# the ptb./canary. client subdomains and the legacy discordapp.com domain.
_DISCORD_LINK_RE = re.compile(
    r"https?://(?:ptb\.|canary\.)?discord(?:app)?\.com/channels/(\d+)/(\d+)(?:/(\d+))?"
)


def resolve_discord_links(content: str, state: "MigrationState", platform: str, target_server_id: str) -> str:
    """
    Rewrite Discord channel/message links in *content* for the target platform.

    Each link is handled as follows:

    * A link that already sits inside a markdown link target (``[text](url)`` or
      ``[text](<url>)``) is left untouched. This also keeps the function
      idempotent, because its own fallbacks use the ``](<url>)`` form.
    * A message link whose message is found via ``state.find_message_mapping``
      becomes a target-platform message URL.
    * A channel link whose channel is found via ``state.get_target_channel_id``
      or ``state.get_target_category_id`` becomes a target-platform channel URL.
    * Anything else is wrapped as ``[`discord-message`](<url>)`` or
      ``[`discord-channel`](<url>)`` so the original URL is preserved.

    Args:
        content: Message text to process; falsy values are returned unchanged.
        state: Migration state providing the three lookup methods named above.
        platform: ``"stoat"`` selects stoat.chat URLs; any other value selects
            fluxer.app URLs.
        target_server_id: Server/community ID inserted into generated URLs.

    Returns:
        The text with every recognised Discord link resolved or wrapped.
    """
    if not content:
        return content

    def target_url(channel_id, message_id=None):
        # Single place that knows each platform's URL layout.
        if platform == "stoat":
            url = f"https://stoat.chat/server/{target_server_id}/channel/{channel_id}"
        else:  # Fluxer
            url = f"https://fluxer.app/channels/{target_server_id}/{channel_id}"
        return f"{url}/{message_id}" if message_id else url

    def replace_link(match):
        full_url = match.group(0)

        # Look at up to three characters before the URL: "](" or "](<" means the
        # URL is already the target of a markdown link and must not be rewritten.
        preceding = content[max(0, match.start() - 3):match.start()]
        if preceding.endswith(("](", "](<")):
            return full_url

        channel_id = match.group(2)
        message_id = match.group(3)

        target_cid = state.get_target_channel_id(channel_id) or state.get_target_category_id(channel_id)

        if message_id:
            t_cid, t_mid = state.find_message_mapping(message_id)
            # Prefer the container the message was actually found in; fall back
            # to the channel mapping taken from the link itself.
            final_cid = t_cid or target_cid
            if t_mid and final_cid:
                return target_url(final_cid, t_mid)
            # Fallback for unmigrated message
            return f"[`discord-message`](<{full_url}>)"

        if target_cid:
            return target_url(target_cid)
        # Fallback for unmigrated channel
        return f"[`discord-channel`](<{full_url}>)"

    return _DISCORD_LINK_RE.sub(replace_link, content)
context.discord_reader.channel_name_map, + state=context.state, + target_server_id=context.fluxer_writer.community_id ) # Process attachments @@ -224,7 +232,9 @@ async def migrate_messages( context.discord_reader.role_map, context.state.emoji_map, context.state.channel_map, - context.discord_reader.channel_name_map + context.discord_reader.channel_name_map, + state=context.state, + target_server_id=context.fluxer_writer.community_id ) # Add snapshot attachments to the list to process attachments_to_process.extend(snapshot.attachments) diff --git a/src/stoat/migrate_message.py b/src/stoat/migrate_message.py index 4d535d7..7cd4217 100644 --- a/src/stoat/migrate_message.py +++ b/src/stoat/migrate_message.py @@ -4,10 +4,11 @@ import re from typing import Callable, Awaitable, Dict, Any from src.core.base import MigrationContext +from src.core.utils import resolve_discord_links logger = logging.getLogger(__name__) -def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, role_map=None, emoji_map=None, channel_map=None, discord_channel_map=None) -> str: +def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, role_map=None, emoji_map=None, channel_map=None, discord_channel_map=None, state=None, target_server_id=None) -> str: if not content or not guild: return content @@ -85,6 +86,11 @@ def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, content = re.sub(r'<#([0-9]+)>', replace_channel, content) content = re.sub(r'<(a?):([^:]+):([0-9]+)>', replace_emoji, content) content = content.replace("@everyone", "`@everyone`").replace("@here", "`@here`") + + # Resolve Discord Links + if state and target_server_id: + content = resolve_discord_links(content, state, "stoat", target_server_id) + return content @@ -201,7 +207,9 @@ async def migrate_messages( context.discord_reader.role_map, context.state.emoji_map, context.state.channel_map, - context.discord_reader.channel_name_map + 
context.discord_reader.channel_name_map, + state=context.state, + target_server_id=context.stoat_writer.community_id ) # Process attachments @@ -228,7 +236,9 @@ async def migrate_messages( context.discord_reader.role_map, context.state.emoji_map, context.state.channel_map, - context.discord_reader.channel_name_map + context.discord_reader.channel_name_map, + state=context.state, + target_server_id=context.stoat_writer.community_id ) attachments_to_process.extend(snapshot.attachments) logger.debug(f"Found forwarded snapshot content: {content[:50]}... and {len(snapshot.attachments)} attachments")