diff --git a/src/core/backup_reader.py b/src/core/backup_reader.py index 9b39a95..a5a2829 100644 --- a/src/core/backup_reader.py +++ b/src/core/backup_reader.py @@ -606,14 +606,6 @@ class BackupTag: return f"BackupTag(id={self.id}, name='{self.name}')" -class BackupMessageReference: - """Minimal stand-in for discord.MessageReference.""" - - __slots__ = ("message_id", "channel_id") - - def __init__(self, data: dict): - self.message_id = parse_snowflake(data["messageId"]) - self.channel_id = parse_snowflake(data["channelId"]) class BackupThread: @@ -769,17 +761,30 @@ class BackupMessage: # Reference (replies/forwards) self.reference = None if data.get("message_reference"): - self.reference = type("Ref", (), {"message_id": parse_snowflake(data["message_reference"]), "channel_id": self.channel_id})() - - self.thread = None - self.flags = type("Flags", (), {"value": 0, "forwarded": self.type == MessageType.forward})() + self.reference = BackupMessageReference( + message_id=parse_snowflake(data["message_reference"]), + channel_id=self.channel_id + ) - # snapshots not used in latest refined structure as content is in main message + self.thread = None + self.flags = BackupMessageFlags(forwarded=self.type == MessageType.forward) self.message_snapshots = [] def __repr__(self) -> str: return f"BackupMessage(id={self.id}, author={self.author})" +class BackupMessageReference: + __slots__ = ("message_id", "channel_id") + def __init__(self, message_id: int | None = None, channel_id: int | None = None): + self.message_id = message_id + self.channel_id = channel_id + +class BackupMessageFlags: + __slots__ = ("value", "forwarded") + def __init__(self, value: int = 0, forwarded: bool = False): + self.value = value + self.forwarded = forwarded + class BackupEmbed: """Minimal stand-in for discord.Embed.""" @@ -793,24 +798,70 @@ class BackupEmbed: self.color = data.get("color") self.timestamp = data.get("timestamp") - self.thumbnail = type("Thumbnail", (), {"url": data["thumbnail"]["url"]})() if data.get("thumbnail") and "url" in data["thumbnail"] else None - self.image = type("Image", (), {"url": data["image"]["url"]})() if data.get("image") and "url" in data["image"] else None + self.thumbnail = BackupEmbedThumbnail(data["thumbnail"]["url"]) if data.get("thumbnail") and "url" in data["thumbnail"] else None + self.image = BackupEmbedImage(data["image"]["url"]) if data.get("image") and "url" in data["image"] else None author = data.get("author") - self.author = type("Author", (), { - "name": author.get("name"), - "url": author.get("url"), - "icon_url": author.get("icon_url") - })() if author else None + self.author = BackupEmbedAuthor( + name=author.get("name"), + url=author.get("url"), + icon_url=author.get("icon_url") + ) if author else None footer = data.get("footer") - self.footer = type("Footer", (), { - "text": footer.get("text"), - "icon_url": footer.get("icon_url") - })() if footer else None + self.footer = BackupEmbedFooter( + text=footer.get("text"), + icon_url=footer.get("icon_url") + ) if footer else None self.fields = [BackupEmbedField(f) for f in data.get("fields", [])] + def to_dict(self) -> dict: + result = { + "type": "rich" # Default for bot-sent embeds + } + if self.title: result["title"] = self.title + if self.description: result["description"] = self.description + if self.url: result["url"] = self.url + if self.color: result["color"] = self.color + if self.timestamp: result["timestamp"] = self.timestamp + if self.thumbnail: result["thumbnail"] = {"url": self.thumbnail.url} + if self.image: result["image"] = {"url": self.image.url} + if self.author: + result["author"] = { + "name": self.author.name, + "url": self.author.url, + "icon_url": self.author.icon_url + } + if self.footer: + result["footer"] = { + "text": self.footer.text, + "icon_url": self.footer.icon_url + } + if self.fields: + result["fields"] = [{"name": f.name, "value": f.value, "inline": f.inline} for f in self.fields] + return result + +class BackupEmbedThumbnail: + __slots__ = ("url",) + def __init__(self, url: str): self.url = url + +class BackupEmbedImage: + __slots__ = ("url",) + def __init__(self, url: str): self.url = url + +class BackupEmbedAuthor: + __slots__ = ("name", "url", "icon_url") + def __init__(self, name: str | None = None, url: str | None = None, icon_url: str | None = None): + self.name = name + self.url = url + self.icon_url = icon_url + +class BackupEmbedFooter: + __slots__ = ("text", "icon_url") + def __init__(self, text: str | None = None, icon_url: str | None = None): + self.text = text + self.icon_url = icon_url class BackupEmbedField: """Minimal stand-in for embed fields.""" @@ -916,7 +967,10 @@ class BackupReader: MESSAGE_TYPE_REPLY = MessageType.reply MESSAGE_TYPE_THREAD_STARTER = MessageType.thread_starter_message MESSAGE_TYPE_FORWARD = MessageType.forward # Custom Reaper constant - + MESSAGE_TYPE_CHAT_INPUT_COMMAND = MessageType.chat_input_command + MESSAGE_TYPE_CONTEXT_MENU_COMMAND = MessageType.context_menu_command + MESSAGE_TYPE_POLL_RESULT = MessageType.poll_result + MESSAGE_TYPE_AUTO_MODERATION_ACTION = MessageType.auto_moderation_action Forbidden = BackupForbidden CHANNEL_TYPE_TEXT = ChannelType.text diff --git a/src/core/discord_reader.py b/src/core/discord_reader.py index 07789fa..3735098 100644 --- a/src/core/discord_reader.py +++ b/src/core/discord_reader.py @@ -9,6 +9,11 @@ class DiscordReader: MESSAGE_TYPE_DEFAULT = discord.MessageType.default MESSAGE_TYPE_REPLY = discord.MessageType.reply MESSAGE_TYPE_THREAD_STARTER = discord.MessageType.thread_starter_message + MESSAGE_TYPE_CHAT_INPUT_COMMAND = discord.MessageType.chat_input_command + MESSAGE_TYPE_CONTEXT_MENU_COMMAND = discord.MessageType.context_menu_command + MESSAGE_TYPE_FORWARD = getattr(discord.MessageType, 'forward', 100) + MESSAGE_TYPE_POLL_RESULT = getattr(discord.MessageType, 'poll_result', 46) + MESSAGE_TYPE_AUTO_MODERATION_ACTION = getattr(discord.MessageType, 'auto_moderation_action', 24) # Exceptions Forbidden = discord.Forbidden @@ -295,9 +300,28 @@ class DiscordReader: """Downloads a Discord emoji into memory.""" return await emoji.read() + @staticmethod + def get_sticker_extension(sticker) -> str: + """Determines the correct file extension for a sticker.""" + fmt = getattr(sticker, 'format', None) + if fmt: + # StickerFormatType: png=1, apng=2, lottie=3, gif=4 + val = getattr(fmt, 'value', fmt) + if val == 3: return "json" + if val == 4: return "gif" + if val == 2: return "png" # APNG is often saved as PNG + + # Fallback to URL parsing + url = str(getattr(sticker, 'url', "")) + if ".json" in url: return "json" + if ".gif" in url: return "gif" + if ".webp" in url: return "webp" + return "png" + async def download_sticker(self, sticker: Union[discord.GuildSticker, discord.StickerItem]) -> bytes: """Downloads a Discord sticker into memory.""" - logger.debug(f"Attempting to download sticker: {getattr(sticker, 'name', 'unknown')} (type: {type(sticker)})") + name = getattr(sticker, 'name', 'unknown') + logger.debug(f"Attempting to download sticker: {name} (ID: {sticker.id}, type: {type(sticker)})") # 1. Try directly reading if hasattr(sticker, 'read'): @@ -316,22 +340,36 @@ class DiscordReader: except Exception as e: logger.debug(f"to_sticker fallback failed: {e}") - # 3. Try downloading from URL as last resort + # 3. Try download via reader's session (Robust fallback) url = getattr(sticker, 'url', None) if url: try: - import aiohttp - logger.debug(f"Attempting URL download for sticker from {url}") - async with aiohttp.ClientSession() as session: + # Use the internal session from discord.py if possible (it has proper headers/auth) + session = None + if self.client and hasattr(self.client, 'http') and hasattr(self.client.http, '_HTTPClient__session'): + session = self.client.http._HTTPClient__session + + if session: + logger.debug(f"Attempting download for sticker '{name}' using bot's session from {url}") async with session.get(str(url)) as resp: if resp.status == 200: - return await resp.read() - else: - logger.debug(f"URL download failed with status {resp.status}") + data = await resp.read() + if data: + logger.debug(f"Successfully downloaded sticker '{name}' (size: {len(data)})") + return data + else: + # Generic fallback session + import aiohttp + logger.debug(f"Attempting download for sticker '{name}' using generic session from {url}") + async with aiohttp.ClientSession() as session: + async with session.get(str(url)) as resp: + if resp.status == 200: + data = await resp.read() + if data: return data except Exception as e: - logger.debug(f"URL download failed for sticker: {e}") + logger.debug(f"URL download failed for sticker '{name}': {e}") - logger.warning(f"Failed to download sticker {getattr(sticker, 'name', 'unknown')} after all attempts") + logger.warning(f"Failed to download sticker {name} ({sticker.id}) after all attempts") return b"" async def download_attachment(self, attachment: discord.Attachment) -> bytes: diff --git a/src/core/exporter.py b/src/core/exporter.py index db64b1f..2c8b837 100644 --- a/src/core/exporter.py +++ b/src/core/exporter.py @@ -21,6 +21,7 @@ class DiscordExporter: self.base_dir = Path(base_dir) if base_dir else Path(".") self.is_running = True self.db: Optional[BackupDatabase] = None + self.sticker_cache: Dict[int, bytes] = {} # Deduplicate downloads in one session async def setup(self): """Prepares the output directory and fetches server metadata.""" @@ -177,19 +178,16 @@ class DiscordExporter: sticker_data = [] logger.info(f"Exporting {len(stickers)} stickers...") for s in stickers: - ext = "png" - if s.url: - if ".json" in str(s.url): ext = "json" - elif ".gif" in str(s.url): ext = "gif" - elif ".webp" in str(s.url): ext = "webp" - + ext = self.reader.get_sticker_extension(s) filename = f"sticker_{s.id}.{ext}" sticker_path = self.assets_path / filename try: if not sticker_path.exists(): data = await self.reader.download_sticker(s) - with open(sticker_path, "wb") as f: - f.write(data) + if data: + with open(sticker_path, "wb") as f: + f.write(data) + mime_type = "image/png" if ext == "json": mime_type = "application/json" elif ext == "gif": mime_type = "image/gif" @@ -197,14 +195,14 @@ class DiscordExporter: sticker_data.append({ "id": str(s.id), - "name": s.name, + "name": getattr(s, "name", "unknown"), "type": "sticker", "filename": filename, "url": str(s.url), "mime_type": mime_type }) except Exception as ex: - logger.error(f"Failed to download sticker {s.name}: {ex}") + logger.error(f"Failed to download sticker {getattr(s, 'name', 'unknown')}: {ex}") # Save to database if self.db: @@ -482,30 +480,29 @@ class DiscordExporter: stickers = [] if msg.stickers: for st in msg.stickers: - # Determine extension based on format - ext = ".png" - if hasattr(st, "format"): - try: - from discord import StickerFormatType - if st.format == StickerFormatType.lottie: - ext = ".json" - elif st.format == StickerFormatType.apng: - ext = ".png" - elif st.format == StickerFormatType.gif: - ext = ".gif" - except ImportError: - pass + # Deduplicate downloads for the same sticker in one session + if st.id in self.sticker_cache: + st_bytes = self.sticker_cache[st.id] + else: + st_bytes = await self.reader.download_sticker(st) + if st_bytes: + self.sticker_cache[st.id] = st_bytes - st_data = await self._process_media( - media_id=st.id, - url=st.url, - filename=f"{st.name}{ext}", - content_type=f"image/{ext[1:]}" if ext != ".json" else "application/json", - save_method=st.save - ) - if st_data: - st_data["format_type"] = int(st.format.value) if hasattr(st, "format") and hasattr(st.format, "value") else 1 - stickers.append(st_data) + if st_bytes: + ext = self.reader.get_sticker_extension(st) + st_data = await self._process_media( + media_id=st.id, + url=st.url, + filename=f"{st.name}.{ext}", + content_type=f"image/{ext}" if ext != "json" else "application/json", + data=st_bytes + ) + if st_data: + st_data["name"] = st.name + st_data["format_type"] = int(st.format.value) if hasattr(st, "format") and hasattr(st.format, "value") else 1 + stickers.append(st_data) + else: + logger.warning(f"Could not download message sticker {st.id} in message {msg.id}") # 3. Embeds embeds = [] @@ -576,7 +573,7 @@ class DiscordExporter: return m_data, user_data - async def _process_media(self, media_id, url, filename, size=None, content_type=None, save_method=None): + async def _process_media(self, media_id, url, filename, size=None, content_type=None, save_method=None, data=None): """Downloads and deduplicates any media (attachment or sticker) using SHA-256 (CAS).""" # 1. First check by URL in DB if self.db: @@ -594,14 +591,23 @@ class DiscordExporter: # 2. Temporary download to calculate hash import tempfile import shutil - with tempfile.NamedTemporaryFile(delete=False) as tmp: - tmp_path = Path(tmp.name) - try: - if save_method: + tmp_path = None + try: + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp_path = Path(tmp.name) + if data: + tmp.write(data) + elif save_method: + # Closing handle before save_method just in case it needs to open it's own handle + tmp.close() await save_method(tmp_path) else: return None + # Ensure it's closed before hashing + try: tmp.close() + except: pass + file_hash = self._calculate_sha256(tmp_path) actual_size = tmp_path.stat().st_size @@ -637,10 +643,10 @@ class DiscordExporter: "content_type": content_type, "local_hash": file_hash } - except Exception as e: - logger.error(f"Failed to process media {filename}: {e}") - if tmp_path.exists(): tmp_path.unlink() - return None + except Exception as e: + logger.error(f"Failed to process media {filename}: {e}") + if tmp_path and tmp_path.exists(): tmp_path.unlink() + return None async def export_threads(self, channel_id: int, progress_callback=None, force=False, accumulated_count=0, accumulated_threads=0, accumulated_files=0, after_id: int | None = None): """Exports active and archived threads for a channel to SQLite.""" diff --git a/src/core/utils.py b/src/core/utils.py index 1055cec..bf94677 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -58,4 +58,8 @@ def resolve_discord_links(content: str, state: MigrationState, platform: str, ta # Fallback for unmigrated channel return f"[`discord-channel`](<{full_url}>)" - return discord_link_re.sub(replace_link, content) + logger.debug(f"resolve_discord_links: Processing content (len {len(content)}): {content[:100]!r}") + result = discord_link_re.sub(replace_link, content) + if result != content: + logger.debug(f"resolve_discord_links: Content resolved to (len {len(result)}): {result[:100]!r}") + return result diff --git a/src/fluxer/migrate_message.py b/src/fluxer/migrate_message.py index cb5bc2d..24e3377 100644 --- a/src/fluxer/migrate_message.py +++ b/src/fluxer/migrate_message.py @@ -18,6 +18,8 @@ from src.core.utils import resolve_discord_links logger = logging.getLogger(__name__) def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, emoji_map=None, channel_map=None, state=None, target_server_id=None) -> str: + if content is None: + return "" if not content or not guild: return content @@ -159,12 +161,18 @@ async def analyze_migration(context: MigrationContext, source_channel_id: int, a context.discord_reader.MESSAGE_TYPE_DEFAULT, context.discord_reader.MESSAGE_TYPE_REPLY, context.discord_reader.MESSAGE_TYPE_THREAD_STARTER, - context.discord_reader.MESSAGE_TYPE_FORWARD + context.discord_reader.MESSAGE_TYPE_FORWARD, + context.discord_reader.MESSAGE_TYPE_CHAT_INPUT_COMMAND, + context.discord_reader.MESSAGE_TYPE_CONTEXT_MENU_COMMAND, + context.discord_reader.MESSAGE_TYPE_POLL_RESULT, + context.discord_reader.MESSAGE_TYPE_AUTO_MODERATION_ACTION ]: + logger.debug(f"Skipping message {msg.id} in analyze: type={msg.type} (not an allowed type)") continue stats["messages"] += 1 stats["attachments"] += len(msg.attachments) + logger.debug(f"Analyze msg {msg.id}: type={msg.type}, content={msg.content[:50]!r}...") if progress_callback and stats["messages"] % 10 == 0: await progress_callback(stats) @@ -225,11 +233,16 @@ async def migrate_messages( # Skip system messages like "pinned a message", etc. + logger.debug(f"Analyzing message {msg.id}: type={msg.type}, content_len={len(msg.content) if msg.content else 0}, attachments={len(msg.attachments)}, embeds={len(msg.embeds)}") if msg.type not in [ context.discord_reader.MESSAGE_TYPE_DEFAULT, context.discord_reader.MESSAGE_TYPE_REPLY, context.discord_reader.MESSAGE_TYPE_THREAD_STARTER, - context.discord_reader.MESSAGE_TYPE_FORWARD + context.discord_reader.MESSAGE_TYPE_FORWARD, + context.discord_reader.MESSAGE_TYPE_CHAT_INPUT_COMMAND, + context.discord_reader.MESSAGE_TYPE_CONTEXT_MENU_COMMAND, + context.discord_reader.MESSAGE_TYPE_POLL_RESULT, + context.discord_reader.MESSAGE_TYPE_AUTO_MODERATION_ACTION ]: # If we are skipping the parent, we STILL need to check for a thread! if hasattr(msg, 'thread') and msg.thread: @@ -263,7 +276,6 @@ async def migrate_messages( await progress_callback(stats) continue else: - # Use custom clean_mentions with msg mentions for accuracy # Use custom clean_mentions with msg mentions for accuracy content = clean_mentions( msg.content, @@ -275,6 +287,7 @@ async def migrate_messages( state=context.state, target_server_id=context.fluxer_writer.community_id ) + logger.debug(f"Message {msg.id} cleaned content length: {len(content) if content else 0}") # Process attachments files = [] @@ -343,21 +356,31 @@ async def migrate_messages( if ext == 'lottie': if HAS_LOTTIE: try: - logger.debug(f"Converting Lottie sticker {s.name} to WebP...") + logger.debug(f"Converting Lottie sticker {s.name} (ID: {s.id}) to WebP...") lottie_data = json.loads(sticker_data) - animation = Animation.load(lottie_data) - gif_buf = io.BytesIO() - export_gif(animation, gif_buf) - gif_buf.seek(0) + + def _convert_lottie(data): + anim = Animation.load(data) + buf = io.BytesIO() + export_gif(anim, buf) + buf.seek(0) + return buf + + gif_buf = await asyncio.to_thread(_convert_lottie, lottie_data) + # GIF → WebP via Pillow from PIL import Image - img = Image.open(gif_buf) - webp_buf = io.BytesIO() - if getattr(img, 'n_frames', 1) > 1: - img.save(webp_buf, format='WEBP', save_all=True, loop=0) - else: - img.save(webp_buf, format='WEBP') - sticker_data = webp_buf.getvalue() + + def _convert_gif_to_webp(buf): + img = Image.open(buf) + w_buf = io.BytesIO() + if getattr(img, 'n_frames', 1) > 1: + img.save(w_buf, format='WEBP', save_all=True, loop=0, quality=80) + else: + img.save(w_buf, format='WEBP', quality=80) + return w_buf.getvalue() + + sticker_data = await asyncio.to_thread(_convert_gif_to_webp, gif_buf) ext = 'webp' logger.debug(f"Successfully converted Lottie sticker {s.name} to WebP") except Exception as conv_err: @@ -367,18 +390,21 @@ async def migrate_messages( logger.warning(f"Lottie library not available, sending sticker {s.name} as raw JSON") ext = 'json' - # APNG / GIF → WebP (via Pillow) elif ext in ('apng', 'gif'): try: - logger.debug(f"Converting {ext.upper()} sticker {s.name} to WebP...") + logger.debug(f"Converting {ext.upper()} sticker {s.name} (ID: {s.id}) to WebP...") from PIL import Image - img = Image.open(io.BytesIO(sticker_data)) - webp_buf = io.BytesIO() - if getattr(img, 'n_frames', 1) > 1: - img.save(webp_buf, format='WEBP', save_all=True, loop=0) - else: - img.save(webp_buf, format='WEBP') - sticker_data = webp_buf.getvalue() + + def _process_animated_sticker(data): + img = Image.open(io.BytesIO(data)) + webp_buf = io.BytesIO() + if getattr(img, 'n_frames', 1) > 1: + img.save(webp_buf, format='WEBP', save_all=True, loop=0, quality=80) + else: + img.save(webp_buf, format='WEBP', quality=80) + return webp_buf.getvalue() + + sticker_data = await asyncio.to_thread(_process_animated_sticker, sticker_data) ext = 'webp' logger.debug(f"Successfully converted sticker {s.name} to WebP") except Exception as conv_err: @@ -386,9 +412,10 @@ async def migrate_messages( # Keep original format as fallback filename = f"sticker_{s.name}_{s.id}.{ext}" + sticker_size = len(sticker_data) files.append({"filename": filename, "data": sticker_data}) stats["attachments"] += 1 - logger.debug(f"Added sticker {s.name} as attachment (extension: {ext})") + logger.debug(f"Added sticker {s.name} as attachment (extension: {ext}, size: {sticker_size} bytes)") except Exception as e: logger.error(f"Failed to download sticker {getattr(s, 'name', 'unknown')}: {e}") @@ -414,6 +441,7 @@ async def migrate_messages( if avatar_url and not avatar_url.startswith("http"): avatar_url = None + logger.debug(f"Fluxer: Calling send_message for Discord ID {msg.id}") fluxer_msg_id = await context.fluxer_writer.send_message( channel_id=target_channel_id, author_name=msg.author.display_name, @@ -425,12 +453,14 @@ async def migrate_messages( is_forwarded=is_forwarded, embeds=msg.embeds ) - + if fluxer_msg_id: if thread_id: context.state.set_thread_message_mapping(target_channel_id, thread_id, str(msg.id), fluxer_msg_id) else: context.state.set_message_mapping(target_channel_id, str(msg.id), fluxer_msg_id) + else: + logger.warning(f"Fluxer: send_message returned None for Discord ID {msg.id} (message might have been skipped or timed out)") if thread_id: context.state.update_thread_last_message_timestamp(target_channel_id, thread_id, str(msg.created_at)) @@ -481,6 +511,7 @@ async def migrate_messages( if progress_callback: await progress_callback(stats) + logger.debug(f"Fluxer: Finished processing message Discord ID {msg.id}") except Exception as e: logger.error(f"Failed to process message {msg.id}: {e}") import traceback diff --git a/src/fluxer/writer.py b/src/fluxer/writer.py index 52217b9..58603f4 100644 --- a/src/fluxer/writer.py +++ b/src/fluxer/writer.py @@ -265,16 +265,21 @@ class FluxerWriter: Returns the ID of the sent message if available. """ assert self.client is not None + logger.debug(f"Fluxer: send_message called for channel {channel_id}, author='{author_name}', content_len={len(content) if content else 0}, files={len(files) if files else 0}, is_forwarded={is_forwarded}") # Ensure we are ready before sending (wait a bit if needed) if not self._ready_event.is_set(): + logger.debug(f"Fluxer: Bot not ready, waiting...") try: await asyncio.wait_for(self._ready_event.wait(), timeout=5.0) except asyncio.TimeoutError: + logger.warning(f"Fluxer: Timeout waiting for bot readiness.") pass # Use webhook for avatar/username spoofing + logger.debug(f"Fluxer: Resolving webhook for channel {channel_id}...") webhook = await self._get_or_create_webhook(channel_id) + logger.debug(f"Fluxer: Webhook resolved: {webhook.id if webhook else 'None'}") # Prepare content with subtext timestamp # -# is Fluxer/Discord's subtext markdown: small, muted grey text @@ -287,6 +292,7 @@ class FluxerWriter: display_content = f">>> {content}" final_content = prefix + display_content if display_content else prefix + logger.debug(f"Fluxer: Prepared final_content (len {len(final_content)}): {final_content!r}") # Convert files to fluxer.File objects fluxer_files = None @@ -298,24 +304,42 @@ class FluxerWriter: if embeds: normalized_embeds = [] for e in embeds: - if hasattr(e, "to_dict"): - normalized_embeds.append(e.to_dict()) - else: - normalized_embeds.append(e) + d = e.to_dict() if hasattr(e, "to_dict") else e + if not isinstance(d, dict): + continue + + # Heuristic: Skip redundant link previews to avoid "Invalid Embed" errors or duplication. + # If an embed has a URL that is already in the message content, and no complex fields, skip it. + if content and d.get("url") and str(d.get("url")) in content: + if not d.get("fields") and not d.get("description") and not d.get("title"): + logger.debug(f"Fluxer: Skipping redundant link preview embed for {d.get('url')}") + continue + + normalized_embeds.append(d) + if not normalized_embeds: normalized_embeds = None try: # Current limitation: fluxer.py execute_webhook doesn't support 'message_reference' yet. # So if we have a reply, we MUST use the bot's direct send method. if webhook and not reply_to_message_id: - msg = await webhook.send( - content=final_content, - username=f"{author_name} (discord)", - avatar_url=author_avatar_url, - files=fluxer_files, - embeds=normalized_embeds, - wait=True - ) - return str(msg.id) if msg else None + logger.debug(f"Fluxer: Sending message via webhook {webhook.id} for user '{author_name}'") + try: + msg = await asyncio.wait_for( + webhook.send( + content=final_content, + username=f"{author_name} (discord)", + avatar_url=author_avatar_url, + files=fluxer_files, + embeds=normalized_embeds, + wait=True + ), + timeout=45.0 # Increased timeout for potential large file uploads + ) + logger.debug(f"Fluxer: Webhook send complete, msg_id={msg.id if msg else 'None'}") + return str(msg.id) if msg else None + except asyncio.TimeoutError: + logger.error(f"Fluxer: Webhook send timed out after 45s for channel {channel_id}") + return None else: # Use bot direct message (supports files and message_reference) # We add the author name to the prefix since bot name won't match @@ -330,19 +354,28 @@ class FluxerWriter: if reply_to_message_id: message_reference = {"message_id": str(reply_to_message_id), "channel_id": str(channel_id)} - msg_data = await self.client.send_message( - channel_id=channel_id, - content=final_bot_content, - files=fluxer_files, - embeds=normalized_embeds, - message_reference=message_reference - ) - return str(msg_data["id"]) if msg_data else None + logger.debug(f"Fluxer: Sending message via bot for user '{author_name}'") + try: + msg_data = await asyncio.wait_for( + self.client.send_message( + channel_id=channel_id, + content=final_bot_content, + files=fluxer_files, + embeds=normalized_embeds, + message_reference=message_reference + ), + timeout=45.0 + ) + logger.debug(f"Fluxer: Bot send complete, msg_id={msg_data.get('id') if msg_data else 'None'}") + return str(msg_data["id"]) if msg_data else None + except asyncio.TimeoutError: + logger.error(f"Fluxer: Bot send timed out after 45s for channel {channel_id}") + return None except Exception as e: - err_msg = f"Failed to copy message: {e}" + err_msg = f"Failed to copy message to Fluxer: {e}" if hasattr(e, 'errors') and e.errors: err_msg += f" - Details: {e.errors}" - print(err_msg) + logger.error(err_msg) return None async def send_marker(self, channel_id: str, content: str, files: list[dict] | None = None, reply_to_message_id: Optional[str] = None) -> Optional[str]: diff --git a/src/stoat/migrate_message.py b/src/stoat/migrate_message.py index 1616ee9..bcdb294 100644 --- a/src/stoat/migrate_message.py +++ b/src/stoat/migrate_message.py @@ -18,6 +18,8 @@ from src.core.utils import resolve_discord_links logger = logging.getLogger(__name__) def clean_mentions(content: str, guild, user_mentions=None, role_mentions=None, emoji_map=None, channel_map=None, state=None, target_server_id=None) -> str: + if content is None: + return "" if not content or not guild: return content @@ -164,8 +166,13 @@ async def analyze_migration(context: MigrationContext, source_channel_id: int, a context.discord_reader.MESSAGE_TYPE_DEFAULT, context.discord_reader.MESSAGE_TYPE_REPLY, context.discord_reader.MESSAGE_TYPE_THREAD_STARTER, - context.discord_reader.MESSAGE_TYPE_FORWARD + context.discord_reader.MESSAGE_TYPE_FORWARD, + context.discord_reader.MESSAGE_TYPE_CHAT_INPUT_COMMAND, + context.discord_reader.MESSAGE_TYPE_CONTEXT_MENU_COMMAND, + context.discord_reader.MESSAGE_TYPE_POLL_RESULT, + context.discord_reader.MESSAGE_TYPE_AUTO_MODERATION_ACTION ]: + logger.debug(f"Skipping message {msg.id} in analyze: type={msg.type} (not an allowed type)") continue stats["messages"] += 1 @@ -230,11 +237,16 @@ async def migrate_messages( # Skip system messages like "pinned a message", etc. content = "" # Initialize content + logger.debug(f"Analyzing message {msg.id}: type={msg.type}, content_len={len(msg.content) if msg.content else 0}, attachments={len(msg.attachments)}, embeds={len(msg.embeds)}") if msg.type not in [ context.discord_reader.MESSAGE_TYPE_DEFAULT, context.discord_reader.MESSAGE_TYPE_REPLY, context.discord_reader.MESSAGE_TYPE_THREAD_STARTER, - context.discord_reader.MESSAGE_TYPE_FORWARD + context.discord_reader.MESSAGE_TYPE_FORWARD, + context.discord_reader.MESSAGE_TYPE_CHAT_INPUT_COMMAND, + context.discord_reader.MESSAGE_TYPE_CONTEXT_MENU_COMMAND, + context.discord_reader.MESSAGE_TYPE_POLL_RESULT, + context.discord_reader.MESSAGE_TYPE_AUTO_MODERATION_ACTION ]: # If we are skipping the parent, we STILL need to check for a thread! if hasattr(msg, 'thread') and msg.thread: @@ -280,6 +292,7 @@ async def migrate_messages( state=context.state, target_server_id=context.stoat_writer.community_id ) + logger.debug(f"Message {msg.id} cleaned content length: {len(content) if content else 0}") # Process attachments files = [] @@ -345,10 +358,14 @@ async def migrate_messages( try: logger.debug(f"Converting Lottie sticker {s.name} to GIF...") lottie_data = json.loads(sticker_data) - animation = Animation.load(lottie_data) - output = io.BytesIO() - export_gif(animation, output) - sticker_data = output.getvalue() + + def _convert_lottie_to_gif(data): + animation = Animation.load(data) + output = io.BytesIO() + export_gif(animation, output) + return output.getvalue() + + sticker_data = await asyncio.to_thread(_convert_lottie_to_gif, lottie_data) ext = 'gif' logger.debug(f"Successfully converted Lottie sticker {s.name} to GIF") except Exception as conv_err: @@ -361,32 +378,33 @@ async def migrate_messages( # APNG → GIF (via Pillow, with proper frame disposal) elif ext == 'apng': try: - logger.debug(f"Converting APNG sticker {s.name} to GIF...") + logger.debug(f"Converting APNG sticker {s.name} (ID: {s.id}) to GIF for Stoat...") from PIL import Image - img = Image.open(io.BytesIO(sticker_data)) - gif_buf = io.BytesIO() - if getattr(img, 'n_frames', 1) > 1: - # Extract each frame onto a clean background to avoid overlap - frames = [] - durations = [] - for frame_idx in range(img.n_frames): - img.seek(frame_idx) - # Create fresh background and composite the frame - canvas = Image.new('RGBA', img.size, (0, 0, 0, 0)) - frame = img.convert('RGBA') - canvas.paste(frame, (0, 0), frame) - # Convert to palette mode for GIF - frames.append(canvas.convert('RGBA')) - durations.append(img.info.get('duration', 100)) - # Save with disposal=2 (clear frame before next) - frames[0].save( - gif_buf, format='GIF', save_all=True, - append_images=frames[1:], loop=0, - duration=durations, disposal=2, transparency=0 - ) - else: - img.save(gif_buf, format='GIF') - sticker_data = gif_buf.getvalue() + + def _convert_apng_to_gif(data): + img = Image.open(io.BytesIO(data)) + gif_buf = io.BytesIO() + if getattr(img, 'n_frames', 1) > 1: + frames = [] + durations = [] + # Create a RGBA canvas for disposal handling + canvas = Image.new('RGBA', img.size, (0,0,0,0)) + for i in range(img.n_frames): + img.seek(i) + frame = img.convert('RGBA') + canvas.paste(frame, (0, 0), frame) + frames.append(canvas.convert('RGBA')) + durations.append(img.info.get('duration', 100)) + frames[0].save( + gif_buf, format='GIF', save_all=True, + append_images=frames[1:], loop=0, + duration=durations, disposal=2, transparency=0 + ) + else: + img.save(gif_buf, format='GIF') + return gif_buf.getvalue() + + sticker_data = await asyncio.to_thread(_convert_apng_to_gif, sticker_data) ext = 'gif' logger.debug(f"Successfully converted APNG sticker {s.name} to GIF") except Exception as conv_err: