Merge pull request #5 from HuntingFighter/error-fetching-content-type-backup-messages

Fix repeated image content_type exception
This commit is contained in:
RamBros 2026-03-27 09:27:08 +05:30 committed by GitHub
commit 6d9f724e2d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -367,13 +367,13 @@ class DiscordExporter:
async for msg in self.reader.fetch_message_history(channel_id, after_id=last_id): async for msg in self.reader.fetch_message_history(channel_id, after_id=last_id):
if not self.is_running: break if not self.is_running: break
batch_raw.append(msg) batch_raw.append(msg)
if len(batch_raw) >= BATCH_SIZE: if len(batch_raw) >= BATCH_SIZE:
results = await asyncio.gather(*(self._format_message(m) for m in batch_raw)) results = await asyncio.gather(*(self._format_message(m) for m in batch_raw))
for m_data, u_list in results: for m_data, u_list in results:
batch_messages.append(m_data) batch_messages.append(m_data)
if u_list: batch_users.extend(u_list) if u_list: batch_users.extend(u_list)
new_count += len(batch_messages) new_count += len(batch_messages)
accumulated_count += len(batch_messages) accumulated_count += len(batch_messages)
@@ -388,13 +388,13 @@ class DiscordExporter:
if self.db: if self.db:
if batch_users: self.db.save_users(batch_users) if batch_users: self.db.save_users(batch_users)
self.db.save_messages_batch(batch_messages) self.db.save_messages_batch(batch_messages)
if progress_callback: if progress_callback:
last_msg = batch_raw[-1] last_msg = batch_raw[-1]
author_name = getattr(last_msg.author, "display_name", "Unknown") author_name = getattr(last_msg.author, "display_name", "Unknown")
preview = (last_msg.content or "")[:150] preview = (last_msg.content or "")[:150]
await progress_callback(channel_name, accumulated_count, author_name=author_name, message_preview=preview, thread_count=accumulated_threads, file_count=accumulated_files) await progress_callback(channel_name, accumulated_count, author_name=author_name, message_preview=preview, thread_count=accumulated_threads, file_count=accumulated_files)
batch_messages.clear() batch_messages.clear()
batch_users.clear() batch_users.clear()
batch_raw.clear() batch_raw.clear()
@@ -492,7 +492,7 @@ class DiscordExporter:
async def _format_message(self, msg): async def _format_message(self, msg):
"""Formats a single message and its author for DB storage.""" """Formats a single message and its author for DB storage."""
new_users = [] new_users = []
# 1. Author handling # 1. Author handling
u_data = await self._format_user(msg.author) u_data = await self._format_user(msg.author)
if u_data: new_users.append(u_data) if u_data: new_users.append(u_data)
@@ -631,7 +631,7 @@ class DiscordExporter:
"filename": filename, "filename": filename,
"size": existing["size"], "size": existing["size"],
"url": str(url), "url": str(url),
"content_type": existing["content_type"], "content_type": existing["mime_type"],
"local_hash": existing["hash"] "local_hash": existing["hash"]
} }
@@ -654,7 +654,7 @@ class DiscordExporter:
# Ensure it's closed before hashing # Ensure it's closed before hashing
try: tmp.close() try: tmp.close()
except: pass except: pass
# Offload CPU-bound hashing and blocking file ops to the thread pool # Offload CPU-bound hashing and blocking file ops to the thread pool
# so we don't stall concurrent downloads on the event loop. # so we don't stall concurrent downloads on the event loop.
file_hash = await asyncio.to_thread(self._calculate_sha256, tmp_path) file_hash = await asyncio.to_thread(self._calculate_sha256, tmp_path)
@@ -678,7 +678,7 @@ class DiscordExporter:
ext = Path(filename).suffix ext = Path(filename).suffix
target_filename = f"{file_hash}{ext}" target_filename = f"{file_hash}{ext}"
target_path = self.attachments_path / target_filename target_path = self.attachments_path / target_filename
await asyncio.to_thread(shutil.move, str(tmp_path), str(target_path)) await asyncio.to_thread(shutil.move, str(tmp_path), str(target_path))
# Mark as successfully moved so finally block doesn't delete it # Mark as successfully moved so finally block doesn't delete it
@@ -686,7 +686,7 @@ class DiscordExporter:
if self.db: if self.db:
self.db.add_media_to_pool(file_hash, f"attachments/{target_filename}", actual_size, content_type, str(url)) self.db.add_media_to_pool(file_hash, f"attachments/{target_filename}", actual_size, content_type, str(url))
return { return {
"id": str(media_id), "id": str(media_id),
"filename": filename, "filename": filename,