Compare commits

...

4 Commits

Author SHA1 Message Date
df1f74a6bf fix or get bug 2024-12-22 18:27:15 +08:00
a604050d6c fix ui 2024-12-22 17:45:19 +08:00
a6bf93ebbc new ui 2024-12-22 05:34:21 +08:00
99a9de90aa update 2024-12-20 19:50:47 +08:00
3 changed files with 144 additions and 71 deletions

View File

@@ -4,6 +4,7 @@ import re
from typing import Optional, Union
import requests
from rich.progress import Progress
from rich.console import Console
from config import PIXIV_CONFIG
from redis_client import RedisClient
@@ -11,19 +12,23 @@ from redis_client import RedisClient
class PixivDownloader:
"""处理Pixiv图片下载"""
def __init__(self, headers: dict, progress: Progress):
def __init__(self, spider, headers: dict, progress: Progress):
"""
初始化下载器
参数:
spider: PixivSpider实例用于日志更新
headers: 带cookie的请求头
progress: Rich进度条实例
"""
self.spider = spider
self.headers = headers
self.progress = progress
self.redis = RedisClient()
# 用于追踪下载状态
self.work_status = {} # 记录每个作品的下载状态
def download_image(self, url: str) -> bool:
def download_image(self, url: str, work_id: str = None) -> bool:
"""
下载单张图片
@@ -41,8 +46,13 @@ class PixivDownloader:
illust_id, page_num, extension = match.groups()
file_name = f"{illust_id}_p{page_num}.{extension}"
# 检查是否已下载
if self.redis.is_image_downloaded(illust_id, page_num):
# 检查文件是否已存在
file_path = f'./img/{file_name}'
if os.path.exists(file_path):
self.spider._update_log(f"[green]{file_name} 已存在![/green]")
# 确保Redis状态同步
if not self.redis.is_image_downloaded(illust_id, page_num):
self.redis.mark_image_downloaded(illust_id, page_num)
return True
# 确保下载目录存在
@@ -63,8 +73,9 @@ class PixivDownloader:
with open(f'./img/{file_name}', 'wb') as fp:
fp.write(response.content)
# 更新Redis记录
# 更新Redis记录并显示下载成功信息
self.redis.mark_image_downloaded(illust_id, page_num)
self.spider._update_log(f"[bold white]{file_name} 已下载![/bold white]")
# 更新总页数
total_pages = self.redis.get_total_pages(illust_id)
@@ -81,9 +92,12 @@ class PixivDownloader:
return True
except requests.RequestException:
except requests.RequestException as e:
if attempt == 2: # 最后一次尝试失败
error_msg = f"[red]下载失败(PID:{work_id}): {str(e)}[/red]"
self.spider._update_log(error_msg)
return False
self.spider._update_log(f"[yellow]重试下载(PID:{work_id}): 第{attempt + 1}次[/yellow]")
continue
return False
@@ -100,6 +114,9 @@ class PixivDownloader:
"""
# 跳过已完成的作品
if self.redis.is_work_complete(work_id):
if work_id not in self.work_status:
self.spider._update_log(f"[green]作品(PID:{work_id})已完成下载[/green]")
self.work_status[work_id] = "complete"
return True
try:
@@ -118,33 +135,44 @@ class PixivDownloader:
if not images:
return False
# 下载每张图片
if len(images) > 1:
# 多图作品
subtask_id = self.progress.add_task(
f"[yellow]PID:{work_id}",
total=len(images)
)
success = True
for image in images:
if 'urls' not in image or 'original' not in image['urls']:
success = False
continue
if not self.download_image(image['urls']['original']):
success = False
self.progress.update(subtask_id, advance=1)
try:
# 下载每张图片
if len(images) > 1:
# 多图作品
subtask_id = self.progress.add_task(
f"[yellow]PID:{work_id}",
total=len(images)
)
self.progress.remove_task(subtask_id)
return success
success = True
for idx, image in enumerate(images):
if 'urls' not in image or 'original' not in image['urls']:
self.spider._update_log(f"[red]图片{idx + 1}URL获取失败(PID:{work_id})[/red]")
success = False
continue
if not self.download_image(image['urls']['original'], work_id):
self.spider._update_log(f"[red]图片{idx + 1}下载失败(PID:{work_id})[/red]")
success = False
else:
self.spider._update_log(f"[green]图片{idx + 1}/{len(images)}下载完成(PID:{work_id})[/green]")
self.progress.update(subtask_id, advance=1)
self.progress.remove_task(subtask_id)
return success
else:
# 单图作品
if 'urls' not in images[0] or 'original' not in images[0]['urls']:
self.spider._update_log(f"[red]URL获取失败(PID:{work_id})[/red]")
return False
return self.download_image(images[0]['urls']['original'], work_id)
except Exception as e:
self.spider._update_log(f"[red]作品处理出错(PID:{work_id}): {str(e)}[/red]")
return False
else:
# 单图作品
if 'urls' not in images[0] or 'original' not in images[0]['urls']:
return False
return self.download_image(images[0]['urls']['original'])
except (requests.RequestException, KeyError, ValueError):
except (requests.RequestException, KeyError, ValueError) as e:
self.spider._update_log(f"[red]作品信息获取失败(PID:{work_id}): {str(e)}[/red]")
return False

View File

@@ -4,6 +4,7 @@ Pixiv爬虫 - 每日排行榜下载
"""
from typing import Generator, List, Dict, Any
import requests
import time
from rich.console import Console
from rich.progress import (
Progress,
@@ -48,57 +49,74 @@ class PixivSpider:
self.headers = None
self.current_ranking_data = []
self.failed_works = []
self.log_messages = []
def _setup_ui(self) -> None:
"""设置Rich界面组件"""
# 创建布局
self.layout = Layout()
self.layout.split(
Layout(name="PixivSpider", ratio=8),
Layout(name="progress", ratio=2)
Layout(name="header", size=3),
Layout(name="main", size=None),
Layout(name="progress", size=3)
)
# 创建进度条
# 设置标题
self.layout["header"].update(
Panel("PixivSpider", style="bold magenta", border_style="bright_blue")
)
# 创建进度条 - 固定在底部
self.progress = Progress(
SpinnerColumn(),
TextColumn("[bold blue]{task.description}"),
BarColumn(bar_width=40),
TaskProgressColumn(),
TextColumn("{task.fields[speed]}"),
console=Console(stderr=True),
BarColumn(),
TextColumn("{task.percentage:>3.0f}%"),
console=self.console,
expand=True,
transient=False
)
# 设置进度条任务
self.main_task_id = self.progress.add_task(
"总体进度",
total=self.TOTAL_IMAGES
)
def _update_log(self, message: str, speed: float = 0.0) -> None:
"""更新日志信息"""
if not self.log_messages or message != self.log_messages[-1]:
self.log_messages.insert(0, message)
if len(self.log_messages) > 100:
self.log_messages = self.log_messages[:100]
messages = self.log_messages[:10]
# 清空控制台
self.console.clear()
# 重新渲染布局
self.console.print(self.layout)
# 更新日志面板
log_content = "\n".join(messages)
log_panel = Panel(
log_content,
title="下载状态",
subtitle=f"显示最新 {len(messages)}/{len(self.log_messages)} 条消息, 速度: {speed:.2f} t/s",
border_style="green",
padding=(1, 2),
expand=True
)
# 设置日志面板
self.log_messages = []
self.main_task_id = self.progress.add_task(
"[cyan]总体进度",
total=self.TOTAL_IMAGES,
speed=""
)
def _update_log(self, message: str) -> None:
"""更新日志显示"""
self.log_messages.append(message)
if len(self.log_messages) > 18:
self.log_messages.pop(0)
log_group = Group(*self.log_messages)
self.layout["PixivSpider"].update(
Panel(
log_group,
title="PixivSpider",
title_align="left",
border_style="cyan",
padding=(0, 1)
)
)
self.layout["main"].update(log_panel)
def _setup_session(self) -> None:
"""设置请求会话"""
cookie = self.redis.get_cookie()
if not cookie:
cookie = input('请输入一个cookie')
cookie = input('请输入Pixiv Cookie ')
self.redis.set_cookie(cookie)
self.headers = PIXIV_CONFIG.headers.copy()
self.headers['cookie'] = cookie
@@ -141,20 +159,47 @@ class PixivSpider:
def run(self) -> None:
"""运行爬虫"""
self._setup_session()
downloader = PixivDownloader(self.headers, self.progress)
downloader = PixivDownloader(self, self.headers, self.progress)
with Live(self.layout, self.console, refresh_per_second=10):
with Live(self.layout, refresh_per_second=20, auto_refresh=True, console=self.console):
self.layout["progress"].update(self.progress)
self._update_log('[cyan]开始抓取...[/cyan]')
# 处理排行榜页面
# 保存开始时间用于计算速度
start_time = time.time()
last_update_time = start_time
completed_works = 0
for page in range(1, 11):
try:
self.get_ranking_page(page)
for work_id in self.process_ranking_data():
if not downloader.download_work(work_id):
self.failed_works.append(work_id)
self.progress.update(self.main_task_id, advance=1)
else:
# 计算实际速度(作品/秒)
current_time = time.time()
elapsed_time = current_time - start_time
# 每秒更新一次速度
# 计算实际速度(作品/秒)
current_time = time.time()
elapsed_time = current_time - start_time
completed_works += 1
# 每次下载图片后更新速度
if elapsed_time > 0: # 避免除以零错误
speed = completed_works / elapsed_time
self._update_log(f"[cyan]已爬取[/cyan] {completed_works} [cyan]个页面![/cyan]")
# 更新进度
self.progress.update(
self.main_task_id,
completed=completed_works,
)
# 更新UI显示
self.layout["progress"].update(self.progress)
except requests.RequestException as e:
self._update_log(f'[red]获取排行榜第{page}页时发生错误:{str(e)}[/red]')
@@ -162,6 +207,6 @@ class PixivSpider:
# 清理失败作品的记录
for work_id in self.failed_works:
self.redis.client.delete(work_id)
self.redis.client().delete(work_id)
self._update_log('[green]爬虫运行完成[/green]')

View File

@@ -1,4 +1,4 @@
redis==5.2.1
requests==2.32.3
rich==13.7.1
rich==13.9.4
urllib3<2.0.0 # 确保与requests兼容