new ui

2024-12-22 05:34:21 +08:00 · 2024-12-22 05:34:21 +08:00 · a6bf93ebbc
commit a6bf93ebbc
parent 99a9de90aa
2 changed files with 136 additions and 62 deletions
--- a/pixiv_download.py
+++ b/pixiv_download.py
@@ -4,6 +4,7 @@ import re
 from typing import Optional, Union
 import requests
 from rich.progress import Progress
+from rich.console import Console

 from config import PIXIV_CONFIG
 from redis_client import RedisClient
@@ -11,19 +12,23 @@ from redis_client import RedisClient
 class PixivDownloader:
    """处理Pixiv图片下载"""
    
-    def __init__(self, headers: dict, progress: Progress):
+    def __init__(self, spider, headers: dict, progress: Progress):
        """
        初始化下载器
        
        参数:
+            spider: PixivSpider实例，用于日志更新
            headers: 带cookie的请求头
            progress: Rich进度条实例
        """
+        self.spider = spider
        self.headers = headers
        self.progress = progress
        self.redis = RedisClient()
+        # 用于追踪下载状态
+        self.work_status = {}  # 记录每个作品的下载状态

-    def download_image(self, url: str) -> bool:
+    def download_image(self, url: str, work_id: str = None) -> bool:
        """
        下载单张图片
        
@@ -41,8 +46,13 @@ class PixivDownloader:
        illust_id, page_num, extension = match.groups()
        file_name = f"{illust_id}_p{page_num}.{extension}"
        
-        # 检查是否已下载
-        if self.redis.is_image_downloaded(illust_id, page_num):
+        # 检查文件是否已存在
+        file_path = f'./img/{file_name}'
+        if os.path.exists(file_path):
+            self.spider._update_log(f"[green]{file_name} 已存在！[/green]")
+            # 确保Redis状态同步
+            if not self.redis.is_image_downloaded(illust_id, page_num):
+                self.redis.mark_image_downloaded(illust_id, page_num)
            return True
            
        # 确保下载目录存在
@@ -63,8 +73,9 @@ class PixivDownloader:
                    with open(f'./img/{file_name}', 'wb') as fp:
                        fp.write(response.content)
                    
-                    # 更新Redis记录
+                    # 更新Redis记录并显示下载成功信息
                    self.redis.mark_image_downloaded(illust_id, page_num)
+                    self.spider._update_log(f"[bold white]{file_name} 已下载！[/bold white]")
                    
                    # 更新总页数
                    total_pages = self.redis.get_total_pages(illust_id)
@@ -81,9 +92,12 @@ class PixivDownloader:
                    
                    return True
                    
-            except requests.RequestException:
+            except requests.RequestException as e:
                if attempt == 2:  # 最后一次尝试失败
+                    error_msg = f"[red]下载失败(PID:{work_id}): {str(e)}[/red]"
+                    self.spider._update_log(error_msg)
                    return False
+                self.spider._update_log(f"[yellow]重试下载(PID:{work_id}): 第{attempt + 1}次[/yellow]")
                continue
                
        return False
@@ -100,6 +114,9 @@ class PixivDownloader:
        """
        # 跳过已完成的作品
        if self.redis.is_work_complete(work_id):
+            if work_id not in self.work_status:
+                self.spider._update_log(f"[green]作品(PID:{work_id})已完成下载[/green]")
+                self.work_status[work_id] = "complete"
            return True
            
        try:
@@ -118,6 +135,7 @@ class PixivDownloader:
            if not images:
                return False
                
+            try:
                # 下载每张图片
                if len(images) > 1:
                    # 多图作品
@@ -127,13 +145,17 @@ class PixivDownloader:
                    )
                    
                    success = True
-                for image in images:
+                    for idx, image in enumerate(images):
                        if 'urls' not in image or 'original' not in image['urls']:
+                            self.spider._update_log(f"[red]图片{idx + 1}URL获取失败(PID:{work_id})[/red]")
                            success = False
                            continue
                            
-                    if not self.download_image(image['urls']['original']):
+                        if not self.download_image(image['urls']['original'], work_id):
+                            self.spider._update_log(f"[red]图片{idx + 1}下载失败(PID:{work_id})[/red]")
                            success = False
+                        else:
+                            self.spider._update_log(f"[green]图片{idx + 1}/{len(images)}下载完成(PID:{work_id})[/green]")
                            
                        self.progress.update(subtask_id, advance=1)
                        
@@ -143,8 +165,14 @@ class PixivDownloader:
                else:
                    # 单图作品
                    if 'urls' not in images[0] or 'original' not in images[0]['urls']:
+                        self.spider._update_log(f"[red]URL获取失败(PID:{work_id})[/red]")
                        return False
-                return self.download_image(images[0]['urls']['original'])
+                    return self.download_image(images[0]['urls']['original'], work_id)
                    
-        except (requests.RequestException, KeyError, ValueError):
+            except Exception as e:
+                self.spider._update_log(f"[red]作品处理出错(PID:{work_id}): {str(e)}[/red]")
+                return False
+                
+        except (requests.RequestException, KeyError, ValueError) as e:
+            self.spider._update_log(f"[red]作品信息获取失败(PID:{work_id}): {str(e)}[/red]")
            return False
--- a/pixiv_spider.py
+++ b/pixiv_spider.py
@@ -4,6 +4,7 @@ Pixiv爬虫 - 每日排行榜下载
 """
 from typing import Generator, List, Dict, Any
 import requests
+import time
 from rich.console import Console
 from rich.progress import (
    Progress,
@@ -54,51 +55,77 @@ class PixivSpider:
        # 创建布局
        self.layout = Layout()
        self.layout.split(
-            Layout(name="PixivSpider", ratio=8),
-            Layout(name="progress", ratio=2)
+            Layout(name="header", size=3),
+            Layout(name="main", size=None),
+            Layout(name="progress", size=3)
        )
        
-        # 创建进度条
+        # 设置标题
+        self.layout["header"].update(
+            Panel("PixivSpider", style="bold magenta", border_style="bright_blue")
+        )
+        
+        # 创建进度条 - 固定在底部
        self.progress = Progress(
+            SpinnerColumn(),
            TextColumn("[bold blue]{task.description}"),
-            BarColumn(bar_width=40),
-            TaskProgressColumn(),
-            TextColumn("{task.fields[speed]}"),
-            console=Console(stderr=True),
-            expand=True
+            BarColumn(),
+            TextColumn("{task.percentage:>3.0f}%"),
+            TextColumn("[bold green]{task.fields[speed]}"),
+            console=self.console,
+            expand=True,
+            transient=False  # 保持进度条显示
        )
        
-        # 设置日志面板
+        # 设置日志面板 - 滚动显示在进度条上方
        self.log_messages = []
+        self.layout["main"].update(
+            Panel(
+                Group(*self.log_messages),
+                title="下载状态",
+                border_style="green"
+            )
+        )
+        
+        # 设置进度条任务
        self.main_task_id = self.progress.add_task(
-            "[cyan]总体进度",
+            "总体进度",
            total=self.TOTAL_IMAGES,
-            speed=""
+            speed="0.00 t / 秒"
        )
        
    def _update_log(self, message: str) -> None:
        """更新日志显示"""
+        # 检查是否为重复消息
+        if not self.log_messages or message != self.log_messages[-1]:
            self.log_messages.append(message)
-        if len(self.log_messages) > 18:
-            self.log_messages.pop(0)
-        log_group = Group(*self.log_messages)
-        self.layout["PixivSpider"].update(
+            if len(self.log_messages) > 100:
+                self.log_messages = self.log_messages[-100:]
+            
+            try:
+                # 构建消息显示，确保顺序正确
+                messages = self.log_messages[-30:]  # 只显示最新的30条
+                
+                # 更新日志面板
+                self.layout["main"].update(
                    Panel(
-                log_group,
-                title="PixivSpider",
-                title_align="left",
-                border_style="cyan",
+                        Group(*messages),
+                        title="下载状态",
+                        subtitle=f"显示最新 {len(messages)}/{len(self.log_messages)} 条消息",
+                        border_style="green",
                        padding=(0, 1)
                    )
                )
+            except Exception as e:
+                # 防止界面更新错误影响主程序
+                print(f"界面更新出错: {e}")
        
    def _setup_session(self) -> None:
        """设置请求会话"""
        cookie = self.redis.get_cookie()
        if not cookie:
-            cookie = input('请输入一个cookie：')
+            cookie = input('请输入Pixiv Cookie ：')
            self.redis.set_cookie(cookie)
-            
        self.headers = PIXIV_CONFIG.headers.copy()
        self.headers['cookie'] = cookie
        
@@ -141,20 +168,39 @@ class PixivSpider:
    def run(self) -> None:
        """运行爬虫"""
        self._setup_session()
-        downloader = PixivDownloader(self.headers, self.progress)
+        downloader = PixivDownloader(self, self.headers, self.progress)
        
-        with Live(self.layout, self.console, refresh_per_second=10):
+        with Live(self.layout, refresh_per_second=20, auto_refresh=True, console=self.console):
            self.layout["progress"].update(self.progress)
            self._update_log('[cyan]开始抓取...[/cyan]')
            
            # 处理排行榜页面
+            # 保存开始时间用于计算速度
+            start_time = time.time()
+            completed_works = 0
+            
            for page in range(1, 11):
                try:
                    self.get_ranking_page(page)
                    for work_id in self.process_ranking_data():
                        if not downloader.download_work(work_id):
                            self.failed_works.append(work_id)
-                        self.progress.update(self.main_task_id, advance=1)
+                        else:
+                            completed_works += 1
+                            # 计算实际速度（作品/秒）
+                            elapsed_time = max(1, int(time.time() - start_time))
+                            speed = completed_works / elapsed_time
+                            
+                            # 更新进度和速度
+                            self.progress.update(
+                                self.main_task_id,
+                                completed=completed_works,  # 使用绝对值而不是增量
+                                refresh=True,  # 强制刷新显示
+                            )
+                            # 单独设置速度字段
+                            self.progress.tasks[self.main_task_id].fields["speed"] = f"{speed:.2f} t / 秒"
+                            # 更新UI显示
+                            self.layout["progress"].update(self.progress)
                        
                except requests.RequestException as e:
                    self._update_log(f'[red]获取排行榜第{page}页时发生错误：{str(e)}[/red]')