fix or get bug

fix ui
new ui
2024-12-22 18:27:15 +08:00 · 2024-12-22 17:45:19 +08:00 · 2024-12-22 05:34:21 +08:00 · 2024-12-20 19:50:47 +08:00
3 changed files with 144 additions and 71 deletions
--- a/pixiv_download.py
+++ b/pixiv_download.py
@ -4,6 +4,7 @@ import re
 from typing import Optional, Union
 import requests
 from rich.progress import Progress
 from rich.console import Console
 from config import PIXIV_CONFIG
 from redis_client import RedisClient
@ -11,19 +12,23 @@ from redis_client import RedisClient
 class PixivDownloader:
    """处理Pixiv图片下载"""
-    def __init__(self, headers: dict, progress: Progress):
+    def __init__(self, spider, headers: dict, progress: Progress):
        """
        初始化下载器
        参数:
            spider: PixivSpider实例，用于日志更新
            headers: 带cookie的请求头
            progress: Rich进度条实例
        """
        self.spider = spider
        self.headers = headers
        self.progress = progress
        self.redis = RedisClient()
        # 用于追踪下载状态
        self.work_status = {}  # 记录每个作品的下载状态
-    def download_image(self, url: str) -> bool:
+    def download_image(self, url: str, work_id: str = None) -> bool:
        """
        下载单张图片
@ -41,8 +46,13 @@ class PixivDownloader:
        illust_id, page_num, extension = match.groups()
        file_name = f"{illust_id}_p{page_num}.{extension}"
-        # 检查是否已下载
+        # 检查文件是否已存在
-        if self.redis.is_image_downloaded(illust_id, page_num):
+        file_path = f'./img/{file_name}'
        if os.path.exists(file_path):
            self.spider._update_log(f"[green]{file_name} 已存在！[/green]")
            # 确保Redis状态同步
            if not self.redis.is_image_downloaded(illust_id, page_num):
                self.redis.mark_image_downloaded(illust_id, page_num)
            return True
        # 确保下载目录存在
@ -63,8 +73,9 @@ class PixivDownloader:
                    with open(f'./img/{file_name}', 'wb') as fp:
                        fp.write(response.content)
-                    # 更新Redis记录
+                    # 更新Redis记录并显示下载成功信息
                    self.redis.mark_image_downloaded(illust_id, page_num)
                    self.spider._update_log(f"[bold white]{file_name} 已下载！[/bold white]")
                    # 更新总页数
                    total_pages = self.redis.get_total_pages(illust_id)
@ -81,9 +92,12 @@ class PixivDownloader:
                    return True
-            except requests.RequestException:
+            except requests.RequestException as e:
                if attempt == 2:  # 最后一次尝试失败
                    error_msg = f"[red]下载失败(PID:{work_id}): {str(e)}[/red]"
                    self.spider._update_log(error_msg)
                    return False
                self.spider._update_log(f"[yellow]重试下载(PID:{work_id}): 第{attempt + 1}次[/yellow]")
                continue
        return False
@ -100,6 +114,9 @@ class PixivDownloader:
        """
        # 跳过已完成的作品
        if self.redis.is_work_complete(work_id):
            if work_id not in self.work_status:
                self.spider._update_log(f"[green]作品(PID:{work_id})已完成下载[/green]")
                self.work_status[work_id] = "complete"
            return True
        try:
@ -118,6 +135,7 @@ class PixivDownloader:
            if not images:
                return False
            try:
                # 下载每张图片
                if len(images) > 1:
                    # 多图作品
@ -127,13 +145,17 @@ class PixivDownloader:
                    )
                    success = True
-                for image in images:
+                    for idx, image in enumerate(images):
                        if 'urls' not in image or 'original' not in image['urls']:
                            self.spider._update_log(f"[red]图片{idx + 1}URL获取失败(PID:{work_id})[/red]")
                            success = False
                            continue
-                    if not self.download_image(image['urls']['original']):
+                        if not self.download_image(image['urls']['original'], work_id):
                            self.spider._update_log(f"[red]图片{idx + 1}下载失败(PID:{work_id})[/red]")
                            success = False
                        else:
                            self.spider._update_log(f"[green]图片{idx + 1}/{len(images)}下载完成(PID:{work_id})[/green]")
                        self.progress.update(subtask_id, advance=1)
@ -143,8 +165,14 @@ class PixivDownloader:
                else:
                    # 单图作品
                    if 'urls' not in images[0] or 'original' not in images[0]['urls']:
                        self.spider._update_log(f"[red]URL获取失败(PID:{work_id})[/red]")
                        return False
-                return self.download_image(images[0]['urls']['original'])
+                    return self.download_image(images[0]['urls']['original'], work_id)
-        except (requests.RequestException, KeyError, ValueError):
+            except Exception as e:
                self.spider._update_log(f"[red]作品处理出错(PID:{work_id}): {str(e)}[/red]")
                return False
        except (requests.RequestException, KeyError, ValueError) as e:
            self.spider._update_log(f"[red]作品信息获取失败(PID:{work_id}): {str(e)}[/red]")
            return False
--- a/pixiv_spider.py
+++ b/pixiv_spider.py
@ -4,6 +4,7 @@ Pixiv爬虫 - 每日排行榜下载
 """
 from typing import Generator, List, Dict, Any
 import requests
 import time
 from rich.console import Console
 from rich.progress import (
    Progress,
@ -48,57 +49,74 @@ class PixivSpider:
        self.headers = None
        self.current_ranking_data = []
        self.failed_works = []
        self.log_messages = []
    def _setup_ui(self) -> None:
        """设置Rich界面组件"""
        # 创建布局
        self.layout = Layout()
        self.layout.split(
-            Layout(name="PixivSpider", ratio=8),
+            Layout(name="header", size=3),
-            Layout(name="progress", ratio=2)
+            Layout(name="main", size=None),
            Layout(name="progress", size=3)
        )
-        # 创建进度条
+        # 设置标题
        self.layout["header"].update(
            Panel("PixivSpider", style="bold magenta", border_style="bright_blue")
        )
        # 创建进度条 - 固定在底部
        self.progress = Progress(
            SpinnerColumn(),
            TextColumn("[bold blue]{task.description}"),
-            BarColumn(bar_width=40),
+            BarColumn(),
-            TaskProgressColumn(),
+            TextColumn("{task.percentage:>3.0f}%"),
-            TextColumn("{task.fields[speed]}"),
+            console=self.console,
-            console=Console(stderr=True),
+            expand=True,
            transient=False
        )
        # 设置进度条任务
        self.main_task_id = self.progress.add_task(
            "总体进度",
            total=self.TOTAL_IMAGES
        )
    def _update_log(self, message: str, speed: float = 0.0) -> None:
        """Record a log message and refresh the log panel.

        Messages are kept newest-first in ``self.log_messages`` (capped at
        100 entries); the 10 newest are joined with newlines and shown in a
        panel placed in the ``main`` layout region.

        Args:
            message: Text to log (callers pass Rich markup such as
                ``[green]...[/green]``). It is not stored again when it is
                identical to the most recently logged message; the panel is
                still rebuilt and redrawn in that case.
            speed: Value shown in the panel subtitle with a "t/s" suffix.
        """
        # The newest entry lives at index 0, so that is the one to compare
        # against when suppressing consecutive duplicates.
        if not self.log_messages or message != self.log_messages[0]:
            self.log_messages.insert(0, message)
            # Drop the oldest entries beyond the 100-message cap.
            del self.log_messages[100:]

        messages = self.log_messages[:10]
        log_panel = Panel(
            "\n".join(messages),
            title="下载状态",
            subtitle=f"显示最新 {len(messages)}/{len(self.log_messages)} 条消息, 速度: {speed:.2f} t/s",
            border_style="green",
            padding=(1, 2),
            expand=True
        )
        # Install the new panel before redrawing so the rendered frame
        # already contains this message instead of lagging one call behind.
        self.layout["main"].update(log_panel)

        # NOTE(review): run() calls this inside a Live context created with
        # auto_refresh=True on the same console; confirm that this manual
        # clear/print is still required there.
        self.console.clear()
        self.console.print(self.layout)
        # 设置日志面板
        self.log_messages = []
        self.main_task_id = self.progress.add_task(
            "[cyan]总体进度",
            total=self.TOTAL_IMAGES,
            speed=""
        )
    def _update_log(self, message: str) -> None:
        """Append a message to the rolling log and redraw the log panel.

        Only the 18 most recent messages are retained. They are rendered,
        oldest first, as a group inside a panel placed in the
        ``PixivSpider`` layout region.

        Args:
            message: The renderable/text line to add to the log.
        """
        history = self.log_messages
        history.append(message)
        # Rolling window: discard the oldest line once more than 18 are held.
        if len(history) > 18:
            del history[0]

        log_group = Group(*history)
        target = self.layout["PixivSpider"]
        panel = Panel(
            log_group,
            title="PixivSpider",
            title_align="left",
            border_style="cyan",
            padding=(0, 1),
        )
        target.update(panel)
    def _setup_session(self) -> None:
        """设置请求会话"""
        cookie = self.redis.get_cookie()
        if not cookie:
-            cookie = input('请输入一个cookie：')
+            cookie = input('请输入Pixiv Cookie ：')
            self.redis.set_cookie(cookie)
        self.headers = PIXIV_CONFIG.headers.copy()
        self.headers['cookie'] = cookie
@ -141,20 +159,47 @@ class PixivSpider:
    def run(self) -> None:
        """运行爬虫"""
        self._setup_session()
-        downloader = PixivDownloader(self.headers, self.progress)
+        downloader = PixivDownloader(self, self.headers, self.progress)
-        with Live(self.layout, self.console, refresh_per_second=10):
+        with Live(self.layout, refresh_per_second=20, auto_refresh=True, console=self.console):
            self.layout["progress"].update(self.progress)
            self._update_log('[cyan]开始抓取...[/cyan]')
            # 处理排行榜页面
            # 保存开始时间用于计算速度
            start_time = time.time()
            last_update_time = start_time
            completed_works = 0
            for page in range(1, 11):
                try:
                    self.get_ranking_page(page)
                    for work_id in self.process_ranking_data():
                        if not downloader.download_work(work_id):
                            self.failed_works.append(work_id)
-                        self.progress.update(self.main_task_id, advance=1)
+                        else:
                            # 计算实际速度（作品/秒）
                            current_time = time.time()
                            elapsed_time = current_time - start_time
                            # 每秒更新一次速度
                            # 计算实际速度（作品/秒）
                            current_time = time.time()
                            elapsed_time = current_time - start_time
                            completed_works += 1
                            # 每次下载图片后更新速度
                            if elapsed_time > 0:    # 避免除以零错误
                                speed = completed_works / elapsed_time
                                self._update_log(f"[cyan]已爬取[/cyan] {completed_works} [cyan]个页面！[/cyan]")
                            # 更新进度
                            self.progress.update(
                                self.main_task_id,
                                completed=completed_works,
                            )
                            # 更新UI显示
                            self.layout["progress"].update(self.progress)
                except requests.RequestException as e:
                    self._update_log(f'[red]获取排行榜第{page}页时发生错误：{str(e)}[/red]')
@ -162,6 +207,6 @@ class PixivSpider:
            # 清理失败作品的记录
            for work_id in self.failed_works:
-                self.redis.client.delete(work_id)
+                self.redis.client().delete(work_id)
            self._update_log('[green]爬虫运行完成[/green]')
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +1,4 @@
 redis==5.2.1
 requests==2.32.3
-rich==13.7.1
+rich==13.9.4
 urllib3<2.0.0  # 确保与requests兼容
Author	SHA1	Message	Date
岛风	df1f74a6bf	fix or get bug	2024-12-22 18:27:15 +08:00
岛风	a604050d6c	fix ui	2024-12-22 17:45:19 +08:00
岛风	a6bf93ebbc	new ui	2024-12-22 05:34:21 +08:00
nyaasuki	99a9de90aa	update	2024-12-20 19:50:47 +08:00