151 lines
4.8 KiB
Python
151 lines
4.8 KiB
Python
"""Pixiv下载组件"""
|
||
import os
|
||
import re
|
||
from typing import Optional, Union
|
||
import requests
|
||
from rich.progress import Progress
|
||
|
||
from config import PIXIV_CONFIG
|
||
from redis_client import RedisClient
|
||
|
||
class PixivDownloader:
    """Download Pixiv illustration images and record progress in Redis.

    Each stored page is recorded through ``RedisClient`` so that pages and
    whole works that were already fetched are skipped on later runs.
    """

    # Seconds allowed for each HTTP request before it is abandoned.
    REQUEST_TIMEOUT = 15
    # How many times a single image download is attempted before giving up.
    MAX_ATTEMPTS = 3
    # Matches the trailing "<illust_id>_p<page>.<ext>" part of an image URL.
    _IMAGE_URL_RE = re.compile(r'/(\d+)_p(\d+)\.([a-z]+)$')

    def __init__(
        self,
        headers: dict,
        progress: "Progress",
        *,
        session=None,
        download_dir: str = './img',
    ):
        """
        Initialise the downloader.

        Args:
            headers: Request headers (including the cookie) sent with every
                request.
            progress: Rich ``Progress`` instance used to display per-work
                sub-tasks for multi-page works.
            session: Optional object exposing a ``requests``-compatible
                ``get(url, **kwargs)`` (for example a ``requests.Session``).
                Defaults to the ``requests`` module itself, i.e. the same
                top-level ``requests.get`` calls as before.
            download_dir: Directory images are written to; created on demand.
        """
        self.headers = headers
        self.progress = progress
        self.redis = RedisClient()
        self.http = session if session is not None else requests
        self.download_dir = download_dir

    def download_image(self, url: str) -> bool:
        """
        Download a single image.

        Args:
            url: Image URL ending in ``<illust_id>_p<page>.<ext>``.

        Returns:
            bool: True if the image was stored (or had already been
            downloaded), False if the URL is not recognised or every
            attempt failed.
        """
        # Extract the illustration id, page index and extension from the URL.
        match = self._IMAGE_URL_RE.search(url)
        if not match:
            return False

        illust_id, page_num, extension = match.groups()
        file_name = f"{illust_id}_p{page_num}.{extension}"

        # Skip pages that are already recorded as downloaded.
        if self.redis.is_image_downloaded(illust_id, page_num):
            return True

        # exist_ok avoids the check-then-create race between concurrent callers.
        os.makedirs(self.download_dir, exist_ok=True)
        target_path = os.path.join(self.download_dir, file_name)

        for _ in range(self.MAX_ATTEMPTS):
            try:
                # NOTE(review): certificate verification is disabled, exactly
                # as before, so the cookie in `headers` is sent without
                # authenticating the server. Kept for compatibility; consider
                # making this configurable.
                response = self.http.get(
                    url,
                    headers=self.headers,
                    timeout=self.REQUEST_TIMEOUT,
                    verify=False,
                )
            except requests.RequestException:
                continue  # network error: try again

            if response.status_code != 200:
                continue  # unexpected status: try again

            with open(target_path, 'wb') as fp:
                fp.write(response.content)

            self.redis.mark_image_downloaded(illust_id, page_num)
            self._mark_complete_if_done(illust_id, int(page_num))
            return True

        return False

    def _mark_complete_if_done(self, illust_id: str, page_index: int) -> None:
        """Mark the work complete once every page of a known total is stored."""
        raw_total = self.redis.get_total_pages(illust_id)
        if not raw_total:
            # Total unknown. Do not guess it from this page's index: guessing
            # `page_index + 1` recorded 1 for the first page of every
            # multi-page work, after which the work could never complete.
            return
        try:
            # presumably an int already; coerced in case the client returns
            # the stored value as str/bytes -- verify against RedisClient
            total = int(raw_total)
        except (TypeError, ValueError):
            return
        if page_index >= total:
            # The stored total contradicts this page; treat it as stale.
            return
        if all(
            self.redis.is_image_downloaded(illust_id, i) for i in range(total)
        ):
            self.redis.mark_work_complete(illust_id)

    @staticmethod
    def _original_url(image) -> Optional[str]:
        """Return the ``urls.original`` entry of one page record, or None."""
        urls = image.get('urls') if isinstance(image, dict) else None
        if not isinstance(urls, dict):
            return None
        return urls.get('original')

    def _download_pages(self, work_id: str, urls: list) -> bool:
        """Download every page of a multi-page work behind a progress sub-task."""
        subtask_id = self.progress.add_task(
            f"[yellow]PID:{work_id}",
            total=len(urls),
        )
        success = True
        try:
            for url in urls:
                if url is None or not self.download_image(url):
                    success = False
                self.progress.update(subtask_id, advance=1)
        finally:
            # Always remove the sub-task, even if a download raised.
            self.progress.remove_task(subtask_id)
        return success

    def download_work(self, work_id: str) -> bool:
        """
        Download every image of a work.

        Args:
            work_id: Pixiv work id.

        Returns:
            bool: True if all pages were stored (or the work was already
            complete), otherwise False.
        """
        # Skip works that are already complete.
        if self.redis.is_work_complete(work_id):
            return True

        try:
            # Fetch the list of page records. The timeout keeps a stalled
            # connection from blocking the caller indefinitely.
            response = self.http.get(
                PIXIV_CONFIG.ajax_url.format(work_id),
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
                verify=False,
            )
            data = response.json()

            if not isinstance(data, dict) or data.get('error'):
                return False

            images = data.get('body')
            if not isinstance(images, list) or not images:
                return False

            # Record the real page count up front so completion tracking in
            # download_image works from an accurate total.
            self.redis.set_total_pages(work_id, len(images))

            urls = [self._original_url(image) for image in images]
            if len(urls) > 1:
                # Multi-page work: show a progress sub-task.
                success = self._download_pages(work_id, urls)
            else:
                # Single-page work.
                success = urls[0] is not None and self.download_image(urls[0])

            # Also covers works whose pages were all stored on earlier runs
            # but which were never marked complete.
            if success and not self.redis.is_work_complete(work_id):
                self.redis.mark_work_complete(work_id)
            return success

        except (requests.RequestException, KeyError, ValueError):
            return False
|