from abc import ABC, abstractmethod
from typing import List, Dict, Optional, AsyncGenerator
from pathlib import Path
import asyncio
import logging
import os
import shutil

import aiohttp
from lxml import etree

from src.config import DEFAULT_HEADERS, TIMEOUT, RETRIES, PROXY_URL, RETRY_PROXY
from src.common.utils import Cache, CBZUtils  # cache helper and CBZ archive utilities
from src.common.item import Chapter, MangaItem, MangaInfo, CoverItem
from src.common.exceptions import SiteError, NetworkError, ParseError
from src.common.logging import setup_logging
from src.common.naming import DirectoryNaming, FileNaming
from src.common.ComicInfo import ComicInfo, ImageInfo, ComicInfoXml

logger = setup_logging(__name__)


class BaseSite(ABC):
    """Base class for manga site scrapers.

    Subclasses must implement get_chapter_images() and are expected to provide
    an ``extractor`` attribute used by get_manga_info() and get_manga_list().
    """

    def __init__(self):
        self.session: Optional[aiohttp.ClientSession] = None
        self.headers = DEFAULT_HEADERS.copy()
        self.cache = Cache()  # HTML response cache

    async def __aenter__(self):
        # TIMEOUT comes from src.config and is assumed to be a duration in seconds.
        self.session = aiohttp.ClientSession(
            headers=self.headers,
            timeout=aiohttp.ClientTimeout(total=TIMEOUT),
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()
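
    # Typical usage is as an async context manager so the aiohttp session is
    # opened and closed automatically (sketch only; "SomeSite" stands for a
    # hypothetical concrete subclass):
    #
    #     async with SomeSite() as site:
    #         info = await site.get_manga_info(manga_url)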

    async def _get(self, url: str, retries: int = RETRIES, PROXY: bool = RETRY_PROXY) -> str:
        """Send a GET request with caching, retries and error handling."""
        # Serve from the cache first so repeated requests don't hit the site.
        cached_html = self.cache.get(url)
        if cached_html:
            logger.debug(f"Serving cached HTML for: {url}")
            return cached_html

        for attempt in range(retries):
            try:
                proxy = PROXY_URL if PROXY else None
                async with self.session.get(str(url), proxy=proxy) as response:
                    if response.status == 200:
                        html = await response.text()
                        self.cache.set(url, html)  # cache the HTML for later calls
                        return html
                    elif response.status == 404:
                        raise SiteError(f"Page not found: {url}")
                    elif response.status == 403:
                        raise SiteError(f"Access denied: {url}")
                    else:
                        raise NetworkError(f"HTTP error {response.status}: {url}")
            except aiohttp.ClientError as e:
                if attempt == retries - 1:
                    raise NetworkError(f"Network error: {str(e)}")
                logger.info(f"Retrying (attempt {attempt + 2}) for: {url}")
                # Linear back-off: wait 2s, 4s, 6s, ... between retries.
                await asyncio.sleep(2 * (attempt + 1))

    @abstractmethod
    async def get_chapter_images(self, chapter_url: str) -> List[str]:
        """Return the URLs of every image in a chapter."""
        pass
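
    # A hedged sketch of what a concrete override might look like; the XPath
    # is illustrative only and not taken from any real site's markup:
    #
    #     async def get_chapter_images(self, chapter_url: str) -> List[str]:
    #         html = await self._get(chapter_url)
    #         tree = etree.HTML(html)
    #         return [str(src) for src in tree.xpath("//div[@class='chapter-images']//img/@src")]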

    # @abstractmethod
    async def get_manga_info(self, manga_url: str) -> MangaInfo:
        """Fetch and parse the manga's info page."""
        try:
            html = await self._get(manga_url)
            tree = etree.HTML(html)
            return self.extractor.extract_manga_info(tree)
        except Exception as e:
            if isinstance(e, (ParseError, SiteError)):
                raise SystemExit(f"Failed to parse manga info: {str(e)}")
            raise ParseError(f"Failed to parse manga info: {str(e)}")

    # @abstractmethod
    # async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]:
    #     """Fetch the manga's chapter list."""
    #     pass

    async def get_chapter_list(self, manga_info: MangaInfo) -> List[Chapter]:
        """Build the chapter list and mark chapters that are already downloaded."""
        try:
            list_chapter = manga_info.get_list_chapter()
            down_chapter = []
            downloaded_chapter = []
            number = 0
            for chapter in list_chapter:
                number += 1
                cbz_path = FileNaming.chapter_cbz(manga_info=manga_info, chapter=chapter)
                if os.path.exists(cbz_path):
                    # Check whether this chapter is the final one of a completed series.
                    if manga_info.is_chapter_ended(chapter.title):
                        # The chapter is already on disk, so only refresh the
                        # Number/Count fields of its ComicInfo.xml.
                        # if ci.Count != "" and int(ci.Count) != 0 and int(ci.Count) != int(number):
                        #     count = number
                        xml_data = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, number=number, count=number)
                        # Write the updated ComicInfo.xml back into the CBZ archive.
                        if xml_data:
                            CBZUtils().update_cbz_with_new_xml(cbz_path, xml_data)
                        logger.info(f"Updated ComicInfo.xml number for {cbz_path}")
                    logger.debug(f"Chapter {chapter.title} already exists")
                    chapter.status = "downloaded"
                    downloaded_chapter.append({"name": chapter.title, "status": chapter.status, "path": cbz_path})
                down_chapter.append(chapter)

            # "已完结" is the site-reported status string meaning "completed"; it is matched verbatim.
            if manga_info.status == "已完结":
                from src.common.utils import KomgaAPI, MangaUtils
                is_update_komga_ended = MangaUtils("mangas_ended.json").search_manga(name=manga_info.title)
                if is_update_komga_ended is not None:
                    logger.info(f"{manga_info.title} is completed and already on the KOMGA ended list; nothing to update")
                else:
                    if len(downloaded_chapter) == len(list_chapter):
                        logger.info(f"{manga_info.title} is completed and all chapters are downloaded")
                        KomgaAPI().update_series_ended(manga_info.title)
                        logger.info(f"{manga_info.title} is completed; KOMGA status updated")
                        MangaUtils("mangas_ended.json").add_manga(name=manga_info.title)
                    else:
                        logger.info(f"{manga_info.title} is completed but not all chapters are downloaded yet (possibly a network issue)")
            return down_chapter
        except Exception as e:
            if isinstance(e, (ParseError, SiteError)):
                raise
            raise ParseError(f"Failed to parse chapter list: {str(e)}")

    async def update_covers(self, manga_info: MangaInfo) -> List[CoverItem]:
        """Refresh the cover images kept in the Icons directory."""
        cache_cover = {'path': str(DirectoryNaming.manga_cover_dir(manga_info, cache=True))}
        cover_img = {'path': str(DirectoryNaming.manga_cover_dir(manga_info, cache=False))}
        cache_cover_item = CoverItem(**cache_cover)
        icons_dir = os.path.dirname(cover_img['path'])
        if not os.path.exists(icons_dir):
            os.makedirs(icons_dir)
        list_cover = []
        is_update = 0
        try:
            for file in os.listdir(icons_dir):
                if file.lower().endswith(".jpg"):
                    file_cover = {'path': os.path.join(icons_dir, file)}
                    f_item = CoverItem(**file_cover)
                    list_cover.append(f_item)
                    # A matching MD5 means the cached cover already exists on disk.
                    if f_item.md5 == cache_cover_item.md5:
                        is_update += 1
            if is_update == 0:
                new_cover = {'path': FileNaming.cover_format_path(cover_img["path"])}
                shutil.copy(cache_cover["path"], new_cover["path"])
                list_cover.append(CoverItem(**new_cover))
        except Exception:
            raise SystemExit("Cover check failed")
        return list_cover

    async def update_cbz_covers(self, manga_info: MangaInfo):
        """Update the cover images that sit alongside the CBZ files."""
        cbz_dir = DirectoryNaming().chapter_cbz_dir(manga_info=manga_info)
        list_cbz = list(FileNaming().get_filenames_optimized(cbz_dir, ext_filter=[".CBZ"]))

        list_cover = await self.update_covers(manga_info)

        # Collects the cover .jpg path for every CBZ.
        list_file_img = []

        for cbz_path in list_cbz:
            first_cover_path = str(cbz_path).split(".")[0] + ".jpg"
            if len(list_cover) == 1:
                if FileNaming().file_update_by_date(first_cover_path, day=30) or not os.path.exists(first_cover_path):
                    shutil.copy(list_cover[0].path, first_cover_path)
                    list_file_img.append(first_cover_path)
                    logger.info(f"Copied {list_cover[0].path} ==> {first_cover_path}")
                else:
                    list_file_img.append(first_cover_path)
                continue
            cover_count = 1
            for cover in list_cover:
                cover_path = cover.path
                if os.path.exists(first_cover_path):
                    os.remove(first_cover_path)
                new_cover_path = FileNaming().cover_format_path(str(cbz_path).split(".")[0] + ".jpg", count=cover_count)
                if FileNaming().file_update_by_date(new_cover_path, day=30) or not os.path.exists(new_cover_path):
                    shutil.copy(cover_path, new_cover_path)
                    list_file_img.append(new_cover_path)
                    logger.info(f"Copied {cover_path} ==> {new_cover_path}")
                else:
                    list_file_img.append(new_cover_path)
                cover_count += 1

        list_cbz_and_img = list(FileNaming().get_filenames_optimized(cbz_dir, ext_filter=[".jpg"]))
        clear_imgs = DirectoryNaming.get_unique_ordered(list_cbz_and_img, list_file_img)
        # Remove cover images that no longer belong to any CBZ.
        if len(clear_imgs) > 0:
            try:
                for img in clear_imgs:
                    os.remove(img)
            except Exception:
                logger.error(f"Failed to delete {clear_imgs}")

    async def download_manga(self, manga_url: str) -> AsyncGenerator[Dict, None]:
        """Download a whole manga.

        Yields progress events as dicts whose 'type' key is one of
        'info', 'chapters', 'cover', 'chapter' or 'error'.
        """
        try:
            # Fetch the manga info page.
            info = await self.get_manga_info(manga_url)
            yield {'type': 'info', 'data': info, 'item': info}

            # Build the chapter list.
            chapters = await self.get_chapter_list(info)
            yield {'type': 'chapters', 'data': chapters, 'item': info}

            # Refresh the covers.
            yield {'type': 'cover', 'item': info}
            covers = await self.update_covers(info)

            # Download each chapter.
            for chapter in chapters:
                try:
                    if chapter.status == "downloaded":
                        logger.debug(f"Chapter {chapter.title} already downloaded")
                        continue
                    images = await self.get_chapter_images(chapter.url)
                    manga_item = MangaItem(
                        info=info,
                        covers=covers,
                        chapter=chapter,
                        chapter_images=images,
                        chapters=chapters
                    ).get_item()

                    yield {
                        'type': 'chapter',
                        'chapter': str(chapter.title),
                        'images': images,
                        'item': manga_item
                    }
                except Exception as e:
                    yield {
                        'type': 'error',
                        'chapter': chapter,
                        'error': str(e)
                    }
                    continue

            # Runs once every chapter has been processed.
            await self.update_cbz_covers(info)

        except Exception as e:
            yield {'type': 'error', 'error': str(e)}

    async def get_manga_list(self, manga_url: str) -> List[Dict[str, str]]:
        """Fetch and parse a manga listing page."""
        try:
            html = await self._get(manga_url)
            tree = etree.HTML(html)
            return self.extractor.extract_manga_list(tree)
        except Exception as e:
            if isinstance(e, (ParseError, SiteError)):
                raise SystemExit(f"Failed to parse manga list: {str(e)}")
            raise ParseError(f"Failed to parse manga list: {str(e)}")
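

# ---------------------------------------------------------------------------
# Hedged usage sketch. "ExampleSite" and the XPath it uses are hypothetical and
# only illustrate the contract a concrete site must fulfil: implement
# get_chapter_images() and provide an `extractor` with extract_manga_info() /
# extract_manga_list() (omitted here, which is why the demo call stays
# commented out). Real site implementations would subclass BaseSite elsewhere.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    class ExampleSite(BaseSite):
        async def get_chapter_images(self, chapter_url: str) -> List[str]:
            # Fetch the chapter page and collect every <img src> on it.
            html = await self._get(chapter_url)
            tree = etree.HTML(html)
            return [str(src) for src in tree.xpath("//img/@src")]

    async def _demo(url: str) -> None:
        async with ExampleSite() as site:
            async for event in site.download_manga(url):
                if event["type"] == "chapter":
                    logger.info(f"chapter ready: {event['chapter']}")
                elif event["type"] == "error":
                    logger.error(f"download error: {event.get('error')}")

    # asyncio.run(_demo("https://example.com/manga/1"))  # replace with a real manga URL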