# NewComicDownloader/src/sites/base.py

from abc import ABC, abstractmethod
from typing import List, Dict, Optional, AsyncGenerator
from pathlib import Path
import aiohttp
import os
import shutil
import asyncio
import logging
from src.config import DEFAULT_HEADERS, TIMEOUT, RETRIES, PROXY_URL, RETRY_PROXY
from lxml import etree
from src.common.utils import Cache, CBZUtils  # import the cache class
from src.common.item import Chapter, MangaItem, MangaInfo, CoverItem
from src.common.exceptions import SiteError, NetworkError, ParseError
from src.common.logging import setup_logging
from src.common.naming import DirectoryNaming, FileNaming
from src.common.ComicInfo import ComicInfo, ImageInfo, ComicInfoXml

logger = setup_logging(__name__)


class BaseSite(ABC):
    """Base class for comic site adapters."""

    def __init__(self):
        self.session: Optional[aiohttp.ClientSession] = None
        self.headers = DEFAULT_HEADERS.copy()
        self.cache = Cache()  # initialize the HTML cache

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(headers=self.headers)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def _get(self, url: str, retries: int = RETRIES, PROXY: bool = RETRY_PROXY) -> str:
        """Send a GET request, with caching, retries and error handling."""
        # Try to serve the HTML from the cache first
        cached_html = self.cache.get(url)
        if cached_html:
            logger.debug(f"Serving HTML from cache: {url}")
            return cached_html
        for attempt in range(retries):
            try:
                proxy = PROXY_URL if PROXY else None
                async with self.session.get(str(url), proxy=proxy) as response:
                    if response.status == 200:
                        html = await response.text()
                        self.cache.set(url, html)  # store the HTML in the cache
                        return html
                    elif response.status == 404:
                        raise SiteError(f"Page not found: {url}")
                    elif response.status == 403:
                        raise SiteError(f"Access denied: {url}")
                    else:
                        raise NetworkError(f"HTTP error {response.status}: {url}")
            except aiohttp.ClientError as e:
                if attempt == retries - 1:
                    raise NetworkError(f"Network error: {str(e)}")
                logger.info(f"Retrying (attempt {attempt + 2}): {url}")
                await asyncio.sleep(2 * (attempt + 1))

    @abstractmethod
    async def get_chapter_images(self, chapter_url: str) -> List[str]:
        """Return all image URLs for a chapter."""
        pass

    #@abstractmethod
    async def get_manga_info(self, manga_url: str) -> MangaInfo:
        """Fetch and parse the manga's metadata."""
        try:
            html = await self._get(manga_url)
            tree = etree.HTML(html)
            return self.extractor.extract_manga_info(tree)
        except Exception as e:
            if isinstance(e, (ParseError, SiteError)):
                raise
            raise ParseError(f"Failed to parse manga info: {str(e)}")

    #@abstractmethod
    #async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]:
    #    """Return the manga's chapter list."""
    #    pass

    async def get_chapter_list(self, manga_info: MangaInfo) -> List[Chapter]:
        """Build the chapter list and mark chapters that are already downloaded."""
        try:
            list_chapter = manga_info.get_list_chapter()
            down_chapter = []
            downloaded_chapter = []
            number = 0
            for chapter in list_chapter:
                number += 1
                cbz_path = FileNaming.chapter_cbz(manga_info=manga_info, chapter=chapter)
                if os.path.exists(cbz_path):
                    # Check whether this is the series' final chapter
                    if manga_info.is_chapter_ended(chapter.title):
                        # Final chapter of a completed series: refresh the ComicInfo.xml counts
                        # if ci.Count != "" and int(ci.Count) != 0 and int(ci.Count) != int(number):
                        #     count = number
                        xml_data = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, number=number, count=number)
                        # Write the updated ComicInfo.xml back into the CBZ file
                        if xml_data:
                            CBZUtils().update_cbz_with_new_xml(cbz_path, xml_data)
                            # The temporarily generated ComicInfo.xml is removed after the update
                            logger.info(f"Updated the ComicInfo.xml Number in {cbz_path}")
                        # end of temporary addition
                    logger.debug(f"Chapter {chapter.title} already exists")
                    chapter.status = "downloaded"
                    downloaded_chapter.append({"name": chapter.title, "status": chapter.status, "path": cbz_path})
                down_chapter.append(chapter)
            if manga_info.status == "已完结":
                from src.common.utils import KomgaAPI, MangaUtils
                is_update_komga_ended = MangaUtils("mangas_ended.json").search_manga(name=manga_info.title)
                if is_update_komga_ended is not None:
                    logger.info(f"{manga_info.title} is completed and already on the Komga completed list; no update needed")
                else:
                    if len(downloaded_chapter) == len(list_chapter):
                        logger.info(f"{manga_info.title} is completed and all chapters have been downloaded")
                        KomgaAPI().update_series_ended(manga_info.title)
                        logger.info(f"{manga_info.title} is completed; Komga status updated")
                        MangaUtils("mangas_ended.json").add_manga(name=manga_info.title)
                    else:
                        logger.info(f"{manga_info.title} is completed, but not all chapters are downloaded yet (possibly a network issue)")
            return down_chapter
        except Exception as e:
            if isinstance(e, (ParseError, SiteError)):
                raise
            raise ParseError(f"Failed to parse chapter list: {str(e)}")

    async def update_covers(self, manga_info: MangaInfo) -> List[CoverItem]:
        """Sync the covers in the Icons directory with the cached cover."""
        cache_cover = {'path': str(DirectoryNaming.manga_cover_dir(manga_info, cache=True))}
        cover_img = {'path': str(DirectoryNaming.manga_cover_dir(manga_info, cache=False))}
        cache_cover_item = CoverItem(**cache_cover)
        icons_dir = os.path.dirname(cover_img['path'])
        if not os.path.exists(icons_dir):
            os.makedirs(icons_dir)
        list_cover = []
        is_update = 0
        try:
            for file in os.listdir(icons_dir):
                if file.lower().endswith(".jpg"):
                    file_cover = {'path': os.path.join(icons_dir, file)}
                    f_item = CoverItem(**file_cover)
                    list_cover.append(f_item)
                    if f_item.md5 == cache_cover_item.md5:
                        is_update += 1
            if is_update == 0:
                new_cover = {'path': FileNaming.cover_format_path(cover_img["path"])}
                shutil.copy(cache_cover["path"], new_cover["path"])
                list_cover.append(CoverItem(**new_cover))
        except Exception as e:
            raise SiteError(f"Cover check failed: {str(e)}")
        return list_cover

    async def update_cbz_covers(self, manga_info: MangaInfo):
        """Update the cover images for the manga's CBZ files."""
        cbz_dir = DirectoryNaming().chapter_cbz_dir(manga_info=manga_info)
        list_cbz = list(FileNaming().get_filenames_optimized(cbz_dir, ext_filter=[".CBZ"]))
        list_cover = await self.update_covers(manga_info)
        # Collects every cover .jpg that belongs to a CBZ file
        list_file_img = []
        for cbz_path in list_cbz:
            # Cover path next to the CBZ file (splitext keeps dots elsewhere in the path intact)
            first_cover_path = os.path.splitext(str(cbz_path))[0] + ".jpg"
            if len(list_cover) == 1:
                if FileNaming().file_update_by_date(first_cover_path, day=30) or not os.path.exists(first_cover_path):
                    shutil.copy(list_cover[0].path, first_cover_path)
                    list_file_img.append(first_cover_path)
                    logger.info(f"Copied {list_cover[0].path} ==> {first_cover_path}")
                else:
                    list_file_img.append(first_cover_path)
                continue
            cover_count = 1
            for cover in list_cover:
                cover_path = cover.path
                if os.path.exists(first_cover_path):
                    os.remove(first_cover_path)
                new_cover_path = FileNaming().cover_format_path(os.path.splitext(str(cbz_path))[0] + ".jpg", count=cover_count)
                if FileNaming().file_update_by_date(new_cover_path, day=30) or not os.path.exists(new_cover_path):
                    shutil.copy(cover_path, new_cover_path)
                    list_file_img.append(new_cover_path)
                    logger.info(f"Copied {cover_path} ==> {new_cover_path}")
                else:
                    list_file_img.append(new_cover_path)
                cover_count += 1
        list_cbz_and_img = list(FileNaming().get_filenames_optimized(cbz_dir, ext_filter=[".jpg"]))
        clear_imgs = DirectoryNaming.get_unique_ordered(list_cbz_and_img, list_file_img)
        # Remove stale cover images that no longer correspond to a CBZ
        if len(clear_imgs) > 0:
            try:
                for img in clear_imgs:
                    os.remove(img)
            except Exception:
                logger.error(f"Failed to delete {clear_imgs}")

    async def download_manga(self, manga_url: str) -> AsyncGenerator[Dict, None]:
        """Download an entire manga, yielding progress events as dicts."""
        try:
            # Fetch the manga's metadata
            info = await self.get_manga_info(manga_url)
            yield {'type': 'info', 'data': info, 'item': info}
            # Fetch the chapter list
            chapters = await self.get_chapter_list(info)
            yield {'type': 'chapters', 'data': chapters, 'item': info}
            # Download the cover
            yield {'type': 'cover', 'item': info}
            covers = await self.update_covers(info)
            # Download each chapter
            for chapter in chapters:
                try:
                    if chapter.status == "downloaded":
                        logger.debug(f"Chapter {chapter.title} already downloaded")
                        continue
                    images = await self.get_chapter_images(chapter.url)
                    manga_item = MangaItem(
                        info=info,
                        covers=covers,
                        chapter=chapter,
                        chapter_images=images,
                        chapters=chapters
                    ).get_item()
                    yield {
                        'type': 'chapter',
                        'chapter': str(chapter.title),
                        'images': images,
                        'item': manga_item
                    }
                except Exception as e:
                    yield {
                        'type': 'error',
                        'chapter': chapter,
                        'error': str(e)
                    }
                    continue
            # Run once all chapters have been downloaded
            await self.update_cbz_covers(info)
        except Exception as e:
            yield {'type': 'error', 'error': str(e)}

    async def get_manga_list(self, manga_url: str) -> List[Dict[str, str]]:
        """Fetch and parse the site's manga list."""
        try:
            html = await self._get(manga_url)
            tree = etree.HTML(html)
            return self.extractor.extract_manga_list(tree)
        except Exception as e:
            if isinstance(e, (ParseError, SiteError)):
                raise
            raise ParseError(f"Failed to parse manga list: {str(e)}")