update

2025-07-13 02:21:00 +08:00 · 2025-07-13 02:21:00 +08:00 · 58294db981
commit 58294db981
parent c020596284
5 changed files with 135 additions and 56 deletions
--- a/src/common/ComicInfo.py
+++ b/src/common/ComicInfo.py
@ -254,6 +254,7 @@ class ComicInfo:
            for key, value in kwargs.items():
                if hasattr(self, key):
                    setattr(self, key, value)
+                    
@dataclass 
 class ComicPageInfo:
    # ComicInfo.xml 中的<Page>
@ -294,30 +295,79 @@ class ComicInfoXml:
    """
        生成ComicInfo.xml
    """
-    def _save_xml_to_file(self, xml_string, filename):
-        """
-        Save the XML string to a file
-        """
-        base_dir = os.path.dirname(filename)
-        if not os.path.exists(base_dir): os.makedirs(base_dir)
-        with open(filename, "w", encoding="utf-8") as file:
-            file.write(xml_string)
-        logger.debug(f"ComicInfo.xml 生成成功 {filename}")
    
-    def _validate_xml_with_xsd_file(self, xml_file, xsd_file, remove=True):
+    def _validate_and_export_xml(self, xml_str, xsd_source, output_path=None, is_xsd_file=True):
        """
-        Validate the XML file against the XSD file
+        验证XML字符串并输出为文件
+    
+        参数:
+        xml_str      : XML内容字符串
+        xsd_source   : XSD文件路径 或 XSD字符串
+        output_path  : 验证成功后输出的XML文件路径（可选）
+        is_xsd_file  : True表示xsd_source是文件路径，False表示是字符串
+    
+        返回:
+         无输出路径：验证成功返回原始XML字符串，失败返回False
+         有输出路径：验证成功返回True并输出XML文件，失败返回False
        """
-        xml_doc = etree.parse(xml_file)
-        with open(xsd_file, 'r', encoding="utf-8") as file:
-            xsd_doc = etree.XMLSchema(etree.parse(file))
        try:
-            xsd_doc.assertValid(xml_doc)
-            logger.debug(f"ComicInfo.xml 通过 XSD 验证成功 {xml_file}")
+            # 加载XSD模式
+            if is_xsd_file:
+                xsd_doc = etree.parse(xsd_source)
+            else:
+                xsd_doc = etree.fromstring(xsd_source.encode('utf-8'))
+        
+            # 创建XML模式验证器
+            schema_validator = etree.XMLSchema(xsd_doc)
+        
+            # 创建XML解析器（启用XSD验证）
+            parser = etree.XMLParser(schema=schema_validator)
+        
+            # 解析并验证XML字符串
+            xml_root = etree.fromstring(xml_str.encode('utf-8'), parser)
+        
+            logger.debug("✅ XML 验证成功！")
+        
+            # 如果需要输出文件
+            if output_path:
+                # 确保目录存在
+                os.makedirs(os.path.dirname(output_path), exist_ok=True)
+            
+                # 创建ElementTree对象
+                xml_tree = etree.ElementTree(xml_root)
+            
+                # 美化输出格式（缩进和换行）
+                etree.indent(xml_tree, space="  ")
+            
+                # 写入文件（包含XML声明）
+                xml_tree.write(
+                    output_path,
+                    encoding='utf-8',
+                    xml_declaration=True,
+                    pretty_print=True
+                )
+            
+                logger.debug(f"💾 XML文件已保存至: {os.path.abspath(output_path)}")
+        
+                return True
+            else:
+                logger.debug("✅ XML 验证成功，但未指定输出路径。返回原始XML字符串。")
+                return xml_str
+        
+        except etree.XMLSyntaxError as e:
+            logger.error("❌ XML 语法错误:")
+            logger.error(f"  错误详情: {e.msg}")
+            return False
+        
        except etree.DocumentInvalid as e:
-            logger.error(f"ComicInfo.xml 通过 XSD 验证失败 {xml_file}")
-            if remove:
-                os.remove(xml_file)
+            logger.error("❌ XML 结构/内容验证失败:")
+            for error in e.error_log:
+                logger.error(f"  行 {error.line}, 列 {error.column}: {error.message.strip()}")
+            return False
+        
+        except Exception as e:
+            logger.error(f"❌ 发生未知错误: {str(e)}")
+            return False

    def get_page_count(self, zip_file: Path):
        """获取 ComicInfo.xml 文件中的 <PageCount> 标签值"""
@ -352,7 +402,7 @@ class ComicInfoXml:
            # Add subelements and attributes based on presence and requirements
            for attr, value in comic.__dict__.items():
    #            if value or (attr in ['Volume', 'Year', 'Month', 'Day', 'PageCount'] and (value == -1 or value == "" ) ):  # Check required attributes
-                if value == -1 or value == "" or value == None or value == "[]" or value == []:
+                if value == -1 or value == 0 or value == "" or value == None or value == "[]" or value == []:
                    if attr in self._required_attributes():
                        raise exit(f"{xml_filename} 缺少必要属性: {attr}")
                    else:
@ -383,12 +433,9 @@ class ComicInfoXml:
        serialized_xml = serialize_comic_info(comic)
        
        # 保存数据XML到文件
-        if save_dir != None: xml_filename = os.path.join(save_dir, xml_filename)
-        self._save_xml_to_file(serialized_xml, xml_filename)
-        self._validate_xml_with_xsd_file(xml_filename, xsd_filename)   # 将 JSON 转换为 XML
-        #xml_data = json_to_xml_with_declaration(json_data)
-        #print(xml_data)
-        return Path(xml_filename)
+        if save_dir != None and xml_filename != None: xml_filename = os.path.join(save_dir, xml_filename)
+        
+        return self._validate_and_export_xml(serialized_xml, xsd_source=xsd_filename, output_path=xml_filename, is_xsd_file=True)
    
    def _required_attributes(self):
        """
@ -489,13 +536,25 @@ class ComicInfoXml:
            pages: 返回更新后的页面信息列表
        """
        # 读取 ComicInfo.xml 文件
+        is_update = False
        comicinfo = self._xml_file_to_comicinfo(cbz_path=cbz_path)
-        if count != -1:
-            comicinfo.Count = count
-        if number != -1:
-            comicinfo.Number = number
+        if count != -1 and count != "" and count != 0:
+            if comicinfo.Count == count:
+                logger.debug(f"ComicInfo.xml 中的 Count 已经是 {count}，无需更新")
+            else:
+                comicinfo.Count = count
+                is_update = True
+        if number != -1 and number != "" and number != 0:
+            if comicinfo.Number == number:
+                logger.debug(f"ComicInfo.xml 中的 Number 已经是 {number}，无需更新")
+            else:
+                comicinfo.Number = number
+                is_update = True
        # 保存更新后的 ComicInfo.xml 文件
-        return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path))
+        if not is_update:
+            logger.debug(f"ComicInfo.xml 中的 Count 和 Number 无需更新")
+            return is_update
+        return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path), xml_filename=None)
    
    def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE):
        """ 根据Json数据生成ComicInfo.xml
--- a/src/common/item.py
+++ b/src/common/item.py
@ -3,6 +3,9 @@ from typing import List, Optional
 from opencc import OpenCC
 import re,os
 from src.common.ComicInfo import ImageInfo
+from src.common.logging import setup_logging
+
+logger = setup_logging(__name__)

 class FileNaming:
    
@ -205,6 +208,22 @@ class MangaInfo(BaseModel):
    #        raise ValueError('tags must be a list')
    #    return v
    
+    def is_chapter_ended(cls, str_chapter: str):
+        """ 检查章节是否已完结 """
+        ended_chapter_keys = [ "最终话" , "最终回", "完结", "大结局", "大团圆", "终章", "终结篇", "结局篇" , "后记"]
+        # 如果章节名称列表长度大于1且最后一个章节名称与传入的章节名称相同
+        if len(cls.chapters_name) > 1 and str_chapter == FileNaming.chinese_file_name(cls.chapters_name[-1]):
+            # 检查状态是否为已完结
+            # if cls.status == "已完结": return True
+            # 检查章节名称是否包含已完结的关键字
+            str_chapter_key = str_chapter.replace("-", " ").split(" ")
+            # 如果章节名称包含已完结的关键字，则返回True
+            if any(key in str_chapter_key for key in ended_chapter_keys):
+                logger.info(f"比中已完结章节名： {str_chapter} ")
+                return True
+        return False
+    
+    
 class MangaItem(BaseModel):
    info: MangaInfo
    covers: List[CoverItem] = []
--- a/src/common/utils.py
+++ b/src/common/utils.py
@ -489,7 +489,7 @@ class CBZUtils:
                                new_zip.writestr(item, source_zip.read(item.filename))

                        # 添加新 XML
-                        new_zip.writestr("ComicInfo.xml", new_xml_content)
+                        new_zip.writestr("ComicInfo.xml", new_xml_content.encode('utf-8'))

                os.remove(tmp.name)  # 清理临时文件
                return True
--- a/src/sites/base.py
+++ b/src/sites/base.py
@ -82,32 +82,29 @@ class BaseSite(ABC):
    #async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]:
    #    """获取漫画章节列表"""
    #    pass        
+     
    async def get_chapter_list(self, manga_info: MangaInfo) -> List[Dict[str, str]]:
        """获取章节列表"""
        try:
-            # result_type list[Chapter]
            list_chapter = manga_info.get_list_chapter()
-            # 临时添加begin
-            # 获取最新章节
-            last_chapter = list_chapter[-1] if list_chapter else []
-            # 临时添加end
            down_chapter = []
            downloaded_chapter = []
+            number = 0
            for chapter in list_chapter:
+                number += 1
                cbz_path = FileNaming.chapter_cbz(manga_info=manga_info,chapter=chapter)
                if os.path.exists(cbz_path):
-                    # 临时添加begin
-                    if chapter.title == last_chapter.title and manga_info.status == "已完结":
+                    # 判断是否为完结章节（章节列表的最后一章且章节名包含完结关键字）
+                    if manga_info.is_chapter_ended(chapter.title): 
                        # 如果是最新章节且漫画已完结，则不再下
-                        ci = ComicInfoXml()._xml_file_to_comicinfo(cbz_path=cbz_path)
-                        if ci.Count == "":
-                            # 生成ComicInfo.xml
-                            xml_path = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, count=len(list_chapter))
-                            # 更新ComicInfo.xml至CBZ文件中
-                            CBZUtils().update_cbz_with_new_xml(cbz_path, xml_path.read_text(encoding="utf-8"))
+                        # if ci.Count != "" and int(ci.Count) != 0 and int(ci.Count) != int(number): 
+                        #    count = number
+                        xml_data = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, number=number, count=number)
+                        # 更新ComicInfo.xml至CBZ文件中
+                        if xml_data:
+                            CBZUtils().update_cbz_with_new_xml(cbz_path, xml_data)
                            # 更新完成后删除临时生成的ComicInfo.xml
-                            xml_path.unlink()
-                            logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Count完成")
+                            logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Number完成")
                    # 临时添加end
                    logger.debug(f"{chapter.title} 章节已存在")
                    chapter.status = "downloaded"
@ -115,13 +112,17 @@ class BaseSite(ABC):
                down_chapter.append(chapter)
            if manga_info.status == "已完结":
                from src.common.utils import KomgaAPI, MangaUtils
-                if len(downloaded_chapter) == len(list_chapter):
-                    logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成")
-                    KomgaAPI().update_series_ended(manga_info.title)
-                    logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
-                    MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
+                is_update_komga_ended = MangaUtils("mangas_ended.json").search_manga(name= manga_info.title)
+                if is_update_komga_ended != None:
+                    logger.info(f"{manga_info.title} 漫画已完结, 已存在于KOMGA服务器已完结列表中，无需更新！")
                else:
-                    logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因")
+                    if len(downloaded_chapter) == len(list_chapter):
+                        logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成")
+                        KomgaAPI().update_series_ended(manga_info.title)
+                        logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
+                        MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
+                    else:
+                        logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因")
            return down_chapter
        except Exception as e:
            if isinstance(e, (ParseError, SiteError)):
--- a/test.py
+++ b/test.py
@ -434,7 +434,7 @@ class comicInfo:
 if __name__ == "__main__":
    print("开始处理")
    # ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
-    xml_path = ComicInfoXml().update_comicinfo_count(37,"/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
+    xml_path = ComicInfoXml().update_comicinfo_count_or_number(count=37,cbz_path="/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
    comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8"))
    #items = ci().__dict__.keys()
    #print(items)