def _validate_and_export_xml(self, xml_str, xsd_source, output_path=None, is_xsd_file=True):
    """Validate an XML string against an XSD schema and optionally write it to disk.

    Errors are logged and reported through the return value; nothing is raised
    to the caller.

    Args:
        xml_str: XML document content as a string.
        xsd_source: Path to an XSD file, or the XSD content itself as a string.
        output_path: File path to write the validated XML to. When omitted
            (or otherwise falsy) nothing is written.
        is_xsd_file: True if ``xsd_source`` is a file path, False if it is
            XSD text.

    Returns:
        ``True`` if validation succeeded and the XML was written to
        ``output_path``; the unchanged ``xml_str`` if validation succeeded and
        no ``output_path`` was given; ``False`` on any failure (malformed XML,
        schema violation, unreadable/invalid XSD, I/O error).
    """
    try:
        # Load the XSD either from a file path or from in-memory text.
        if is_xsd_file:
            xsd_doc = etree.parse(xsd_source)
        else:
            xsd_doc = etree.fromstring(xsd_source.encode('utf-8'))

        schema_validator = etree.XMLSchema(xsd_doc)

        # Parse first, validate second. A schema-bound XMLParser reports schema
        # violations as XMLSyntaxError, which would make the DocumentInvalid
        # handler below unreachable and mislabel schema errors as syntax errors.
        xml_root = etree.fromstring(xml_str.encode('utf-8'))
        schema_validator.assertValid(xml_root)

        logger.debug("✅ XML 验证成功!")

        if not output_path:
            logger.debug("✅ XML 验证成功,但未指定输出路径。返回原始XML字符串。")
            return xml_str

        # A bare file name has an empty dirname, and os.makedirs("") raises;
        # only create the directory when there is one to create.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        xml_tree = etree.ElementTree(xml_root)

        # Re-indent the tree so the written file is human-readable.
        etree.indent(xml_tree, space=" ")

        # Write the document including the XML declaration.
        xml_tree.write(
            output_path,
            encoding='utf-8',
            xml_declaration=True,
            pretty_print=True
        )

        logger.debug(f"💾 XML文件已保存至: {os.path.abspath(output_path)}")
        return True

    except etree.XMLSyntaxError as e:
        # Malformed XML (or a malformed XSD file/string).
        logger.error("❌ XML 语法错误:")
        logger.error(f" 错误详情: {e.msg}")
        return False

    except etree.DocumentInvalid as e:
        # Well-formed XML that violates the schema: report every violation.
        logger.error("❌ XML 结构/内容验证失败:")
        for error in e.error_log:
            logger.error(f" 行 {error.line}, 列 {error.column}: {error.message.strip()}")
        return False

    except Exception as e:
        # Best-effort boundary: schema compilation errors, I/O errors, bad
        # argument types, etc. are logged and reported as failure.
        logger.error(f"❌ 发生未知错误: {str(e)}")
        return False
def is_chapter_ended(cls, str_chapter: str):
    """Tell whether ``str_chapter`` is the last known chapter and is named like a finale.

    Despite its name, ``cls`` is the object whose ``chapters_name`` list is
    inspected. The result is True only when all of the following hold:

    * ``chapters_name`` has more than one entry;
    * ``str_chapter`` equals ``FileNaming.chinese_file_name`` applied to the
      last entry;
    * after replacing ``-`` with a space and splitting on single spaces, one
      of the resulting tokens is exactly one of the ending markers below.

    The ``status`` field is not consulted (an earlier status-based shortcut is
    disabled).

    Args:
        str_chapter: Chapter name to test.

    Returns:
        True if the chapter matches as described above, otherwise False.
    """
    names = cls.chapters_name
    if len(names) <= 1:
        return False
    if str_chapter != FileNaming.chinese_file_name(names[-1]):
        return False

    # Whole-token comparison: a marker must appear as its own
    # space/hyphen-separated word, not merely as a substring.
    tokens = str_chapter.replace("-", " ").split(" ")
    ending_markers = (
        "最终话",
        "最终回",
        "完结",
        "大结局",
        "大团圆",
        "终章",
        "终结篇",
        "结局篇",
        "后记",
    )
    for marker in ending_markers:
        if marker in tokens:
            logger.info(f"比中已完结章节名: {str_chapter} ")
            return True
    return False
int(ci.Count) != 0 and int(ci.Count) != int(number): + # count = number + xml_data = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, number=number, count=number) + # 更新ComicInfo.xml至CBZ文件中 + if xml_data: + CBZUtils().update_cbz_with_new_xml(cbz_path, xml_data) # 更新完成后删除临时生成的ComicInfo.xml - xml_path.unlink() - logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Count完成") + logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Number完成") # 临时添加end logger.debug(f"{chapter.title} 章节已存在") chapter.status = "downloaded" @@ -115,13 +112,17 @@ class BaseSite(ABC): down_chapter.append(chapter) if manga_info.status == "已完结": from src.common.utils import KomgaAPI, MangaUtils - if len(downloaded_chapter) == len(list_chapter): - logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成") - KomgaAPI().update_series_ended(manga_info.title) - logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态") - MangaUtils("mangas_ended.json").add_manga(name= manga_info.title) + is_update_komga_ended = MangaUtils("mangas_ended.json").search_manga(name= manga_info.title) + if is_update_komga_ended != None: + logger.info(f"{manga_info.title} 漫画已完结, 已存在于KOMGA服务器已完结列表中,无需更新!") else: - logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因") + if len(downloaded_chapter) == len(list_chapter): + logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成") + KomgaAPI().update_series_ended(manga_info.title) + logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态") + MangaUtils("mangas_ended.json").add_manga(name= manga_info.title) + else: + logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因") return down_chapter except Exception as e: if isinstance(e, (ParseError, SiteError)): diff --git a/test.py b/test.py index 57c4a19..4fb51a0 100644 --- a/test.py +++ b/test.py @@ -434,7 +434,7 @@ class comicInfo: if __name__ == "__main__": print("开始处理") # ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ") - xml_path = 
ComicInfoXml().update_comicinfo_count(37,"/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ") + xml_path = ComicInfoXml().update_comicinfo_count_or_number(count=37,cbz_path="/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ") comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8")) #items = ci().__dict__.keys() #print(items) \ No newline at end of file