This commit is contained in:
caiwx86 2025-07-13 02:21:00 +08:00
parent c020596284
commit 58294db981
5 changed files with 135 additions and 56 deletions

View File

@ -254,6 +254,7 @@ class ComicInfo:
for key, value in kwargs.items(): for key, value in kwargs.items():
if hasattr(self, key): if hasattr(self, key):
setattr(self, key, value) setattr(self, key, value)
@dataclass @dataclass
class ComicPageInfo: class ComicPageInfo:
# ComicInfo.xml 中的<Page> # ComicInfo.xml 中的<Page>
@ -294,31 +295,80 @@ class ComicInfoXml:
""" """
生成ComicInfo.xml 生成ComicInfo.xml
""" """
def _save_xml_to_file(self, xml_string, filename):
    """
    Write an XML string to ``filename`` as UTF-8, creating parent directories.

    Args:
        xml_string: The XML text to write.
        filename: Destination path. May be a bare file name, in which case
            the file is written to the current working directory.
    """
    base_dir = os.path.dirname(filename)
    # A bare file name has an empty dirname and os.makedirs("") raises, so
    # only create the directory when there is one. exist_ok=True avoids the
    # race between checking for the directory and creating it.
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as file:
        file.write(xml_string)
    logger.debug(f"ComicInfo.xml 生成成功 {filename}")
def _validate_and_export_xml(self, xml_str, xsd_source, output_path=None, is_xsd_file=True):
    """
    Validate an XML string against an XSD schema and optionally write it to a file.

    Args:
        xml_str: The XML document as a string.
        xsd_source: Path to the XSD file, or the XSD content itself as a string.
        output_path: Optional path the validated XML is written to.
        is_xsd_file: True when ``xsd_source`` is a file path, False when it is
            the schema text.

    Returns:
        True when validation succeeds and the XML was written to ``output_path``;
        the original ``xml_str`` when validation succeeds and no ``output_path``
        was given; False when loading the schema, parsing, validating or
        writing fails (the error is logged).
    """
    try:
        # Load the XSD schema document.
        if is_xsd_file:
            xsd_doc = etree.parse(xsd_source)
        else:
            xsd_doc = etree.fromstring(xsd_source.encode('utf-8'))
        # Build the schema validator and a parser that validates while parsing.
        schema_validator = etree.XMLSchema(xsd_doc)
        parser = etree.XMLParser(schema=schema_validator)
        # Parse and validate the XML string in one step.
        xml_root = etree.fromstring(xml_str.encode('utf-8'), parser)
        logger.debug("✅ XML 验证成功!")

        if not output_path:
            logger.debug("✅ XML 验证成功但未指定输出路径。返回原始XML字符串。")
            return xml_str

        # A bare file name has an empty dirname and os.makedirs("") raises,
        # which would be swallowed below and reported as a failure. Only
        # create the directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        xml_tree = etree.ElementTree(xml_root)
        # Re-indent the tree so the written file is human readable.
        etree.indent(xml_tree, space=" ")
        # Write the file including the XML declaration.
        xml_tree.write(
            output_path,
            encoding='utf-8',
            xml_declaration=True,
            pretty_print=True
        )
        logger.debug(f"💾 XML文件已保存至: {os.path.abspath(output_path)}")
        return True
    except etree.XMLSyntaxError as e:
        logger.error("❌ XML 语法错误:")
        logger.error(f" 错误详情: {e.msg}")
        return False
    except etree.DocumentInvalid as e:
        logger.error("❌ XML 结构/内容验证失败:")
        for error in e.error_log:
            logger.error(f"{error.line}, 列 {error.column}: {error.message.strip()}")
        return False
    except Exception as e:
        # Best-effort boundary: callers rely on a False return, not an exception.
        logger.error(f"❌ 发生未知错误: {str(e)}")
        return False
def get_page_count(self, zip_file: Path): def get_page_count(self, zip_file: Path):
"""获取 ComicInfo.xml 文件中的 <PageCount> 标签值""" """获取 ComicInfo.xml 文件中的 <PageCount> 标签值"""
# 打开ZIP文件 # 打开ZIP文件
@ -352,7 +402,7 @@ class ComicInfoXml:
# Add subelements and attributes based on presence and requirements # Add subelements and attributes based on presence and requirements
for attr, value in comic.__dict__.items(): for attr, value in comic.__dict__.items():
# if value or (attr in ['Volume', 'Year', 'Month', 'Day', 'PageCount'] and (value == -1 or value == "" ) ): # Check required attributes # if value or (attr in ['Volume', 'Year', 'Month', 'Day', 'PageCount'] and (value == -1 or value == "" ) ): # Check required attributes
if value == -1 or value == "" or value == None or value == "[]" or value == []: if value == -1 or value == 0 or value == "" or value == None or value == "[]" or value == []:
if attr in self._required_attributes(): if attr in self._required_attributes():
raise exit(f"{xml_filename} 缺少必要属性: {attr}") raise exit(f"{xml_filename} 缺少必要属性: {attr}")
else: else:
@ -383,12 +433,9 @@ class ComicInfoXml:
serialized_xml = serialize_comic_info(comic) serialized_xml = serialize_comic_info(comic)
# 保存数据XML到文件 # 保存数据XML到文件
if save_dir != None: xml_filename = os.path.join(save_dir, xml_filename) if save_dir != None and xml_filename != None: xml_filename = os.path.join(save_dir, xml_filename)
self._save_xml_to_file(serialized_xml, xml_filename)
self._validate_xml_with_xsd_file(xml_filename, xsd_filename) # 将 JSON 转换为 XML return self._validate_and_export_xml(serialized_xml, xsd_source=xsd_filename, output_path=xml_filename, is_xsd_file=True)
#xml_data = json_to_xml_with_declaration(json_data)
#print(xml_data)
return Path(xml_filename)
def _required_attributes(self): def _required_attributes(self):
""" """
@ -489,13 +536,25 @@ class ComicInfoXml:
pages: 返回更新后的页面信息列表 pages: 返回更新后的页面信息列表
""" """
# 读取 ComicInfo.xml 文件 # 读取 ComicInfo.xml 文件
is_update = False
comicinfo = self._xml_file_to_comicinfo(cbz_path=cbz_path) comicinfo = self._xml_file_to_comicinfo(cbz_path=cbz_path)
if count != -1: if count != -1 and count != "" and count != 0:
comicinfo.Count = count if comicinfo.Count == count:
if number != -1: logger.debug(f"ComicInfo.xml 中的 Count 已经是 {count},无需更新")
comicinfo.Number = number else:
comicinfo.Count = count
is_update = True
if number != -1 and number != "" and number != 0:
if comicinfo.Number == number:
logger.debug(f"ComicInfo.xml 中的 Number 已经是 {number},无需更新")
else:
comicinfo.Number = number
is_update = True
# 保存更新后的 ComicInfo.xml 文件 # 保存更新后的 ComicInfo.xml 文件
return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path)) if not is_update:
logger.debug(f"ComicInfo.xml 中的 Count 和 Number 无需更新")
return is_update
return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path), xml_filename=None)
def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE): def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE):
""" 根据Json数据生成ComicInfo.xml """ 根据Json数据生成ComicInfo.xml

View File

@ -3,6 +3,9 @@ from typing import List, Optional
from opencc import OpenCC from opencc import OpenCC
import re,os import re,os
from src.common.ComicInfo import ImageInfo from src.common.ComicInfo import ImageInfo
from src.common.logging import setup_logging
logger = setup_logging(__name__)
class FileNaming: class FileNaming:
@ -204,7 +207,23 @@ class MangaInfo(BaseModel):
# if not isinstance(v, list): # if not isinstance(v, list):
# raise ValueError('tags must be a list') # raise ValueError('tags must be a list')
# return v # return v
def is_chapter_ended(cls, str_chapter: str):
    """Report whether ``str_chapter`` is the last known chapter and is named like a finale.

    Returns True only when all of the following hold:
      * ``chapters_name`` has more than one entry,
      * ``str_chapter`` equals the last entry after it is passed through
        ``FileNaming.chinese_file_name``,
      * one of the space/hyphen separated tokens of ``str_chapter`` is exactly
        one of the ending markers below.
    Otherwise returns False.

    NOTE(review): the first parameter is named ``cls`` but no ``@classmethod``
    decorator is visible here — confirm whether this is meant to be an
    instance method.
    """
    ending_markers = {"最终话", "最终回", "完结", "大结局", "大团圆", "终章", "终结篇", "结局篇", "后记"}

    names = cls.chapters_name
    # Only the final entry of a multi-chapter list can qualify.
    if len(names) <= 1:
        return False
    if str_chapter != FileNaming.chinese_file_name(names[-1]):
        return False

    # Hyphens count as separators; a marker must match a whole token.
    tokens = str_chapter.replace("-", " ").split(" ")
    if ending_markers.isdisjoint(tokens):
        return False

    logger.info(f"比中已完结章节名: {str_chapter} ")
    return True
class MangaItem(BaseModel): class MangaItem(BaseModel):
info: MangaInfo info: MangaInfo
covers: List[CoverItem] = [] covers: List[CoverItem] = []

View File

@ -489,7 +489,7 @@ class CBZUtils:
new_zip.writestr(item, source_zip.read(item.filename)) new_zip.writestr(item, source_zip.read(item.filename))
# 添加新 XML # 添加新 XML
new_zip.writestr("ComicInfo.xml", new_xml_content) new_zip.writestr("ComicInfo.xml", new_xml_content.encode('utf-8'))
os.remove(tmp.name) # 清理临时文件 os.remove(tmp.name) # 清理临时文件
return True return True

View File

@ -82,32 +82,29 @@ class BaseSite(ABC):
#async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]: #async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]:
# """获取漫画章节列表""" # """获取漫画章节列表"""
# pass # pass
async def get_chapter_list(self, manga_info: MangaInfo) -> List[Dict[str, str]]: async def get_chapter_list(self, manga_info: MangaInfo) -> List[Dict[str, str]]:
"""获取章节列表""" """获取章节列表"""
try: try:
# result_type list[Chapter]
list_chapter = manga_info.get_list_chapter() list_chapter = manga_info.get_list_chapter()
# 临时添加begin
# 获取最新章节
last_chapter = list_chapter[-1] if list_chapter else []
# 临时添加end
down_chapter = [] down_chapter = []
downloaded_chapter = [] downloaded_chapter = []
number = 0
for chapter in list_chapter: for chapter in list_chapter:
number += 1
cbz_path = FileNaming.chapter_cbz(manga_info=manga_info,chapter=chapter) cbz_path = FileNaming.chapter_cbz(manga_info=manga_info,chapter=chapter)
if os.path.exists(cbz_path): if os.path.exists(cbz_path):
# 临时添加begin # 判断是否是最新章节
if chapter.title == last_chapter.title and manga_info.status == "已完结": if manga_info.is_chapter_ended(chapter.title):
# 如果是最新章节且漫画已完结,则不再下 # 如果是最新章节且漫画已完结,则不再下
ci = ComicInfoXml()._xml_file_to_comicinfo(cbz_path=cbz_path) # if ci.Count != "" and int(ci.Count) != 0 and int(ci.Count) != int(number):
if ci.Count == "": # count = number
# 生成ComicInfo.xml xml_data = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, number=number, count=number)
xml_path = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, count=len(list_chapter)) # 更新ComicInfo.xml至CBZ文件中
# 更新ComicInfo.xml至CBZ文件中 if xml_data:
CBZUtils().update_cbz_with_new_xml(cbz_path, xml_path.read_text(encoding="utf-8")) CBZUtils().update_cbz_with_new_xml(cbz_path, xml_data)
# 更新完成后删除临时生成的ComicInfo.xml # 更新完成后删除临时生成的ComicInfo.xml
xml_path.unlink() logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Number完成")
logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Count完成")
# 临时添加end # 临时添加end
logger.debug(f"{chapter.title} 章节已存在") logger.debug(f"{chapter.title} 章节已存在")
chapter.status = "downloaded" chapter.status = "downloaded"
@ -115,13 +112,17 @@ class BaseSite(ABC):
down_chapter.append(chapter) down_chapter.append(chapter)
if manga_info.status == "已完结": if manga_info.status == "已完结":
from src.common.utils import KomgaAPI, MangaUtils from src.common.utils import KomgaAPI, MangaUtils
if len(downloaded_chapter) == len(list_chapter): is_update_komga_ended = MangaUtils("mangas_ended.json").search_manga(name= manga_info.title)
logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成") if is_update_komga_ended != None:
KomgaAPI().update_series_ended(manga_info.title) logger.info(f"{manga_info.title} 漫画已完结, 已存在于KOMGA服务器已完结列表中无需更新")
logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
else: else:
logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因") if len(downloaded_chapter) == len(list_chapter):
logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成")
KomgaAPI().update_series_ended(manga_info.title)
logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
else:
logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因")
return down_chapter return down_chapter
except Exception as e: except Exception as e:
if isinstance(e, (ParseError, SiteError)): if isinstance(e, (ParseError, SiteError)):

View File

@ -434,7 +434,7 @@ class comicInfo:
if __name__ == "__main__": if __name__ == "__main__":
print("开始处理") print("开始处理")
# ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ") # ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
xml_path = ComicInfoXml().update_comicinfo_count(37,"/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ") xml_path = ComicInfoXml().update_comicinfo_count_or_number(count=37,cbz_path="/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8")) comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8"))
#items = ci().__dict__.keys() #items = ci().__dict__.keys()
#print(items) #print(items)