update
This commit is contained in:
parent
c020596284
commit
58294db981
@ -254,6 +254,7 @@ class ComicInfo:
|
||||
for key, value in kwargs.items():
|
||||
if hasattr(self, key):
|
||||
setattr(self, key, value)
|
||||
|
||||
@dataclass
|
||||
class ComicPageInfo:
|
||||
# ComicInfo.xml 中的<Page>
|
||||
@ -294,30 +295,79 @@ class ComicInfoXml:
|
||||
"""
|
||||
生成ComicInfo.xml
|
||||
"""
|
||||
def _save_xml_to_file(self, xml_string, filename):
    """Write an XML string to ``filename`` as UTF-8, creating parent directories.

    Args:
        xml_string: The XML text to write.
        filename: Destination path. May be a bare filename with no directory
            component, in which case the file is written relative to the
            current working directory.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    base_dir = os.path.dirname(filename)
    # dirname() is "" for a bare filename and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one.
    # exist_ok=True also removes the exists()/makedirs() race.
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as file:
        file.write(xml_string)
    logger.debug(f"ComicInfo.xml 生成成功 {filename}")
|
||||
|
||||
def _validate_xml_with_xsd_file(self, xml_file, xsd_file, remove=True):
|
||||
def _validate_and_export_xml(self, xml_str, xsd_source, output_path=None, is_xsd_file=True):
    """Validate an XML string against an XSD schema and optionally write it to a file.

    Args:
        xml_str: The XML document as a string.
        xsd_source: Either a path to an XSD file or the XSD document itself
            as a string (see ``is_xsd_file``).
        output_path: Where to write the validated XML. When falsy, nothing
            is written to disk.
        is_xsd_file: True when ``xsd_source`` is a file path, False when it
            is the schema text.

    Returns:
        True when validation succeeded and the XML was written to
        ``output_path``; the original ``xml_str`` when validation succeeded
        and no ``output_path`` was given; False when loading the schema,
        parsing, validating or writing failed (the error is logged, not
        raised).
    """
    try:
        # Load the XSD document from a file path or from an in-memory string.
        if is_xsd_file:
            xsd_doc = etree.parse(xsd_source)
        else:
            xsd_doc = etree.fromstring(xsd_source.encode('utf-8'))

        # Build the schema validator and a parser that validates while parsing.
        schema_validator = etree.XMLSchema(xsd_doc)
        parser = etree.XMLParser(schema=schema_validator)

        # Parse and validate in one step.
        # NOTE(review): per the lxml validation docs, a schema-validating
        # parser reports schema violations as XMLSyntaxError, so they are
        # handled by the first except clause below - confirm.
        xml_root = etree.fromstring(xml_str.encode('utf-8'), parser)

        logger.debug("✅ XML 验证成功!")

        if output_path:
            # dirname() is "" for a bare filename and os.makedirs("") raises;
            # previously that was swallowed by the blanket handler below and
            # a valid document was reported as a failure.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            xml_tree = etree.ElementTree(xml_root)

            # Normalise indentation before serialising.
            etree.indent(xml_tree, space=" ")

            # Write the file including the XML declaration.
            xml_tree.write(
                output_path,
                encoding='utf-8',
                xml_declaration=True,
                pretty_print=True
            )

            logger.debug(f"💾 XML文件已保存至: {os.path.abspath(output_path)}")

            return True
        else:
            logger.debug("✅ XML 验证成功,但未指定输出路径。返回原始XML字符串。")
            return xml_str

    except etree.XMLSyntaxError as e:
        logger.error("❌ XML 语法错误:")
        logger.error(f" 错误详情: {e.msg}")
        return False

    except etree.DocumentInvalid as e:
        logger.error("❌ XML 结构/内容验证失败:")
        for error in e.error_log:
            logger.error(f" 行 {error.line}, 列 {error.column}: {error.message.strip()}")
        return False

    except Exception as e:
        # Best-effort boundary: callers rely on a False return, not an exception.
        logger.error(f"❌ 发生未知错误: {str(e)}")
        return False
|
||||
|
||||
def get_page_count(self, zip_file: Path):
|
||||
"""获取 ComicInfo.xml 文件中的 <PageCount> 标签值"""
|
||||
@ -352,7 +402,7 @@ class ComicInfoXml:
|
||||
# Add subelements and attributes based on presence and requirements
|
||||
for attr, value in comic.__dict__.items():
|
||||
# if value or (attr in ['Volume', 'Year', 'Month', 'Day', 'PageCount'] and (value == -1 or value == "" ) ): # Check required attributes
|
||||
if value == -1 or value == "" or value == None or value == "[]" or value == []:
|
||||
if value == -1 or value == 0 or value == "" or value == None or value == "[]" or value == []:
|
||||
if attr in self._required_attributes():
|
||||
raise exit(f"{xml_filename} 缺少必要属性: {attr}")
|
||||
else:
|
||||
@ -383,12 +433,9 @@ class ComicInfoXml:
|
||||
serialized_xml = serialize_comic_info(comic)
|
||||
|
||||
# 保存数据XML到文件
|
||||
if save_dir != None: xml_filename = os.path.join(save_dir, xml_filename)
|
||||
self._save_xml_to_file(serialized_xml, xml_filename)
|
||||
self._validate_xml_with_xsd_file(xml_filename, xsd_filename) # 将 JSON 转换为 XML
|
||||
#xml_data = json_to_xml_with_declaration(json_data)
|
||||
#print(xml_data)
|
||||
return Path(xml_filename)
|
||||
if save_dir != None and xml_filename != None: xml_filename = os.path.join(save_dir, xml_filename)
|
||||
|
||||
return self._validate_and_export_xml(serialized_xml, xsd_source=xsd_filename, output_path=xml_filename, is_xsd_file=True)
|
||||
|
||||
def _required_attributes(self):
|
||||
"""
|
||||
@ -489,13 +536,25 @@ class ComicInfoXml:
|
||||
pages: 返回更新后的页面信息列表
|
||||
"""
|
||||
# 读取 ComicInfo.xml 文件
|
||||
is_update = False
|
||||
comicinfo = self._xml_file_to_comicinfo(cbz_path=cbz_path)
|
||||
if count != -1:
|
||||
comicinfo.Count = count
|
||||
if number != -1:
|
||||
comicinfo.Number = number
|
||||
if count != -1 and count != "" and count != 0:
|
||||
if comicinfo.Count == count:
|
||||
logger.debug(f"ComicInfo.xml 中的 Count 已经是 {count},无需更新")
|
||||
else:
|
||||
comicinfo.Count = count
|
||||
is_update = True
|
||||
if number != -1 and number != "" and number != 0:
|
||||
if comicinfo.Number == number:
|
||||
logger.debug(f"ComicInfo.xml 中的 Number 已经是 {number},无需更新")
|
||||
else:
|
||||
comicinfo.Number = number
|
||||
is_update = True
|
||||
# 保存更新后的 ComicInfo.xml 文件
|
||||
return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path))
|
||||
if not is_update:
|
||||
logger.debug(f"ComicInfo.xml 中的 Count 和 Number 无需更新")
|
||||
return is_update
|
||||
return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path), xml_filename=None)
|
||||
|
||||
def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE):
|
||||
""" 根据Json数据生成ComicInfo.xml
|
||||
|
||||
@ -3,6 +3,9 @@ from typing import List, Optional
|
||||
from opencc import OpenCC
|
||||
import re,os
|
||||
from src.common.ComicInfo import ImageInfo
|
||||
from src.common.logging import setup_logging
|
||||
|
||||
logger = setup_logging(__name__)
|
||||
|
||||
class FileNaming:
|
||||
|
||||
@ -205,6 +208,22 @@ class MangaInfo(BaseModel):
|
||||
# raise ValueError('tags must be a list')
|
||||
# return v
|
||||
|
||||
def is_chapter_ended(cls, str_chapter: str):
    """Report whether ``str_chapter`` is the final chapter and is named like a finale.

    Returns True only when all of the following hold:
      * ``cls.chapters_name`` contains more than one entry;
      * ``str_chapter`` equals ``FileNaming.chinese_file_name`` applied to
        the last entry of ``cls.chapters_name``;
      * after replacing ``-`` with a space and splitting on single spaces,
        one of the resulting tokens is exactly one of the finale keywords.

    Otherwise returns False.
    """
    finale_markers = [ "最终话" , "最终回", "完结", "大结局", "大团圆", "终章", "终结篇", "结局篇" , "后记"]

    # A series with zero or one chapter name is never treated as ended here.
    if len(cls.chapters_name) <= 1:
        return False

    # Only the last listed chapter can qualify.
    latest_name = FileNaming.chinese_file_name(cls.chapters_name[-1])
    if not (str_chapter == latest_name):
        return False

    # Whole-token comparison: a keyword must appear as its own
    # space/hyphen-delimited word, not as a substring of a longer token.
    name_tokens = str_chapter.replace("-", " ").split(" ")
    for marker in finale_markers:
        if marker in name_tokens:
            logger.info(f"比中已完结章节名: {str_chapter} ")
            return True

    return False
|
||||
|
||||
|
||||
class MangaItem(BaseModel):
|
||||
info: MangaInfo
|
||||
covers: List[CoverItem] = []
|
||||
|
||||
@ -489,7 +489,7 @@ class CBZUtils:
|
||||
new_zip.writestr(item, source_zip.read(item.filename))
|
||||
|
||||
# 添加新 XML
|
||||
new_zip.writestr("ComicInfo.xml", new_xml_content)
|
||||
new_zip.writestr("ComicInfo.xml", new_xml_content.encode('utf-8'))
|
||||
|
||||
os.remove(tmp.name) # 清理临时文件
|
||||
return True
|
||||
|
||||
@ -82,32 +82,29 @@ class BaseSite(ABC):
|
||||
#async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]:
|
||||
# """获取漫画章节列表"""
|
||||
# pass
|
||||
|
||||
async def get_chapter_list(self, manga_info: MangaInfo) -> List[Dict[str, str]]:
|
||||
"""获取章节列表"""
|
||||
try:
|
||||
# result_type list[Chapter]
|
||||
list_chapter = manga_info.get_list_chapter()
|
||||
# 临时添加begin
|
||||
# 获取最新章节
|
||||
last_chapter = list_chapter[-1] if list_chapter else []
|
||||
# 临时添加end
|
||||
down_chapter = []
|
||||
downloaded_chapter = []
|
||||
number = 0
|
||||
for chapter in list_chapter:
|
||||
number += 1
|
||||
cbz_path = FileNaming.chapter_cbz(manga_info=manga_info,chapter=chapter)
|
||||
if os.path.exists(cbz_path):
|
||||
# 临时添加begin
|
||||
if chapter.title == last_chapter.title and manga_info.status == "已完结":
|
||||
# 判断是否是最新章节
|
||||
if manga_info.is_chapter_ended(chapter.title):
|
||||
# 如果是最新章节且漫画已完结,则不再下
|
||||
ci = ComicInfoXml()._xml_file_to_comicinfo(cbz_path=cbz_path)
|
||||
if ci.Count == "":
|
||||
# 生成ComicInfo.xml
|
||||
xml_path = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, count=len(list_chapter))
|
||||
# 更新ComicInfo.xml至CBZ文件中
|
||||
CBZUtils().update_cbz_with_new_xml(cbz_path, xml_path.read_text(encoding="utf-8"))
|
||||
# if ci.Count != "" and int(ci.Count) != 0 and int(ci.Count) != int(number):
|
||||
# count = number
|
||||
xml_data = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, number=number, count=number)
|
||||
# 更新ComicInfo.xml至CBZ文件中
|
||||
if xml_data:
|
||||
CBZUtils().update_cbz_with_new_xml(cbz_path, xml_data)
|
||||
# 更新完成后删除临时生成的ComicInfo.xml
|
||||
xml_path.unlink()
|
||||
logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Count完成")
|
||||
logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Number完成")
|
||||
# 临时添加end
|
||||
logger.debug(f"{chapter.title} 章节已存在")
|
||||
chapter.status = "downloaded"
|
||||
@ -115,13 +112,17 @@ class BaseSite(ABC):
|
||||
down_chapter.append(chapter)
|
||||
if manga_info.status == "已完结":
|
||||
from src.common.utils import KomgaAPI, MangaUtils
|
||||
if len(downloaded_chapter) == len(list_chapter):
|
||||
logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成")
|
||||
KomgaAPI().update_series_ended(manga_info.title)
|
||||
logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
|
||||
MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
|
||||
is_update_komga_ended = MangaUtils("mangas_ended.json").search_manga(name= manga_info.title)
|
||||
if is_update_komga_ended != None:
|
||||
logger.info(f"{manga_info.title} 漫画已完结, 已存在于KOMGA服务器已完结列表中,无需更新!")
|
||||
else:
|
||||
logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因")
|
||||
if len(downloaded_chapter) == len(list_chapter):
|
||||
logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成")
|
||||
KomgaAPI().update_series_ended(manga_info.title)
|
||||
logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
|
||||
MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
|
||||
else:
|
||||
logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因")
|
||||
return down_chapter
|
||||
except Exception as e:
|
||||
if isinstance(e, (ParseError, SiteError)):
|
||||
|
||||
2
test.py
2
test.py
@ -434,7 +434,7 @@ class comicInfo:
|
||||
if __name__ == "__main__":
|
||||
print("开始处理")
|
||||
# ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
|
||||
xml_path = ComicInfoXml().update_comicinfo_count(37,"/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
|
||||
xml_path = ComicInfoXml().update_comicinfo_count_or_number(count=37,cbz_path="/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
|
||||
comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8"))
|
||||
#items = ci().__dict__.keys()
|
||||
#print(items)
|
||||
Loading…
Reference in New Issue
Block a user