update
This commit is contained in:
parent
c020596284
commit
58294db981
@ -254,6 +254,7 @@ class ComicInfo:
|
|||||||
for key, value in kwargs.items():
|
for key, value in kwargs.items():
|
||||||
if hasattr(self, key):
|
if hasattr(self, key):
|
||||||
setattr(self, key, value)
|
setattr(self, key, value)
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ComicPageInfo:
|
class ComicPageInfo:
|
||||||
# ComicInfo.xml 中的<Page>
|
# ComicInfo.xml 中的<Page>
|
||||||
@ -294,31 +295,80 @@ class ComicInfoXml:
|
|||||||
"""
|
"""
|
||||||
生成ComicInfo.xml
|
生成ComicInfo.xml
|
||||||
"""
|
"""
|
||||||
def _save_xml_to_file(self, xml_string, filename):
    """Write an XML string to ``filename`` as UTF-8, creating parent directories.

    Args:
        xml_string: The XML text to write.
        filename: Destination path. May be a bare file name without a
            directory component, in which case no directory is created.
    """
    base_dir = os.path.dirname(filename)
    # dirname() returns "" for a bare file name and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one.
    # exist_ok=True tolerates the directory already existing (or being
    # created concurrently) without a separate existence check.
    if base_dir:
        os.makedirs(base_dir, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as file:
        file.write(xml_string)
    logger.debug(f"ComicInfo.xml 生成成功 {filename}")
|
|
||||||
|
|
||||||
def _validate_xml_with_xsd_file(self, xml_file, xsd_file, remove=True):
|
def _validate_and_export_xml(self, xml_str, xsd_source, output_path=None, is_xsd_file=True):
    """Validate an XML string against an XSD schema and optionally write it to a file.

    Args:
        xml_str: The XML document as a string.
        xsd_source: Path to an XSD file, or the XSD document itself as a string.
        output_path: Where to write the validated XML (optional).
        is_xsd_file: True if ``xsd_source`` is a file path, False if it is
            the XSD text.

    Returns:
        * With ``output_path``: ``True`` once the validated XML has been
          written to that path.
        * Without ``output_path``: the original ``xml_str`` when validation
          succeeds.
        * ``False`` on any parse, validation, or other error (the error is
          logged, not raised).
    """
    try:
        # Load the XSD schema document.
        if is_xsd_file:
            xsd_doc = etree.parse(xsd_source)
        else:
            xsd_doc = etree.fromstring(xsd_source.encode('utf-8'))

        # Build the schema validator and a parser that validates while parsing.
        schema_validator = etree.XMLSchema(xsd_doc)
        parser = etree.XMLParser(schema=schema_validator)

        # Parse and validate the XML string in one step.
        xml_root = etree.fromstring(xml_str.encode('utf-8'), parser)

        logger.debug("✅ XML 验证成功!")

        if output_path:
            # os.path.dirname() is "" for a bare file name and os.makedirs("")
            # raises FileNotFoundError even with exist_ok=True; that error
            # would be swallowed by the generic handler below and reported
            # as a failure although the XML is valid.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            xml_tree = etree.ElementTree(xml_root)

            # Pretty-print: indentation and line breaks.
            etree.indent(xml_tree, space=" ")

            # Write the file, including the XML declaration.
            xml_tree.write(
                output_path,
                encoding='utf-8',
                xml_declaration=True,
                pretty_print=True
            )

            logger.debug(f"💾 XML文件已保存至: {os.path.abspath(output_path)}")

            return True
        else:
            logger.debug("✅ XML 验证成功,但未指定输出路径。返回原始XML字符串。")
            return xml_str

    except etree.XMLSyntaxError as e:
        logger.error("❌ XML 语法错误:")
        logger.error(f" 错误详情: {e.msg}")
        return False

    except etree.DocumentInvalid as e:
        logger.error("❌ XML 结构/内容验证失败:")
        for error in e.error_log:
            logger.error(f" 行 {error.line}, 列 {error.column}: {error.message.strip()}")
        return False

    except Exception as e:
        # Boundary handler: callers rely on a False return rather than an
        # exception for every failure mode.
        logger.error(f"❌ 发生未知错误: {str(e)}")
        return False
|
||||||
|
|
||||||
def get_page_count(self, zip_file: Path):
|
def get_page_count(self, zip_file: Path):
|
||||||
"""获取 ComicInfo.xml 文件中的 <PageCount> 标签值"""
|
"""获取 ComicInfo.xml 文件中的 <PageCount> 标签值"""
|
||||||
# 打开ZIP文件
|
# 打开ZIP文件
|
||||||
@ -352,7 +402,7 @@ class ComicInfoXml:
|
|||||||
# Add subelements and attributes based on presence and requirements
|
# Add subelements and attributes based on presence and requirements
|
||||||
for attr, value in comic.__dict__.items():
|
for attr, value in comic.__dict__.items():
|
||||||
# if value or (attr in ['Volume', 'Year', 'Month', 'Day', 'PageCount'] and (value == -1 or value == "" ) ): # Check required attributes
|
# if value or (attr in ['Volume', 'Year', 'Month', 'Day', 'PageCount'] and (value == -1 or value == "" ) ): # Check required attributes
|
||||||
if value == -1 or value == "" or value == None or value == "[]" or value == []:
|
if value == -1 or value == 0 or value == "" or value == None or value == "[]" or value == []:
|
||||||
if attr in self._required_attributes():
|
if attr in self._required_attributes():
|
||||||
raise exit(f"{xml_filename} 缺少必要属性: {attr}")
|
raise exit(f"{xml_filename} 缺少必要属性: {attr}")
|
||||||
else:
|
else:
|
||||||
@ -383,12 +433,9 @@ class ComicInfoXml:
|
|||||||
serialized_xml = serialize_comic_info(comic)
|
serialized_xml = serialize_comic_info(comic)
|
||||||
|
|
||||||
# 保存数据XML到文件
|
# 保存数据XML到文件
|
||||||
if save_dir != None: xml_filename = os.path.join(save_dir, xml_filename)
|
if save_dir != None and xml_filename != None: xml_filename = os.path.join(save_dir, xml_filename)
|
||||||
self._save_xml_to_file(serialized_xml, xml_filename)
|
|
||||||
self._validate_xml_with_xsd_file(xml_filename, xsd_filename) # 将 JSON 转换为 XML
|
return self._validate_and_export_xml(serialized_xml, xsd_source=xsd_filename, output_path=xml_filename, is_xsd_file=True)
|
||||||
#xml_data = json_to_xml_with_declaration(json_data)
|
|
||||||
#print(xml_data)
|
|
||||||
return Path(xml_filename)
|
|
||||||
|
|
||||||
def _required_attributes(self):
|
def _required_attributes(self):
|
||||||
"""
|
"""
|
||||||
@ -489,13 +536,25 @@ class ComicInfoXml:
|
|||||||
pages: 返回更新后的页面信息列表
|
pages: 返回更新后的页面信息列表
|
||||||
"""
|
"""
|
||||||
# 读取 ComicInfo.xml 文件
|
# 读取 ComicInfo.xml 文件
|
||||||
|
is_update = False
|
||||||
comicinfo = self._xml_file_to_comicinfo(cbz_path=cbz_path)
|
comicinfo = self._xml_file_to_comicinfo(cbz_path=cbz_path)
|
||||||
if count != -1:
|
if count != -1 and count != "" and count != 0:
|
||||||
comicinfo.Count = count
|
if comicinfo.Count == count:
|
||||||
if number != -1:
|
logger.debug(f"ComicInfo.xml 中的 Count 已经是 {count},无需更新")
|
||||||
comicinfo.Number = number
|
else:
|
||||||
|
comicinfo.Count = count
|
||||||
|
is_update = True
|
||||||
|
if number != -1 and number != "" and number != 0:
|
||||||
|
if comicinfo.Number == number:
|
||||||
|
logger.debug(f"ComicInfo.xml 中的 Number 已经是 {number},无需更新")
|
||||||
|
else:
|
||||||
|
comicinfo.Number = number
|
||||||
|
is_update = True
|
||||||
# 保存更新后的 ComicInfo.xml 文件
|
# 保存更新后的 ComicInfo.xml 文件
|
||||||
return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path))
|
if not is_update:
|
||||||
|
logger.debug(f"ComicInfo.xml 中的 Count 和 Number 无需更新")
|
||||||
|
return is_update
|
||||||
|
return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path), xml_filename=None)
|
||||||
|
|
||||||
def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE):
|
def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE):
|
||||||
""" 根据Json数据生成ComicInfo.xml
|
""" 根据Json数据生成ComicInfo.xml
|
||||||
|
|||||||
@ -3,6 +3,9 @@ from typing import List, Optional
|
|||||||
from opencc import OpenCC
|
from opencc import OpenCC
|
||||||
import re,os
|
import re,os
|
||||||
from src.common.ComicInfo import ImageInfo
|
from src.common.ComicInfo import ImageInfo
|
||||||
|
from src.common.logging import setup_logging
|
||||||
|
|
||||||
|
logger = setup_logging(__name__)
|
||||||
|
|
||||||
class FileNaming:
|
class FileNaming:
|
||||||
|
|
||||||
@ -204,7 +207,23 @@ class MangaInfo(BaseModel):
|
|||||||
# if not isinstance(v, list):
|
# if not isinstance(v, list):
|
||||||
# raise ValueError('tags must be a list')
|
# raise ValueError('tags must be a list')
|
||||||
# return v
|
# return v
|
||||||
|
|
||||||
|
def is_chapter_ended(cls, str_chapter: str):
    """Check whether ``str_chapter`` is the final chapter of a finished series.

    The result is True only when all of the following hold:

    * more than one chapter name is known (``chapters_name``),
    * ``str_chapter`` equals the last chapter name after it has been passed
      through ``FileNaming.chinese_file_name``,
    * one of the "ending" keywords appears as a whole token of
      ``str_chapter`` (tokens are separated by spaces or ``-``).

    Args:
        cls: The object carrying ``chapters_name`` (the first parameter is
            named ``cls`` in the source; only its ``chapters_name``
            attribute is read here).
        str_chapter: The chapter name to test.

    Returns:
        True if the chapter is recognised as the ending chapter, else False.
    """
    ended_chapter_keys = (
        "最终话", "最终回", "完结", "大结局", "大团圆",
        "终章", "终结篇", "结局篇", "后记",
    )

    # Tolerate a missing (None) chapter list instead of failing in len().
    chapters_name = cls.chapters_name or []
    if not str_chapter or len(chapters_name) <= 1:
        return False

    # Only the last known chapter can be the ending chapter.
    if str_chapter != FileNaming.chinese_file_name(chapters_name[-1]):
        return False

    # NOTE: a status-based shortcut (status == "已完结") exists in the source
    # only as a disabled line; the decision rests on the chapter name alone.
    # Keywords must match a whole token, not a substring of a token.
    chapter_tokens = str_chapter.replace("-", " ").split(" ")
    if any(key in chapter_tokens for key in ended_chapter_keys):
        logger.info(f"比中已完结章节名: {str_chapter} ")
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
class MangaItem(BaseModel):
|
class MangaItem(BaseModel):
|
||||||
info: MangaInfo
|
info: MangaInfo
|
||||||
covers: List[CoverItem] = []
|
covers: List[CoverItem] = []
|
||||||
|
|||||||
@ -489,7 +489,7 @@ class CBZUtils:
|
|||||||
new_zip.writestr(item, source_zip.read(item.filename))
|
new_zip.writestr(item, source_zip.read(item.filename))
|
||||||
|
|
||||||
# 添加新 XML
|
# 添加新 XML
|
||||||
new_zip.writestr("ComicInfo.xml", new_xml_content)
|
new_zip.writestr("ComicInfo.xml", new_xml_content.encode('utf-8'))
|
||||||
|
|
||||||
os.remove(tmp.name) # 清理临时文件
|
os.remove(tmp.name) # 清理临时文件
|
||||||
return True
|
return True
|
||||||
|
|||||||
@ -82,32 +82,29 @@ class BaseSite(ABC):
|
|||||||
#async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]:
|
#async def get_chapter_list(self, info: MangaInfo) -> List[Dict[str, str]]:
|
||||||
# """获取漫画章节列表"""
|
# """获取漫画章节列表"""
|
||||||
# pass
|
# pass
|
||||||
|
|
||||||
async def get_chapter_list(self, manga_info: MangaInfo) -> List[Dict[str, str]]:
|
async def get_chapter_list(self, manga_info: MangaInfo) -> List[Dict[str, str]]:
|
||||||
"""获取章节列表"""
|
"""获取章节列表"""
|
||||||
try:
|
try:
|
||||||
# result_type list[Chapter]
|
|
||||||
list_chapter = manga_info.get_list_chapter()
|
list_chapter = manga_info.get_list_chapter()
|
||||||
# 临时添加begin
|
|
||||||
# 获取最新章节
|
|
||||||
last_chapter = list_chapter[-1] if list_chapter else []
|
|
||||||
# 临时添加end
|
|
||||||
down_chapter = []
|
down_chapter = []
|
||||||
downloaded_chapter = []
|
downloaded_chapter = []
|
||||||
|
number = 0
|
||||||
for chapter in list_chapter:
|
for chapter in list_chapter:
|
||||||
|
number += 1
|
||||||
cbz_path = FileNaming.chapter_cbz(manga_info=manga_info,chapter=chapter)
|
cbz_path = FileNaming.chapter_cbz(manga_info=manga_info,chapter=chapter)
|
||||||
if os.path.exists(cbz_path):
|
if os.path.exists(cbz_path):
|
||||||
# 临时添加begin
|
# 判断是否是最新章节
|
||||||
if chapter.title == last_chapter.title and manga_info.status == "已完结":
|
if manga_info.is_chapter_ended(chapter.title):
|
||||||
# 如果是最新章节且漫画已完结,则不再下
|
# 如果是最新章节且漫画已完结,则不再下
|
||||||
ci = ComicInfoXml()._xml_file_to_comicinfo(cbz_path=cbz_path)
|
# if ci.Count != "" and int(ci.Count) != 0 and int(ci.Count) != int(number):
|
||||||
if ci.Count == "":
|
# count = number
|
||||||
# 生成ComicInfo.xml
|
xml_data = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, number=number, count=number)
|
||||||
xml_path = ComicInfoXml().update_comicinfo_count_or_number(cbz_path=cbz_path, count=len(list_chapter))
|
# 更新ComicInfo.xml至CBZ文件中
|
||||||
# 更新ComicInfo.xml至CBZ文件中
|
if xml_data:
|
||||||
CBZUtils().update_cbz_with_new_xml(cbz_path, xml_path.read_text(encoding="utf-8"))
|
CBZUtils().update_cbz_with_new_xml(cbz_path, xml_data)
|
||||||
# 更新完成后删除临时生成的ComicInfo.xml
|
# 更新完成后删除临时生成的ComicInfo.xml
|
||||||
xml_path.unlink()
|
logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Number完成")
|
||||||
logger.info(f"更新 {cbz_path} 的 ComicInfo.xml Count完成")
|
|
||||||
# 临时添加end
|
# 临时添加end
|
||||||
logger.debug(f"{chapter.title} 章节已存在")
|
logger.debug(f"{chapter.title} 章节已存在")
|
||||||
chapter.status = "downloaded"
|
chapter.status = "downloaded"
|
||||||
@ -115,13 +112,17 @@ class BaseSite(ABC):
|
|||||||
down_chapter.append(chapter)
|
down_chapter.append(chapter)
|
||||||
if manga_info.status == "已完结":
|
if manga_info.status == "已完结":
|
||||||
from src.common.utils import KomgaAPI, MangaUtils
|
from src.common.utils import KomgaAPI, MangaUtils
|
||||||
if len(downloaded_chapter) == len(list_chapter):
|
is_update_komga_ended = MangaUtils("mangas_ended.json").search_manga(name= manga_info.title)
|
||||||
logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成")
|
if is_update_komga_ended != None:
|
||||||
KomgaAPI().update_series_ended(manga_info.title)
|
logger.info(f"{manga_info.title} 漫画已完结, 已存在于KOMGA服务器已完结列表中,无需更新!")
|
||||||
logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
|
|
||||||
MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
|
|
||||||
else:
|
else:
|
||||||
logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因")
|
if len(downloaded_chapter) == len(list_chapter):
|
||||||
|
logger.info(f"{manga_info.title} 漫画已完结, 章节已全部下载完成")
|
||||||
|
KomgaAPI().update_series_ended(manga_info.title)
|
||||||
|
logger.info(f"{manga_info.title} 漫画已完结, 已更新KOMGA状态")
|
||||||
|
MangaUtils("mangas_ended.json").add_manga(name= manga_info.title)
|
||||||
|
else:
|
||||||
|
logger.info(f"{manga_info.title} 漫画已完结, 但章节未全部下载完成, 可能是网络问题或其他原因")
|
||||||
return down_chapter
|
return down_chapter
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if isinstance(e, (ParseError, SiteError)):
|
if isinstance(e, (ParseError, SiteError)):
|
||||||
|
|||||||
2
test.py
2
test.py
@ -434,7 +434,7 @@ class comicInfo:
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("开始处理")
|
print("开始处理")
|
||||||
# ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
|
# ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
|
||||||
xml_path = ComicInfoXml().update_comicinfo_count(37,"/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
|
xml_path = ComicInfoXml().update_comicinfo_count_or_number(count=37,cbz_path="/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
|
||||||
comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8"))
|
comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8"))
|
||||||
#items = ci().__dict__.keys()
|
#items = ci().__dict__.keys()
|
||||||
#print(items)
|
#print(items)
|
||||||
Loading…
Reference in New Issue
Block a user