add update CBZ ComicInfo.xml
This commit is contained in:
parent
0d5e26662f
commit
97449dca0b
@ -205,6 +205,7 @@ class ImageInfo:
|
||||
|
||||
# Define the ComicInfo and ComicPageInfo classes
|
||||
class ComicInfo:
|
||||
# ComicInfo.xml 中的选项
|
||||
def __init__(self):
|
||||
self.Title: str = ""
|
||||
"""标题"""
|
||||
@ -247,6 +248,7 @@ class ComicInfo:
|
||||
self.Pages: List[ComicPageInfo] = []
|
||||
|
||||
class ComicPageInfo:
|
||||
# ComicInfo.xml 中的<Page>
|
||||
def __init__(self):
|
||||
self.Image: int = -1
|
||||
self.Type: str = "Story"
|
||||
@ -269,6 +271,9 @@ class ComicPageInfo:
|
||||
return data
|
||||
|
||||
class ComicInfoXml:
|
||||
"""
|
||||
生成ComicInfo.xml
|
||||
"""
|
||||
def _save_xml_to_file(self, xml_string, filename):
|
||||
"""
|
||||
Save the XML string to a file
|
||||
@ -365,9 +370,14 @@ class ComicInfoXml:
|
||||
#print(xml_data)
|
||||
|
||||
def _required_attributes(self):
|
||||
"""
|
||||
必需值,如果为空刚报错
|
||||
"""
|
||||
return ["Title", "Series", "Number", "PageCount", "Writer"]
|
||||
|
||||
def _gen_pageinfo(self, image_names, save_dir):
|
||||
""" 获取PageInfo数据
|
||||
"""
|
||||
pages = []
|
||||
# Adding pages to the comic
|
||||
for image_name in image_names:
|
||||
@ -377,8 +387,10 @@ class ComicInfoXml:
|
||||
# 图像属性 文件名 大小 长
|
||||
pages.append(page)
|
||||
return pages
|
||||
|
||||
|
||||
def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE):
|
||||
""" 根据Json数据生成ComicInfo.xml
|
||||
"""
|
||||
comic = ComicInfo()
|
||||
comic.Title = json_data.get("chapter", "")
|
||||
comic.Series = json_data.get("name", "")
|
||||
|
||||
203
test.py
203
test.py
@ -2,11 +2,20 @@ from src.common.naming import FileNaming
|
||||
from src.common.ComicInfo import ImageInfo
|
||||
from zipfile import ZipFile
|
||||
from datetime import datetime
|
||||
import os
|
||||
import os,hashlib
|
||||
import xml.etree.ElementTree as ET
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
from tempfile import NamedTemporaryFile
|
||||
from xml.dom import minidom
|
||||
|
||||
|
||||
class test:
|
||||
|
||||
def clean_cbz(self):
|
||||
def clean_min_cbz(self):
|
||||
"""
|
||||
清理3KB以下CBZ文件
|
||||
"""
|
||||
dir_path = "/mnt/Comics/CBZ/rm_comic"
|
||||
for dir in os.listdir(dir_path):
|
||||
c_dir = os.path.join(dir_path, dir)
|
||||
@ -49,6 +58,194 @@ class test:
|
||||
files = list(FileNaming.get_filenames_optimized(c_dir, ext_filter=['.CBZ']))
|
||||
for file in files:
|
||||
self._clean_old_cbz(file)
|
||||
|
||||
class comicInfo:
    """Read, regenerate and rewrite the ``ComicInfo.xml`` stored in a CBZ archive."""

    # Metadata elements carried over from the existing ComicInfo.xml
    # (a user-defined list; any other element is not preserved).
    METADATA_FIELDS = (
        "Title", "Series", "Number", "Summary", "Writer",
        "Genre", "PageCount", "AgeRating",
    )
    # Extension appended to a <Page Image="..."> value to locate the image entry.
    IMAGE_EXTENSION = ".jpg"

    @staticmethod
    def _local_name(tag):
        """Return *tag* without a leading ``{namespace}`` prefix."""
        return tag.rsplit("}", 1)[-1] if isinstance(tag, str) else tag

    def _find_child(self, parent, name):
        """Return the first direct child of *parent* whose local tag is *name*.

        Matching ignores the XML namespace so that files written by
        ``generate_comic_info_xml`` (which sets a default ``xmlns``) can be
        read back the same way as namespace-free files.
        """
        for child in parent:
            if self._local_name(child.tag) == name:
                return child
        return None

    def find_actual_path(self, zip_ref, target_path):
        """Look up an archive member case-insensitively.

        Args:
            zip_ref: An open ``ZipFile``.
            target_path: The member path to look for.

        Returns:
            The member name exactly as stored in the archive, or ``None``
            when no member matches.
        """
        target_lower = target_path.lower()
        for name in zip_ref.namelist():
            if name.lower() == target_lower:
                return name
        return None

    def _describe_image(self, cbz, actual_path):
        """Return name, size, MD5 and pixel dimensions for one archive image."""
        content = cbz.read(actual_path)

        img_width, img_height = None, None
        try:
            with Image.open(BytesIO(content)) as img:
                img_width, img_height = img.size
        except Exception as e:
            # Best effort: an undecodable image still gets size and hash.
            print(f"无法读取图片尺寸:{actual_path},错误:{e}")

        return {
            # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b").
            "name": os.path.splitext(os.path.basename(actual_path))[0],
            "size": len(content),
            "key": hashlib.md5(content).hexdigest(),
            "width": img_width,
            "height": img_height,
        }

    def process_cbz(self, cbz_path):
        """Extract metadata and per-page image details from a CBZ file.

        Args:
            cbz_path: Path of the CBZ archive to read.

        Returns:
            ``{"metadata": dict, "pages": list | None}`` on success.
            ``metadata`` maps each name in ``METADATA_FIELDS`` to the element
            text (``None`` when absent). ``pages`` is ``None`` when the XML
            has no ``Pages``/``Page`` elements; otherwise it is a list of
            dicts with ``name``, ``size``, ``key`` (MD5 hex digest),
            ``width`` and ``height``. Returns ``None`` when the archive has
            no root-level ``ComicInfo.xml`` or cannot be processed.
        """
        try:
            with ZipFile(cbz_path, 'r') as cbz:
                names = cbz.namelist()

                # Part 1: read the metadata fields of ComicInfo.xml.
                xml_file_name = next(
                    (n for n in names if n.lower() == 'comicinfo.xml'), None
                )
                if xml_file_name is None:
                    print("未找到 ComicInfo.xml")
                    return None

                # Parse the raw bytes once; the parser honours the encoding
                # declared in the document itself.
                root = ET.fromstring(cbz.read(xml_file_name))

                metadata = {}
                for field in self.METADATA_FIELDS:
                    element = self._find_child(root, field)
                    metadata[field] = element.text if element is not None else None

                # Part 2: read the image information of the Page elements.
                pages = self._find_child(root, 'Pages')
                if pages is None:
                    print("XML 中缺少 Pages 标签")
                    return {"metadata": metadata, "pages": None}

                page_list = [p for p in pages if self._local_name(p.tag) == 'Page']
                if not page_list:
                    print("Pages 标签下无 Page 元素")
                    return {"metadata": metadata, "pages": None}

                # Lowercased name -> stored name, built once; the first
                # occurrence wins, as in find_actual_path.
                stored_names = {}
                for name in names:
                    stored_names.setdefault(name.lower(), name)

                pages_info = []
                for page in page_list:
                    img_path = page.get('Image')
                    if not img_path:
                        continue
                    actual_path = stored_names.get(
                        (img_path + self.IMAGE_EXTENSION).lower()
                    )
                    if not actual_path:
                        print(f"警告:图片 '{img_path}' 不存在于压缩包中")
                        continue
                    pages_info.append(self._describe_image(cbz, actual_path))

                return {"metadata": metadata, "pages": pages_info}
        except Exception as e:
            # Boundary of the operation: report the failure and signal it
            # with None, which is the contract callers rely on.
            print(f"处理 CBZ 文件时出错: {e}")
            return None

    def generate_comic_info_xml(self, metadata, pages_info):
        """Build the ComicInfo.xml text from metadata and page information.

        Args:
            metadata: Mapping of element name to value; ``None`` values are
                skipped. ``None`` is treated as an empty mapping.
            pages_info: Iterable of page dicts as produced by
                ``process_cbz``, or a falsy value to omit ``Pages``.

        Returns:
            The pretty-printed XML document as ``str``, including the XML
            declaration.
        """
        root = ET.Element("ComicInfo", xmlns="http://comicrack.cyolito.com/comicinfo")

        for field, value in (metadata or {}).items():
            if value is not None:
                ET.SubElement(root, field).text = str(value)

        if pages_info:
            pages_elem = ET.SubElement(root, "Pages")
            page_attributes = (
                ("ImageSize", "size"),
                ("Key", "key"),
                ("ImageWidth", "width"),
                ("ImageHeight", "height"),
            )
            for page in pages_info:
                attrib = {"Image": str(page.get("name", ""))}
                for attr_name, key in page_attributes:
                    value = page.get(key, 0)
                    # An unknown value (e.g. unreadable image size) is left
                    # out rather than serialised as the string "None".
                    if value is not None:
                        attrib[attr_name] = str(value)
                ET.SubElement(pages_elem, "Page", attrib=attrib)

        # Round-trip through minidom to obtain an indented document.
        xml_str = ET.tostring(root, encoding='utf-8', method='xml')
        parsed_xml = minidom.parseString(xml_str)
        formatted_xml = parsed_xml.toprettyxml(indent=" ", encoding="utf-8")
        return formatted_xml.decode('utf-8')

    def update_cbz_with_new_xml(self, cbz_path, new_xml_content, output_path=None):
        """Write a copy of the CBZ whose ComicInfo.xml is *new_xml_content*.

        The new archive is built in a temporary file next to the output and
        swapped into place only once it is complete, so a failure never
        damages the source or an existing output file.

        Args:
            cbz_path: Path of the source CBZ archive.
            new_xml_content: New ComicInfo.xml content (``str`` or ``bytes``).
            output_path: Destination path; defaults to *cbz_path*
                (overwrite in place). When it differs from *cbz_path* the
                source file is left untouched.

        Returns:
            ``True`` on success, ``False`` on failure.
        """
        if output_path is None:
            output_path = cbz_path

        # A sibling of the destination so that os.replace stays on one
        # filesystem and remains atomic.
        tmp_path = f"{output_path}.{os.getpid()}.tmp"
        try:
            with ZipFile(cbz_path, 'r') as source_zip, \
                    ZipFile(tmp_path, 'w') as new_zip:
                # Copy every entry except the old XML, keeping each entry's
                # own ZipInfo (name, timestamp, compression).
                for item in source_zip.infolist():
                    if item.filename.lower() != 'comicinfo.xml':
                        new_zip.writestr(item, source_zip.read(item.filename))
                new_zip.writestr("ComicInfo.xml", new_xml_content)

            os.replace(tmp_path, output_path)
            return True
        except Exception as e:
            print(f"更新 CBZ 文件失败: {e}")
            return False
        finally:
            # After a successful replace the temp path no longer exists.
            if os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError as e:
                    print(f"更新 CBZ 文件失败: {e}")

    def update_comicinfo_cbz(self, cbz_path, output_path=None):
        """Regenerate the ComicInfo.xml of a CBZ file and store it.

        Args:
            cbz_path: Path of the CBZ archive to update.
            output_path: Optional destination; defaults to updating
                *cbz_path* in place.

        Returns:
            ``True`` when the archive was written, ``False`` when the
            archive could not be read or written.
        """
        data = self.process_cbz(cbz_path)
        if data is None:
            # process_cbz has already reported the reason.
            return False

        new_xml = self.generate_comic_info_xml(data["metadata"], data["pages"])
        return self.update_cbz_with_new_xml(cbz_path, new_xml, output_path)
|
||||
# if success:
|
||||
# print("CBZ 文件更新成功")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the maintenance tasks defined on the test class.
    test().clean_old_cbz()
    # Remove CBZ files smaller than 3 KB
    test().clean_min_cbz()
|
||||
Loading…
Reference in New Issue
Block a user