update

parent 4874600a07
commit 3a45a11a65
@@ -1,20 +1,16 @@
+import os, re, requests, hashlib
import xml.etree.ElementTree as ET
from xml.dom import minidom
from typing import List
-import os
from lxml import etree
from src.config import XSD_FILE
from src.common.logging import setup_logging
-import logging
from zipfile import ZipFile
from pathlib import Path
-import re
-import requests
from urllib.parse import urlparse
from PIL import Image
-from concurrent.futures import ThreadPoolExecutor
-import hashlib
from io import BytesIO
+from dataclasses import dataclass

logger = setup_logging(__name__)

@@ -207,8 +203,12 @@ class ImageInfo:
class ComicInfo:
    # Options from ComicInfo.xml
    def __init__(self):
+        self._init_default_values()
+
+    def _init_default_values(self):
+        """Initialize the default values"""
        self.Title: str = ""
        """Title"""
        self.Series: str = ""
        self.Number: str = ""
        self.Count: int = -1
@@ -247,9 +247,18 @@ class ComicInfo:
        self.AgeRating: str = ""
        self.Pages: List[ComicPageInfo] = []

+    def __init__(self, **kwargs):
+        values = kwargs.values()
+        self._init_default_values()
+        if len(values) > 0:
+            for key, value in kwargs.items():
+                if hasattr(self, key):
+                    setattr(self, key, value)
+
+@dataclass
class ComicPageInfo:
    # The <Page> entries in ComicInfo.xml
-    def __init__(self):
+    def _init_default_values(self):
        self.Image: int = -1
        self.Type: str = "Story"
        self.DoublePage: bool = False
@@ -259,6 +268,17 @@ class ComicPageInfo:
        self.ImageWidth: int = -1
        self.ImageHeight: int = -1

+    def __init__(self):
+        self._init_default_values()
+
+    def __init__(self, **kwargs):
+        values = kwargs.values()
+        self._init_default_values()
+        if len(values) > 0:
+            for key, value in kwargs.items():
+                if hasattr(self, key):
+                    setattr(self, key, value)
+
    def toString(self):
        data = {}
        def add(key, value):
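The `**kwargs` constructors added to `ComicInfo` and `ComicPageInfo` follow one pattern: reset every field to its default, then overwrite any attribute whose name matches a keyword argument. A minimal standalone sketch of that pattern (the class and fields below are invented stand-ins, not the project's classes):

```python
class PageStub:
    """Illustrative stand-in for ComicPageInfo; not part of the commit."""

    def _init_default_values(self):
        self.Image: int = -1
        self.Type: str = "Story"

    def __init__(self, **kwargs):
        self._init_default_values()
        for key, value in kwargs.items():
            if hasattr(self, key):      # unknown keys are silently ignored
                setattr(self, key, value)

page = PageStub(Image=3, Bogus="dropped")
print(page.Image, page.Type)            # 3 Story
print(hasattr(page, "Bogus"))           # False
```

Because unknown keys are dropped by the `hasattr` check, a misspelled field name fails silently rather than raising, which is worth keeping in mind when building the metadata dict.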
@@ -314,7 +334,7 @@ class ComicInfoXml:
        logger.debug(f"zip_file={zip_file} PageCount: {page_count}")
        return page_count

-    def _parse_comicinfo(self, comic: ComicInfo, save_dir=None, xml_filename="ComicInfo.xml", xsd_filename="ComicInfo.xsd"):
+    def _parse_comicinfo(self, comic: ComicInfo, save_dir=None, xml_filename="ComicInfo.xml", xsd_filename=XSD_FILE) -> Path:
        """_summary_

        Args:
@@ -368,12 +388,13 @@ class ComicInfoXml:
        self._validate_xml_with_xsd_file(xml_filename, xsd_filename)  # convert the JSON to XML
        #xml_data = json_to_xml_with_declaration(json_data)
        #print(xml_data)
+        return Path(xml_filename)

    def _required_attributes(self):
        """
        Required values; an error is raised if any of them is empty
        """
-        return ["Title", "Series", "Number", "PageCount", "Writer"]
+        return ["Title", "Series", "Number", "Writer", "PageCount", "Pages"]

    def _gen_pageinfo(self, image_names, save_dir):
        """ Get the PageInfo data
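The validation call above goes through `_validate_xml_with_xsd_file`, whose body is not part of this diff. With lxml, which the module already imports, an XML-against-XSD check is typically a few lines like this sketch (function name and paths are illustrative, not the project's implementation):

```python
from lxml import etree

def validate_xml_with_xsd(xml_path: str, xsd_path: str) -> bool:
    """Return True if the XML document validates against the XSD schema."""
    schema = etree.XMLSchema(etree.parse(xsd_path))   # compile the schema
    document = etree.parse(xml_path)                  # parse the instance document
    if not schema.validate(document):
        print(schema.error_log)                       # reasons the validation failed
        return False
    return True

# validate_xml_with_xsd("ComicInfo.xml", "ComicInfo_2.1.xsd")
```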
@@ -388,6 +409,91 @@ class ComicInfoXml:
            pages.append(page)
        return pages

+    def _xml_file_to_comicinfo(self, cbz_path=None, xml_file=None) -> ComicInfo:
+        """ Read the ComicInfo.xml metadata from a CBZ file or an XML file and return a ComicInfo object
+        Args:
+            Either one of the following parameters is enough
+            cbz_path (optional): path to a CBZ file. Defaults to None.
+            xml_file (optional): path to an XML file. Defaults to None
+        Returns:
+            ci: returns a ComicInfo object with the extracted metadata
+        """
+
+        def xml_parse(xml_str) -> ComicInfo:
+            """ Parse an XML string and extract the requested fields
+
+            Args:
+                xml_str: content of the XML file as a string
+                keys: list of fields to extract
+
+            Returns:
+                ci: returns a ComicInfo object with the extracted values
+            """
+
+            import xmltodict
+            xml_dict = xmltodict.parse(xml_str)
+            # parse the XML metadata
+            metadata = {}
+            # the ComicInfo.xml fields
+            keys = ComicInfo().__dict__.keys()
+            for key in keys:
+                key_element = xml_dict.get("ComicInfo", {}).get(key, "")
+                if key == "Pages":
+                    pages = []
+                    page_list_element = xml_dict.get("ComicInfo", {}).get("Pages", []).get("Page", "")
+                    for page_element in page_list_element:
+                        pages.append(ComicPageInfo(**{ "Image": page_element['@Image'],
+                                                       "ImageSize": int(page_element['@ImageSize']),
+                                                       "Key": page_element['@Key'],
+                                                       "ImageWidth": int(page_element['@ImageWidth']),
+                                                       "ImageHeight": int(page_element['@ImageHeight'])}))
+                    key_element = pages
+                if key_element is not None:
+                    metadata[key] = key_element if key_element else ""
+                else:
+                    metadata[key] = ""
+            return ComicInfo(**metadata)
+
+        def read_zip_file(zip_file_path):
+            """Read the ZIP file and return its contents"""
+            try:
+                with ZipFile(zip_file_path, 'r') as zip_ref:
+                    # list every file name inside the ZIP
+                    file_list = zip_ref.namelist()
+                    # read the ComicInfo.xml content
+                    if 'ComicInfo.xml' in file_list:
+                        with zip_ref.open('ComicInfo.xml') as xml_file:
+                            return xml_file.read().decode('utf-8')
+                    else:
+                        raise FileNotFoundError("ComicInfo.xml not found in the ZIP file.")
+            except Exception as e:
+                print(f"处理 CBZ 文件时出错: {e}")
+                raise exit(f"处理CBZ出错")
+
+        if cbz_path is not None:
+            xml_content = read_zip_file(cbz_path)
+        elif xml_file is not None:
+            with open(xml_file, 'r', encoding='utf-8') as f:
+                xml_content = f.read()
+        else:
+            raise ValueError("请提供 cbz_path 或 xml_file 参数, 否则无法处理 XML 文件")
+        return xml_parse(xml_content)
+
+    def update_comicinfo_count(self, count, cbz_path: Path) -> Path:
+        """ Update the Count field in ComicInfo.xml
+        Args:
+            cbz_path (Path): path to the CBZ file
+            xml_filename (str, optional): XML file name. Defaults to "ComicInfo.xml".
+            xsd_filename (str, optional): XSD file name. Defaults to "ComicInfo.xsd".
+        Returns:
+            Path: path to the regenerated ComicInfo.xml
+        """
+        # read the ComicInfo.xml file
+        comicinfo = self._xml_file_to_comicinfo(cbz_path=cbz_path)
+        comicinfo.Count = count
+        # save the updated ComicInfo.xml
+        return self._parse_comicinfo(comicinfo, save_dir=os.path.dirname(cbz_path))
+
    def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=XSD_FILE):
        """ Generate ComicInfo.xml from JSON data
        """
@@ -395,6 +501,7 @@ class ComicInfoXml:
        comic.Title = json_data.get("chapter", "")
        comic.Series = json_data.get("name", "")
        comic.Writer = json_data.get("author", "")
+        comic.Count = json_data.get("count", -1)
        comic.AgeRating = json_data.get("age_rating", "")
        comic.Tags = json_data.get("tags", "")
        comic.Summary = json_data.get("description", "")
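The new `_xml_file_to_comicinfo` leans on `xmltodict`: child elements become plain dictionary keys, repeated elements become lists, and XML attributes are exposed with an `@` prefix. A self-contained sketch with invented values:

```python
import xmltodict

xml_str = """<?xml version="1.0" encoding="utf-8"?>
<ComicInfo>
  <Title>Chapter 2</Title>
  <PageCount>2</PageCount>
  <Pages>
    <Page Image="0" ImageSize="1024" Key="" ImageWidth="800" ImageHeight="1200"/>
    <Page Image="1" ImageSize="2048" Key="" ImageWidth="800" ImageHeight="1200"/>
  </Pages>
</ComicInfo>"""

doc = xmltodict.parse(xml_str)
comic = doc["ComicInfo"]
print(comic["Title"])                            # child elements are plain keys
for page in comic["Pages"]["Page"]:              # repeated <Page> elements form a list
    print(page["@Image"], page["@ImageWidth"])   # attributes carry an "@" prefix
```

One caveat: when `<Pages>` holds a single `<Page>`, `xmltodict` returns a dict rather than a list, so the page loop in `_xml_file_to_comicinfo` may need an `isinstance` guard for one-page archives.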
@@ -122,6 +122,14 @@ class MangaInfo(BaseModel):
            list_value.append(val)
        return FileNaming.chinese_file_name(",".join(list_value))

+    status: str
+    """Manga status"""
+    @field_validator('status', mode='before')
+    def validate_status(cls, v):
+        if isinstance(v, str):
+            return FileNaming.chinese_file_name(v)
+        return v
+
    description: Optional[str] = None
    """Manga description"""
    @field_validator('description', mode='before')
@@ -235,11 +243,17 @@ class MangaItem(BaseModel):
        filename_list = []
        for image in cls.chapter_images:
            filename_list.append(image.filename)
+        count = -1
+        if cls.info.status == "已完结" and len(cls.chapters) > 1:
+            # only add the Count field when this chapter is the final one
+            if cls.number > 0 and cls.number == len(cls.chapters):
+                count = len(cls.chapters)

        return {
            "name": cls.info.title,
            "chapter": cls.chapter.title,
            "author": cls.info.author,
+            "count": count,
            "tags": cls.info.tags,
            "images": filename_list,
            "description": cls.info.description,
@@ -469,6 +469,36 @@ class CBZUtils:
        #os.remove(cbz_path)
        print(f"remove cbz {cbz_path}")

+    def update_cbz_with_new_xml(self, cbz_path, new_xml_content, output_path=None):
+        """Write the newly generated ComicInfo.xml back into the CBZ file"""
+        try:
+            # the output path defaults to the original file (overwrite in place)
+            if output_path is None:
+                output_path = cbz_path
+
+            # create a temporary file to handle the overwrite
+            with NamedTemporaryFile(delete=False) as tmp:
+                tmp.close()
+                shutil.move(cbz_path, tmp.name)
+                # read the original archive and replace ComicInfo.xml
+                with ZipFile(tmp.name, 'r') as source_zip:
+                    with ZipFile(output_path, 'w') as new_zip:
+                        # copy the original entries (skip the old XML)
+                        for item in source_zip.infolist():
+                            if item.filename.lower() != 'comicinfo.xml':
+                                new_zip.writestr(item, source_zip.read(item.filename))
+
+                        # add the new XML
+                        new_zip.writestr("ComicInfo.xml", new_xml_content)
+
+            os.remove(tmp.name)  # clean up the temporary file
+            return True
+        except Exception as e:
+            print(f"更新 CBZ 文件失败: {e}")
+            if os.path.exists(tmp.name):
+                shutil.move(tmp.name, cbz_path)  # restore the backup
+            raise exit(f"更新失败")

class ImageUtils:

    @classmethod
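Together with `update_comicinfo_count` from the ComicInfo.py changes above, the intended call sequence appears to be: regenerate ComicInfo.xml with the new count, splice it into the archive, then drop the temporary XML. A rough usage sketch; the path is illustrative, and the module that exports `CBZUtils` is not named in this diff, so the commented import is only an assumption:

```python
from pathlib import Path

from src.common.ComicInfo import ComicInfoXml   # per the import change below
# from src.common.utils import CBZUtils         # assumed location, adjust as needed


def refresh_count(cbz_path: Path, total_chapters: int, cbz_utils) -> None:
    """Regenerate ComicInfo.xml with the series count and write it back into the CBZ."""
    xml_path = ComicInfoXml().update_comicinfo_count(count=total_chapters, cbz_path=cbz_path)
    cbz_utils.update_cbz_with_new_xml(str(cbz_path), xml_path.read_text(encoding="utf-8"))
    xml_path.unlink()   # the temporary ComicInfo.xml is no longer needed


# refresh_count(Path("CBZ/rm_comic/SomeSeries/chapter_01.cbz"), 37, CBZUtils())
```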
@@ -11,7 +11,7 @@ from src.common.item import Chapter, MangaItem, MangaInfo,CoverItem
from src.common.exceptions import SiteError, NetworkError, ParseError
from src.common.logging import setup_logging
from src.common.naming import DirectoryNaming,FileNaming
-from src.common.ComicInfo import ComicInfo, ImageInfo
+from src.common.ComicInfo import ComicInfo, ImageInfo, ComicInfoXml

logger = setup_logging(__name__)

@@ -88,10 +88,27 @@ class BaseSite(ABC):
        try:
            # result_type list[Chapter]
            list_chapter = manga_info.get_list_chapter()
+            # temporary addition: begin
+            # get the latest chapter
+            last_chapter = list_chapter[-1] if list_chapter else []
+            # temporary addition: end
            down_chapter = []
            for chapter in list_chapter:
                cbz_path = FileNaming.chapter_cbz(manga_info=manga_info,chapter=chapter)
                if os.path.exists(cbz_path):
+                    # temporary addition: begin
+                    if chapter.title == last_chapter.title and manga_info.status == "已完结":
+                        # the latest chapter of a completed manga is not downloaded again
+                        ci = ComicInfoXml()._xml_file_to_comicinfo(cbz_path=cbz_path)
+                        if ci.Count == "":
+                            # generate ComicInfo.xml
+                            xml_path = ComicInfoXml().update_comicinfo_count(count=len(list_chapter), cbz_path=cbz_path)
+                            # write the updated ComicInfo.xml back into the CBZ file
+                            CBZUtils().update_cbz_with_new_xml(cbz_path, xml_path.read_text(encoding="utf-8"))
+                            # delete the temporarily generated ComicInfo.xml once the update is done
+                            xml_path.unlink()
+                            logger.debug(f"更新 {cbz_path} 的 ComicInfo.xml Count完成")
+                    # temporary addition: end
                    logger.debug(f"{chapter.title} 章节已存在")
                    chapter.status = "downloaded"
                    down_chapter.append(chapter)
@@ -13,6 +13,9 @@ selectors:
  author:
    selector: '//div[@class="basis-3/5 text-sm sm:text-base"]//span[@class="text-foreground"]/text()'
    index: 0
+  status:
+    selector: '//div[@class="basis-3/5 text-sm sm:text-base"]//span[@class="text-foreground"]/text()'
+    index: 1
  description:
    selector: '//div[@class="my-2 text-foreground text-sm sm:text-base"]/p/text()'
    index: 1
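The new `status` entry reuses the author XPath and simply picks a different match via `index`. Roughly how such a selector/index pair is applied with lxml; the HTML snippet and the glue code are illustrative, not the project's actual loader:

```python
from lxml import html

page = html.fromstring("""
<div class="basis-3/5 text-sm sm:text-base">
  <span class="text-foreground">Some Author</span>
  <span class="text-foreground">已完结</span>
</div>
""")

selector = '//div[@class="basis-3/5 text-sm sm:text-base"]//span[@class="text-foreground"]/text()'
matches = page.xpath(selector)              # list of matched text nodes
author, status = matches[0], matches[1]     # index 0 -> author, index 1 -> status
print(author, status)
```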
test.py (82 lines changed)
@@ -1,5 +1,5 @@
from src.common.naming import FileNaming
-from src.common.ComicInfo import ImageInfo
+from src.common.ComicInfo import ImageInfo, ComicInfo as ci, ComicPageInfo
from zipfile import ZipFile
from datetime import datetime
import time, shutil,re, xxhash, json
@@ -288,6 +288,33 @@ class comicInfo:
        except:
            raise exit(f"ver_comicinfo_xml 错误")

+    def clear_cbz(self):
+        # remove CBZ files smaller than 3KB
+        # comicInfo().update_comicinfo_cbz("")
+        #cbz_path = "/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/福利女姊姊/第1话 福利女姊姊.CBZ"
+
+        dir_path = "CBZ/rm_comic"
+        #dir_path = "/mnt/Comics/CBZ/rm_comic"
+        for dir in os.listdir(dir_path):
+            c_dir = os.path.join(dir_path, dir)
+            if os.path.isdir(c_dir):
+                files = list(FileNaming.get_filenames_optimized(c_dir, ext_filter=['.CBZ']))
+                for file in files:
+                    # get the file creation time (only available on Linux/macOS)
+                    # modification time
+                    create_time = time.localtime(os.utime(file))  # note: st_birthtime is available on Linux/macOS, but not every system supports it
+                    # format the time
+                    formatted_time = time.strftime('%Y%m%d%H', create_time)
+                    if int(formatted_time) > 2025020401:
+                        print(f"{file} 文件创建时间:", formatted_time)
+                        # update the ComicInfo.xml
+                        # comicInfo().update_comicinfo_cbz(file)
+                        # check whether the CBZ contains a ComicInfo.xml
+                        comicInfo().ver_comicinfo_xml(file)
+                        #if size < 3000:
+                        #    os.remove(file)
+                        #    print(f"已删除{file}")
+

    def _comic_info_xml_pages(self, zip_file):
        """Get the value of the <PageCount> tag in ComicInfo.xml"""
@@ -324,7 +351,7 @@ class comicInfo:
        data["list_hash"] = self.generate_xxhash(list_page)
        return data

-    def generate_xxhash(self, data: Any) -> str:
+    def _generate_xxhash(self, data: Any) -> str:
        """
        Use xxhash to generate a faster hash value

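`generate_xxhash` is only renamed to `_generate_xxhash` here; its body is outside the hunk. The usual xxhash pattern for hashing an arbitrary Python value looks roughly like the following; serialising with `json.dumps` is an assumption for the sketch, not necessarily what the method does:

```python
import json
import xxhash

def generate_xxhash(data) -> str:
    """Hash any JSON-serialisable value and return its hex digest."""
    hasher = xxhash.xxh64()
    hasher.update(json.dumps(data, sort_keys=True, ensure_ascii=False).encode("utf-8"))
    return hasher.hexdigest()

print(generate_xxhash(["001.jpg", "002.jpg"]))   # identical input gives an identical digest
```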
@@ -344,7 +371,7 @@ class comicInfo:
        # return the hexadecimal digest
        return hasher.hexdigest()

-    def extract_duplicate_files(self, data: List[Dict[str, str]]) -> Dict[str, List[str]]:
+    def _extract_duplicate_files(self, data: List[Dict[str, str]]) -> Dict[str, List[str]]:
        """
        Extract the filenames that share a duplicate list_hash

@@ -372,35 +399,16 @@ class comicInfo:

        return duplicates

-if __name__ == "__main1__":
-    # remove CBZ files smaller than 3KB
-    # comicInfo().update_comicinfo_cbz("")
-    #cbz_path = "/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/福利女姊姊/第1话 福利女姊姊.CBZ"
-
-    dir_path = "CBZ/rm_comic"
-    #dir_path = "/mnt/Comics/CBZ/rm_comic"
-    for dir in os.listdir(dir_path):
-        c_dir = os.path.join(dir_path, dir)
-        if os.path.isdir(c_dir):
-            files = list(FileNaming.get_filenames_optimized(c_dir, ext_filter=['.CBZ']))
-            for file in files:
-                # get the file creation time (only available on Linux/macOS)
-                # modification time
-                create_time = time.localtime(os.utime(file))  # note: st_birthtime is available on Linux/macOS, but not every system supports it
-                # format the time
-                formatted_time = time.strftime('%Y%m%d%H', create_time)
-                if int(formatted_time) > 2025020401:
-                    print(f"{file} 文件创建时间:", formatted_time)
-                    # update the ComicInfo.xml
-                    # comicInfo().update_comicinfo_cbz(file)
-                    # check whether the CBZ contains a ComicInfo.xml
-                    comicInfo().ver_comicinfo_xml(file)
-                    #if size < 3000:
-                    #    os.remove(file)
-                    #    print(f"已删除{file}")
-if __name__ == "__main__":
+    def delete_repeat_file(self, cbz_path) -> None:
+        """ Delete duplicate image chapters among the CBZ files
+
+        Args:
+            cbz_path (_type_): _description_
+        """
+
        # batch-delete duplicate image chapters under each manga directory
-    # comicInfo()._comic_info_xml_pages("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第36话 36.CBZ")

        dir_path = "CBZ/rm_comic"
        #dir_path = "/mnt/Comics/CBZ/rm_comic"
        for dir in os.listdir(dir_path):
@@ -409,12 +417,10 @@ if __name__ == "__main__":
                comic_pages = []
                files = list(FileNaming.get_filenames_optimized(c_dir, ext_filter=['.CBZ']))
                for file in files:
-                    page_data = comicInfo()._comic_info_xml_pages(file)
+                    page_data = self._comic_info_xml_pages(file)
                    comic_pages.append(page_data)
-                    #print(page_data)
                # finished reading one manga
-                #print(comic_pages)
-                duplicates = comicInfo().extract_duplicate_files(comic_pages)
+                duplicates = comicInfo()._extract_duplicate_files(comic_pages)
                for hash_val, delete_files in duplicates.items():
                    # delete the duplicate files
                    for file_path in delete_files:
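`_extract_duplicate_files` groups CBZ paths by their `list_hash`; the grouping itself is the standard `defaultdict` pattern, sketched here with made-up data (which duplicate the real method keeps or drops is its own policy):

```python
from collections import defaultdict

page_data = [
    {"file": "ch_36.CBZ", "list_hash": "aa11"},
    {"file": "ch_37.CBZ", "list_hash": "aa11"},
    {"file": "ch_38.CBZ", "list_hash": "bb22"},
]

groups = defaultdict(list)
for item in page_data:
    groups[item["list_hash"]].append(item["file"])

# keep hashes that occur more than once; treat everything after the first file as a duplicate
duplicates = {h: files[1:] for h, files in groups.items() if len(files) > 1}
print(duplicates)   # {'aa11': ['ch_37.CBZ']}
```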
@@ -424,3 +430,11 @@ if __name__ == "__main__":
                    except Exception as e:
                        print(f"删除失败 {file_path}: {e}")

+
+if __name__ == "__main__":
+    print("开始处理")
+    # ComicInfoXml()._xml_file_to_comicinfo("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
+    xml_path = ComicInfoXml().update_comicinfo_count(37,"/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ")
+    comicInfo().update_cbz_with_new_xml("/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/CBZ/rm_comic/和朋友的妈妈做朋友/第37话 37.CBZ", xml_path.read_text(encoding="utf-8"))
+    #items = ci().__dict__.keys()
+    #print(items)
tests/common/test_ComicInfo.py (new file, 97 lines)
@@ -0,0 +1,97 @@
+# module_b.py
+import sys
+import os
+
+# get the directory that contains the current file
+# current_dir = os.path.dirname(os.path.abspath(__file__))
+current_dir = "/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader"
+# add the parent directory to sys.path
+# parent_dir = os.path.join(current_dir, '..')
+# sys.path.insert(0, parent_dir)
+sys.path.insert(0, current_dir)
+
+# absolute imports can be used from here on
+from src.common.ComicInfo import ComicInfo, ComicPageInfo, ImageInfo, ComicInfoXml
+from os import path as Path
+
+class test_ImageInfo:
+
+    def test_get_image_size(self):
+        print(ImageInfo().get_image_size("photo.jpg"))
+
+    def test_get_image_hash(self):
+        print(ImageInfo().get_image_hash_advanced("photo.jpg"))
+
+    def test_get_image_metadata(self):
+        """Get image information"""
+        page = self.get_image_metadata("photo.jpg")
+        print(page)
+
+    def test_get_image_metadata_from_zip(self):
+        """Get image information from a ZIP file"""
+        pages = ImageInfo().get_image_metadata_from_zip("test.zip")
+        print(pages)
+
+# Define the ComicInfo and ComicPageInfo classes
+class test_ComicInfo:
+    # Options from ComicInfo.xml
+
+    def test_ToString(self):
+        """Test the string representation of ComicInfo"""
+        comic = ComicInfo()
+        comic.Title = "Test Comic"
+        comic.Series = "Test Series"
+        comic.Number = "1"
+        comic.PageCount = 10
+        comic.Writer = "Test Writer"
+        comic.Pages.append(ComicPageInfo())
+        print(comic.toString())
+
+class test_ComicPageInfo:
+    # The <Page> entries in ComicInfo.xml
+    def test_ToString(self):
+        """Test the string representation of ComicPageInfo"""
+        page = ComicPageInfo()
+        page.Image = "test_image.jpg"
+        page.ImageSize = 123456
+
+class test_ComicInfoXml:
+
+    def test_get_page_count(self):
+        """Test reading the PageCount from ComicInfo.xml"""
+        zip_file = Path("test.zip")
+        page_count = self.get_page_count(zip_file)
+        print(f"zip_file={zip_file} PageCount: {page_count}")
+
+    def test_scrapy_xml_by_json(self):
+        """ Generate ComicInfo.xml from JSON data
+        """
+        json_data = {
+            "name": "选手村母猪调教",
+            "chapter": "第2话-总教练最「疼爱」的选手",
+            "author": "沃林,蜜果实",
+            "tags": "凌辱,调教,报仇,选手村,体操,硬调色情,新作",
+            "images": [
+                "001.jpg", "scramble=6_002.jpg", "scramble=5_003.jpg",
+                "004.jpg", "005.jpg", "scramble=5_006.jpg",
+                "007.jpg", "008.jpg", "scramble=7_009.jpg",
+                "scramble=9_010.jpg", "011.jpg", "012.jpg",
+                "scramble=6_013.jpg", "014.jpg", "015.jpg",
+                "scramble=7_016.jpg", "017.jpg", "018.jpg",
+                "019.jpg"
+            ],
+            "description": ("「总教练,我愿意用身体换取机会…」在腐败的选手村里,总教练握有绝对的权力,"
+                            + "选手们只能任凭摆布。人们对成功的渴望不断滋长,却也因为过度的欲望濒临崩溃…"),
+            "genre": "韩漫",
+            "age_rating": "R18+",
+            "series": "选手村母猪调教",
+            "number": 2,
+            'page_count': 286
+        }
+        save_dir = "/Users/cc/Documents/Dev/WorkSpace/VSCodeProjects/NewComicDownloader/output/rm_comic/images/选手村母猪调教/第2话-总教练最「疼爱」的选手"
+        xsd_file = "ComicInfo_2.1.xsd"
+        pages = ComicInfoXml().scrapy_xml_by_json(json_data, save_dir=save_dir, xsd_file=xsd_file)
+        print(f"Generated pages: {pages}")
+
+if __name__ == "__main__":
+    test_ComicInfoXml().test_scrapy_xml_by_json()