diff --git a/Comics/_utils/utils.py b/Comics/_utils/utils.py
index 8eb705c..6351aed 100644
--- a/Comics/_utils/utils.py
+++ b/Comics/_utils/utils.py
@@ -1,5 +1,5 @@
 import base64,hashlib,os,shutil,os.path,pathlib
-import math,time,json,datetime,logging
+import math,time,json,datetime,logging,zipfile
 import re,requests,time,xmlschema
 from datetime import date
 from Comics import settings
@@ -504,7 +504,8 @@ class CBZUtils:
             zf.write(path.joinpath(filename), arc_dir.joinpath(filename))
         zf.close()
         logging.info(f"打包完成:{target_file}")
-
+
+    @DeprecationWarning
     @classmethod
     def packComicChapterCBZ(cls, src_dir, dts_path, comic_info_images, remove=True):
         if os.path.exists(src_dir):
@@ -522,6 +523,23 @@ class CBZUtils:
         # validation
         return cls.cbz_validate(dts_path, comic_info_images)
 
+    @classmethod
+    def comicChapterCBZPack(cls, src_dir, dts_path, remove=True):
+        if os.path.exists(src_dir):
+            dirs = os.listdir(src_dir)
+            for file in dirs:
+                if file.startswith(ComicPath.PREFIX_SCRAMBLE):
+                    try:
+                        imageUtils.deScrambleImagesByPath(os.path.join(src_dir, file))
+                    except Exception as e:
+                        print(f"删除 {file} 发生错误 {e},已跳过")
+                        return False
+            cls.zip_compression(src_dir, dts_path)
+            time.sleep(0.1)
+            if remove: shutil.rmtree(src_dir)
+            # validation
+            return cls.comic_cbz_validate(dts_path)
+
     @classmethod
     def replaceZip(cls, filepath, unpack_dir=None):
         if not cls.compareFileDate(filepath): return None
@@ -574,6 +592,7 @@ class CBZUtils:
             print(e)
         return result
 
+    @DeprecationWarning
     @classmethod
     def cbz_validate(cls, zip_path, comic_info_images):
         cbz_info = cls.zip_info(zip_path)
@@ -587,6 +606,38 @@ class CBZUtils:
             ntfy.gotify(title="漫画校验失败", message=f"validating fail === {zip_path}, cbz_info={cbz_info},zip_info={comic_info_images}")
             #ntfy.sendMsg(f"validating fail === {zip_path}, cbz_info={cbz_info},zip_info={comic_info_images}", alert=True)
             return False
+
+    @classmethod
+    def comic_info_page_count(cls, zip_file):
+        # Open the ZIP archive
+        with zipfile.ZipFile(zip_file, 'r') as z:
+            # Read ComicInfo.xml from inside the archive
+            with z.open('ComicInfo.xml', 'r') as file:
+                # Decode the XML document from the file stream
+                file_string = file.read().decode("utf-8")
+                # Extract the value of the <PageCount> tag with a regular expression
+                match = re.search(r"<PageCount>(\d+)</PageCount>", file_string)
+                if match:
+                    page_count = match.group(1)
+                    print(f"zip_file={zip_file} PageCount: {page_count}")
+                    return page_count
+
+    @classmethod
+    def comic_cbz_validate(cls, zip_file):
+        cbz_info = cls.zip_info(zip_file)
+        info_count = cls.comic_info_page_count(zip_file)
+        if info_count is not None and len(cbz_info) == int(info_count) and len(cbz_info) != 0:
+            # logging.info(f"validating successfully === {zip_file}")
+            # ntfy.sendMsg(f"validating successfully === {zip_file}", alert=True)
+            ntfy.gotify(title="漫画下载完成", message=f"validating successfully === {zip_file}")
+            return True
+        else:
+            os.remove(zip_file)
+            ntfy.gotify(title="漫画校验失败", message=f"validating fail === {zip_file}, cbz_info={cbz_info},zip_info={info_count}")
+            #ntfy.sendMsg(f"validating fail === {zip_file}, cbz_info={cbz_info},zip_info={info_count}", alert=True)
+            return False
+
+
 
 # Check utility class
 class checkUtils:
diff --git a/Comics/pipelines.py b/Comics/pipelines.py
index 560416e..004e7c0 100644
--- a/Comics/pipelines.py
+++ b/Comics/pipelines.py
@@ -130,6 +130,6 @@ class ImgDownloadPipeline(BaseImagesPipeline):
         else:
             # Generate the ComicInfoXml
             ComicInfoXml().scrapy_xml_by_json(item, save_dir= chapter_dir)
-            if CBZUtils.packComicChapterCBZ(src_dir= chapter_dir, dts_path= cbz_path, comic_info_images= images_urls, remove=False):
+            if CBZUtils.comicChapterCBZPack(src_dir= chapter_dir, dts_path= cbz_path, remove=False):
                 super().update_icon(item)
                 self.pack_icon(item)
\ No newline at end of file
diff --git a/Comics/spiders/rm_comic.py b/Comics/spiders/rm_comic.py
index 89750a7..16eff58 100644
--- a/Comics/spiders/rm_comic.py
+++ b/Comics/spiders/rm_comic.py
@@ -38,7 +38,7 @@ class RmComicSpider(scrapy.Spider):
         # Iterate over the chapter names and chapter links auto-parsed and injected from the config file
         new_chapter = oldUtils().new_files(files=comic_item.get_chapters(), folder=cbz_dir)
         # Clean up surplus chapters
-        oldUtils().clean_old_files(files=comic_item.get_chapters(), folder=cbz_dir, move_folder=move_folder)
+        # oldUtils().clean_old_files(files=comic_item.get_chapters(), folder=cbz_dir, move_folder=move_folder)
         for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
             if ComicPath.chinese_convert(ComicPath.fix_file_name(chapter)) in new_chapter:
                 # Package and export the item data
diff --git a/test.py b/test.py
index 349e911..4b3f14d 100644
--- a/test.py
+++ b/test.py
@@ -1,21 +1,20 @@
-import os
-from datetime import datetime
-from Comics.settings import BASE_OUTPUT
+import zipfile,re
 
-def list_files_with_times(root_folder):
-    # Walk the subfolders and files under the root folder
-    for dirpath, dirnames, filenames in os.walk(root_folder):
-        for filename in filenames:
-            file_path = os.path.join(dirpath, filename)
-            # Get the file's last modification time
-            modification_time = os.path.getmtime(file_path)
-            # Format the timestamp
-            formatted_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d %H:%M:%S')
-            remove_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d')
-            if remove_time == "2024-10-28":
-                os.remove(file_path)
-            print(f"File: {file_path} | Last Modified: {formatted_time}")
-
-# Usage example
-root_folder = os.path.join(BASE_OUTPUT, 'CBZ/') # Replace with the actual folder path
-list_files_with_times(root_folder)
\ No newline at end of file
+def comic_info_page_count(zip_file):
+    # Open the ZIP archive
+    with zipfile.ZipFile(zip_file, 'r') as z:
+        # Read ComicInfo.xml from inside the archive
+        with z.open('ComicInfo.xml', 'r') as file:
+            # Decode the XML document from the file stream
+            file_string = file.read().decode("utf-8")
+            # Extract the value of the <PageCount> tag with a regular expression
+            match = re.search(r"<PageCount>(\d+)</PageCount>", file_string)
+            if match:
+                page_count = match.group(1)
+                print(f"zip_file={zip_file} PageCount: {page_count}")
+                return page_count
+            #print(file_string)
+
+# Print the file contents
+#print(content)
+comic_info_page_count(zip_file='CBZ/第15话.CBZ')
\ No newline at end of file