This commit is contained in:
caiwx86 2025-01-04 21:49:03 +08:00
parent 92c5bc19b0
commit c04e9a45c2
4 changed files with 74 additions and 24 deletions

View File

@ -1,5 +1,5 @@
import base64,hashlib,os,shutil,os.path,pathlib
import math,time,json,datetime,logging
import math,time,json,datetime,logging,zipfile
import re,requests,time,xmlschema
from datetime import date
from Comics import settings
@ -505,6 +505,7 @@ class CBZUtils:
zf.close()
logging.info(f"打包完成:{target_file}")
@DeprecationWarning
@classmethod
def packComicChapterCBZ(cls, src_dir, dts_path, comic_info_images, remove=True):
if os.path.exists(src_dir):
@ -522,6 +523,23 @@ class CBZUtils:
# validation
return cls.cbz_validate(dts_path, comic_info_images)
@classmethod
def comicChapterCBZPack(cls, src_dir, dts_path, remove=True):
    """Descramble, zip and validate one downloaded chapter directory.

    Args:
        src_dir: Directory holding the chapter's downloaded files.
        dts_path: Destination path of the CBZ archive to create.
        remove: When true, delete ``src_dir`` once the archive has passed
            validation.

    Returns:
        bool: The result of ``cls.comic_cbz_validate(dts_path)``, or
        ``False`` when ``src_dir`` does not exist or a scrambled image
        could not be descrambled.
    """
    if not os.path.exists(src_dir):
        # Explicit False (previously an implicit None) keeps the return
        # type consistent for callers that test the result.
        return False

    for file in os.listdir(src_dir):
        if not file.startswith(ComicPath.PREFIX_SCRAMBLE):
            continue
        try:
            imageUtils.deScrambleImagesByPath(os.path.join(src_dir, file))
        except Exception as e:
            # NOTE(review): the reviewed view had lost its indentation; the
            # early return is reconstructed as belonging to this handler.
            print(f"删除 {file} 发生错误 {e},已跳过")
            return False

    cls.zip_compression(src_dir, dts_path)
    # NOTE(review): short pause kept from the original; presumably lets the
    # archive settle on disk before it is read back - confirm it is needed.
    time.sleep(0.1)

    # Validate BEFORE deleting the source: a failed validation removes the
    # archive, so deleting src_dir first would lose both copies.
    is_valid = cls.comic_cbz_validate(dts_path)
    if remove and is_valid:
        shutil.rmtree(src_dir)
    return is_valid
@classmethod
def replaceZip(cls, filepath, unpack_dir=None):
if not cls.compareFileDate(filepath): return None
@ -574,6 +592,7 @@ class CBZUtils:
print(e)
return result
@DeprecationWarning
@classmethod
def cbz_validate(cls, zip_path, comic_info_images):
cbz_info = cls.zip_info(zip_path)
@ -588,6 +607,38 @@ class CBZUtils:
#ntfy.sendMsg(f"validating fail === {zip_path}, cbz_info={cbz_info},zip_info={comic_info_images}", alert=True)
return False
@classmethod
def comic_info_page_count(cls, zip_file):
    """Return the ``<PageCount>`` value recorded in an archive's ComicInfo.xml.

    Args:
        zip_file: Path (or binary file-like object) of the ZIP/CBZ archive.

    Returns:
        The digits of the first ``<PageCount>`` element as a string, or
        ``None`` when ComicInfo.xml contains no such element.

    Raises:
        KeyError: If the archive has no ``ComicInfo.xml`` member.
    """
    # Read the metadata member straight out of the archive.
    with zipfile.ZipFile(zip_file, 'r') as z:
        file_string = z.read('ComicInfo.xml').decode("utf-8")

    # A regular expression is enough to pull the single numeric tag.
    match = re.search(r"<PageCount>(\d+)</PageCount>", file_string)
    if match is None:
        return None

    page_count = match.group(1)
    print(f"zip_file={zip_file} PageCount: {page_count}")
    return page_count
@classmethod
def comic_cbz_validate(cls, zip_file):
    """Check a CBZ archive against the page count in its ComicInfo.xml.

    The number of entries reported by ``cls.zip_info`` must be non-zero and
    equal to the ``<PageCount>`` value read by ``cls.comic_info_page_count``.
    On failure the archive is deleted. A gotify notification is sent in
    both cases.

    Args:
        zip_file: Path of the CBZ archive to validate.

    Returns:
        bool: ``True`` when the archive is valid, ``False`` otherwise.
    """
    cbz_info = cls.zip_info(zip_file)
    info_count = cls.comic_info_page_count(zip_file)

    # PageCount arrives as text (or None); compare its numeric value, not
    # the length of the string - len("15") is 2, not 15.
    try:
        expected_pages = int(info_count)
    except (TypeError, ValueError):
        expected_pages = None

    # NOTE(review): assumes zip_info lists only the page entries (no
    # ComicInfo.xml) and may return None on error - confirm against zip_info.
    actual_pages = len(cbz_info) if cbz_info else 0

    if actual_pages != 0 and actual_pages == expected_pages:
        ntfy.gotify(title="漫画下载完成", message=f"validating successfully === {zip_file}")
        return True

    os.remove(zip_file)
    ntfy.gotify(title="漫画校验失败", message=f"validating fail === {zip_file}, cbz_info={cbz_info},zip_info={info_count}")
    return False
# 检测工具类
class checkUtils:

View File

@ -130,6 +130,6 @@ class ImgDownloadPipeline(BaseImagesPipeline):
else:
# ComicInfoXml 生成
ComicInfoXml().scrapy_xml_by_json(item, save_dir= chapter_dir)
if CBZUtils.packComicChapterCBZ(src_dir= chapter_dir, dts_path= cbz_path, comic_info_images= images_urls, remove=False):
if CBZUtils.comicChapterCBZPack(src_dir= chapter_dir, dts_path= cbz_path, remove=False):
super().update_icon(item)
self.pack_icon(item)

View File

@ -38,7 +38,7 @@ class RmComicSpider(scrapy.Spider):
# 循环遍历根据配置文件自动解析并注入的章节名和章节链接
new_chapter = oldUtils().new_files(files=comic_item.get_chapters(), folder=cbz_dir)
# 清理多余章节
oldUtils().clean_old_files(files=comic_item.get_chapters(), folder=cbz_dir, move_folder=move_folder)
# oldUtils().clean_old_files(files=comic_item.get_chapters(), folder=cbz_dir, move_folder=move_folder)
for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
if ComicPath.chinese_convert(ComicPath.fix_file_name(chapter)) in new_chapter:
# 打包导出item数据

37
test.py
View File

@ -1,21 +1,20 @@
import os
from datetime import datetime
from Comics.settings import BASE_OUTPUT
import zipfile,re
def list_files_with_times(root_folder):
# Walks root_folder recursively, deletes every file whose last-modified date
# is 2024-10-28, and prints file paths with their last-modified time.
# NOTE(review): indentation is lost in this view, so it cannot be determined
# here whether the print runs for every file or only for removed ones.
# Walk the sub-folders and files under the root folder
for dirpath, dirnames, filenames in os.walk(root_folder):
for filename in filenames:
file_path = os.path.join(dirpath, filename)
# Get the file's last-modified time
modification_time = os.path.getmtime(file_path)
# Format the timestamp (full form for display, date-only form for matching)
formatted_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d %H:%M:%S')
remove_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d')
if remove_time == "2024-10-28":
os.remove(file_path)
print(f"File: {file_path} | Last Modified: {formatted_time}")
def comic_info_page_count(zip_file, info_name='ComicInfo1.xml'):
    """Return the ``<PageCount>`` value stored in an archive's metadata file.

    Args:
        zip_file: Path (or binary file-like object) of the ZIP/CBZ archive.
        info_name: Name of the XML member to read inside the archive.

    Returns:
        The digits of the first ``<PageCount>`` element as a string, or
        ``None`` when the member contains no such element.

    Raises:
        KeyError: If the archive has no member called ``info_name``.
    """
    # Read the metadata member straight out of the archive.
    with zipfile.ZipFile(zip_file, 'r') as z:
        file_string = z.read(info_name).decode("utf-8")

    # A regular expression is enough to pull the single numeric tag.
    match = re.search(r"<PageCount>(\d+)</PageCount>", file_string)
    if match is None:
        return None

    page_count = match.group(1)
    print(f"zip_file={zip_file} PageCount: {page_count}")
    return page_count
# Usage example
root_folder = os.path.join(BASE_OUTPUT, 'CBZ/') # replace with the actual folder path
list_files_with_times(root_folder)
# Print the file contents
#print(content)
comic_info_page_count(zip_file='CBZ/第15话.CBZ')