From 1e40ebdcfb2ff22c5267d3062573e5be96c99059 Mon Sep 17 00:00:00 2001
From: caiwx86
Date: Mon, 28 Oct 2024 12:40:18 +0800
Subject: [PATCH] fix

---
Review notes (not part of the commit message; ignored by `git am`):

* Comics/_utils/utils.py: the folder-listing helper in class oldUtils
  (presumably old_images(); confirm) now returns [] instead of None when
  the folder does not exist, so callers can take len() of the result.
* Comics/pipelines.py, get_media_requests: a download request is now
  yielded only when image_scramble_exits() is false. The former
  "file exists" log line is commented out rather than deleted.
  NOTE(review): `return False` inside the for loop of this generator ends
  iteration for the whole item once an icon cache file exists; confirm
  whether `continue` was intended.
* Comics/pipelines.py, hunk at line 101: re-enables the image-count check.
  The method returns early when the number of files found in the chapter
  directory differs from the number of image URLs. This depends on the
  [] return value introduced in utils.py above.
* test.py (new): when the module is executed or imported it walks
  BASE_OUTPUT/CBZ and deletes every file whose modification date equals
  the hard-coded "2024-10-28".
  NOTE(review): this looks like a one-off cleanup script, not a test;
  consider an `if __name__ == "__main__":` guard or keeping it out of
  the repository.
* Indentation inside the hunks below was reconstructed; verify it
  against the target files before applying.

 Comics/_utils/utils.py |  2 +-
 Comics/pipelines.py    | 23 +++++++++++++----------
 test.py                | 22 ++++++++++++++++++++++
 3 files changed, 36 insertions(+), 11 deletions(-)
 create mode 100644 test.py

diff --git a/Comics/_utils/utils.py b/Comics/_utils/utils.py
index 12a0de6..bc958d4 100644
--- a/Comics/_utils/utils.py
+++ b/Comics/_utils/utils.py
@@ -923,7 +923,7 @@ class oldUtils:
         if os.path.exists(folder):
             file_names = [f.name for f in pathlib.Path(folder).iterdir() if f.is_file()]
         else:
-            return None
+            return []
         old_item = []
         for file_name in file_names:
             file_split = file_name.split(".")
diff --git a/Comics/pipelines.py b/Comics/pipelines.py
index 7e5e569..a999c68 100644
--- a/Comics/pipelines.py
+++ b/Comics/pipelines.py
@@ -19,6 +19,7 @@ from Comics._utils.ComicInfo import ComicInfoXml
 class ComicsPipeline():
     '''
     解析前端传入的item数据
+    将数据进行序列化后传出
     '''
     # item就是yield后面的对象
     def process_item(self, item: ComicItem, spider):
@@ -64,19 +65,18 @@ class ImgDownloadPipeline(BaseImagesPipeline):
 
     def get_media_requests(self, item, info):
         comic = ComicLoader(item=item)
+        # 获取需要解析下载的图像
         images_item = comic.parse_images()
         for image_item in images_item:
-            if_down = True
-            image_url = image_item["image_url"]
-            image_path = image_item["image_path"]
+            image_url, image_path = [ image_item["image_url"], image_item["image_path"]]
             if image_item["image_type"] == "Icon":
                 image_path = super().get_file_path(item, result_type="icon_cache")
                 if fu.exists(image_path): return False
-            # 图像(含加密图像)已存在
-            if super().image_scramble_exits(item, image_path):
-                if_down = False
-                logging.info(f"file exists: IMAGE_STORE {image_path}")
-            if if_down:
+            # 图像(含加密图像)不存在
+            if not super().image_scramble_exits(item, image_path):
+                # if_down = False
+                # logging.info(f"file exists: IMAGE_STORE {image_path}")
+                # if if_down:
                 logging.info(f"downloading {image_url} --> IMAGE_STORE {image_path}")
                 yield scrapy.Request(url=image_url, meta={'path': image_path})
 
@@ -101,12 +101,15 @@ class ImgDownloadPipeline(BaseImagesPipeline):
             item (_type_): Comic item数据
             info (_type_): 信息
         """
+        # 存在未下载图像数据则重试
         if not super().success_completed(item, results): return
         super().update_icon(item)
         cbz_path = super().get_file_path(item, result_type="cbz")
         chapter_dir = ComicPath(item=item).file_path(result_type=ComicPath().MAPPING_IMAGES_DIR)
-        # images_file = oldUtils().old_images(folder=chapter_dir)
-        # if images_file == None or len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
+        images_file = oldUtils().old_images(folder=chapter_dir)
+        # 校验数据是正确
+        if len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
+        # CBZ文件是否已存在
         if fu.exists(cbz_path):
             #self.update_icon(item)
             chapter = os.path.basename(cbz_path).split(".")[0]
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..b0796e6
--- /dev/null
+++ b/test.py
@@ -0,0 +1,22 @@
+import os
+from datetime import datetime
+from Comics.settings import BASE_OUTPUT
+
+def list_files_with_times(root_folder):
+    # 遍历主文件夹下的子文件夹和文件
+    for dirpath, dirnames, filenames in os.walk(root_folder):
+        for filename in filenames:
+            file_path = os.path.join(dirpath, filename)
+            # 获取文件的最后修改时间
+            modification_time = os.path.getmtime(file_path)
+            # 格式化时间
+            # formatted_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d %H:%M:%S')
+            remove_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d')
+            if remove_time == "2024-10-28":
+                formatted_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d %H:%M:%S')
+                os.remove(file_path)
+                print(f"File: {file_path} | Last Modified: {formatted_time}")
+
+# 使用示例
+root_folder = os.path.join(BASE_OUTPUT, 'CBZ/') # 替换为实际文件夹路径
+list_files_with_times(root_folder)
\ No newline at end of file