fix

2024-10-28 12:40:18 +08:00 · 2024-10-28 12:40:18 +08:00 · 1e40ebdcfb
commit 1e40ebdcfb
parent 4f75e2eab9
3 changed files with 36 additions and 11 deletions
--- a/Comics/_utils/utils.py
+++ b/Comics/_utils/utils.py
@ -923,7 +923,7 @@ class oldUtils:
        if os.path.exists(folder):
            file_names = [f.name for f in pathlib.Path(folder).iterdir() if f.is_file()]
        else:
-            return None
+            return [] 
        old_item = []
        for file_name in file_names:
            file_split = file_name.split(".")
--- a/Comics/pipelines.py
+++ b/Comics/pipelines.py
@ -19,6 +19,7 @@ from Comics._utils.ComicInfo import ComicInfoXml
 class ComicsPipeline():
    '''
        解析前端传入的item数据
        将数据进行序列化后传出
    '''
    # item就是yield后面的对象
    def process_item(self, item: ComicItem, spider):
@ -64,19 +65,18 @@ class ImgDownloadPipeline(BaseImagesPipeline):
    def get_media_requests(self, item, info):
        comic = ComicLoader(item=item)
        # 获取需要解析下载的图像
        images_item = comic.parse_images()
        for image_item in images_item:
-            if_down = True
+            image_url, image_path = [ image_item["image_url"], image_item["image_path"]]
            image_url = image_item["image_url"]
            image_path = image_item["image_path"]
            if image_item["image_type"] == "Icon":
               image_path = super().get_file_path(item, result_type="icon_cache")
               if fu.exists(image_path): return False
-            # 图像（含加密图像）已存在
+            # 图像（含加密图像）不存在
-            if super().image_scramble_exits(item, image_path):
+            if not super().image_scramble_exits(item, image_path):
-                if_down = False
+            #    if_down = False
-                logging.info(f"file exists: IMAGE_STORE {image_path}")
+            #    logging.info(f"file exists: IMAGE_STORE {image_path}")
-            if if_down:
+            # if if_down:
                logging.info(f"downloading {image_url} --> IMAGE_STORE {image_path}")
                yield scrapy.Request(url=image_url, meta={'path': image_path}) 
@ -101,12 +101,15 @@ class ImgDownloadPipeline(BaseImagesPipeline):
            item (_type_): Comic item数据
            info (_type_): 信息
        """
        # 存在未下载图像数据则重试
        if not super().success_completed(item, results): return
        super().update_icon(item)
        cbz_path = super().get_file_path(item, result_type="cbz")
        chapter_dir = ComicPath(item=item).file_path(result_type=ComicPath().MAPPING_IMAGES_DIR)
-        # images_file = oldUtils().old_images(folder=chapter_dir)
+        images_file = oldUtils().old_images(folder=chapter_dir)
-        # if images_file == None or len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
+        # 校验数据是正确
        if len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
        # CBZ文件是否已存在
        if fu.exists(cbz_path):
            #self.update_icon(item)
            chapter = os.path.basename(cbz_path).split(".")[0]
--- a/test.py
+++ b/test.py
@ -0,0 +1,22 @@
 import os
 from datetime import datetime
 from Comics.settings import BASE_OUTPUT
 def list_files_with_times(root_folder, remove_date="2024-10-28"):
     """Walk ``root_folder`` recursively, delete files modified on ``remove_date``, and print each file.

     Every file found under ``root_folder`` (including sub-folders) is reported
     on stdout with its last-modified timestamp. Files whose last-modified
     calendar date equals ``remove_date`` are deleted before being reported.

     Args:
         root_folder: Directory to scan recursively.
         remove_date: Date string in ``YYYY-MM-DD`` form; files last modified on
             this date are removed. Defaults to the previously hard-coded
             ``"2024-10-28"``.

     Raises:
         OSError: Propagated from ``os.path.getmtime`` / ``os.remove`` if a file
             cannot be inspected or deleted.
     """
     for dirpath, _dirnames, filenames in os.walk(root_folder):
         for filename in filenames:
             file_path = os.path.join(dirpath, filename)
             # Convert the modification time once; fromtimestamp() without a
             # tz argument yields local time, so the date comparison is local.
             modified_at = datetime.fromtimestamp(os.path.getmtime(file_path))
             # Always compute the display string so the report below never
             # touches an unbound (or stale, from a previous file) variable.
             formatted_time = modified_at.strftime('%Y-%m-%d %H:%M:%S')
             if modified_at.strftime('%Y-%m-%d') == remove_date:
                 os.remove(file_path)
             print(f"File: {file_path} | Last Modified: {formatted_time}")
 # Example usage: build the CBZ folder path under BASE_OUTPUT and process it.
 # NOTE: this runs as a module-level statement, and the call below deletes files.
 root_folder = os.path.join(BASE_OUTPUT, 'CBZ/')  # replace with the actual folder path
 list_files_with_times(root_folder)