From 1e40ebdcfb2ff22c5267d3062573e5be96c99059 Mon Sep 17 00:00:00 2001
From: caiwx86 <caiwenxiu0806@163.com>
Date: Mon, 28 Oct 2024 12:40:18 +0800
Subject: [PATCH] Check downloaded image count before CBZ step; return [] for missing folder

---
 Comics/_utils/utils.py |  2 +-
 Comics/pipelines.py    | 23 +++++++++++++----------
 test.py                | 22 ++++++++++++++++++++++
 3 files changed, 36 insertions(+), 11 deletions(-)
 create mode 100644 test.py

diff --git a/Comics/_utils/utils.py b/Comics/_utils/utils.py
index 12a0de6..bc958d4 100644
--- a/Comics/_utils/utils.py
+++ b/Comics/_utils/utils.py
@@ -923,7 +923,7 @@ class oldUtils:
         if os.path.exists(folder):
             file_names = [f.name for f in pathlib.Path(folder).iterdir() if f.is_file()]
         else:
-            return None
+            return [] 
         old_item = []
         for file_name in file_names:
             file_split = file_name.split(".")
diff --git a/Comics/pipelines.py b/Comics/pipelines.py
index 7e5e569..a999c68 100644
--- a/Comics/pipelines.py
+++ b/Comics/pipelines.py
@@ -19,6 +19,7 @@ from Comics._utils.ComicInfo import ComicInfoXml
 class ComicsPipeline():
     '''
         解析前端传入的item数据
+        将数据进行序列化后传出
     '''
     # item就是yield后面的对象
     def process_item(self, item: ComicItem, spider):
@@ -64,19 +65,18 @@ class ImgDownloadPipeline(BaseImagesPipeline):
     
     def get_media_requests(self, item, info):
         comic = ComicLoader(item=item)
+        # 获取需要解析下载的图像
         images_item = comic.parse_images()
         for image_item in images_item:
-            if_down = True
-            image_url = image_item["image_url"]
-            image_path = image_item["image_path"]
+            image_url, image_path = [ image_item["image_url"], image_item["image_path"]]
             if image_item["image_type"] == "Icon":
                image_path = super().get_file_path(item, result_type="icon_cache")
                if fu.exists(image_path): return False
-            # 图像（含加密图像）已存在
-            if super().image_scramble_exits(item, image_path):
-                if_down = False
-                logging.info(f"file exists: IMAGE_STORE {image_path}")
-            if if_down:
+            # 图像（含加密图像）不存在
+            if not super().image_scramble_exits(item, image_path):
+            #    if_down = False
+            #    logging.info(f"file exists: IMAGE_STORE {image_path}")
+            # if if_down:
                 logging.info(f"downloading {image_url} --> IMAGE_STORE {image_path}")
                 yield scrapy.Request(url=image_url, meta={'path': image_path}) 
 
@@ -101,12 +101,15 @@ class ImgDownloadPipeline(BaseImagesPipeline):
             item (_type_): Comic item数据
             info (_type_): 信息
         """
+        # 存在未下载图像数据则重试
         if not super().success_completed(item, results): return
         super().update_icon(item)
         cbz_path = super().get_file_path(item, result_type="cbz")
         chapter_dir = ComicPath(item=item).file_path(result_type=ComicPath().MAPPING_IMAGES_DIR)
-        # images_file = oldUtils().old_images(folder=chapter_dir)
-        # if images_file == None or len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
+        images_file = oldUtils().old_images(folder=chapter_dir)
+        # 校验数据是正确
+        if len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
+        # CBZ文件是否已存在
         if fu.exists(cbz_path):
             #self.update_icon(item)
             chapter = os.path.basename(cbz_path).split(".")[0]
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..b0796e6
--- /dev/null
+++ b/test.py
@@ -0,0 +1,22 @@
+import os
+from datetime import datetime
+from Comics.settings import BASE_OUTPUT
+
+def list_files_with_times(root_folder, remove_date="2024-10-28"):
+    """Print every file's mtime under root_folder; delete files whose
+    local-time modification date ('YYYY-MM-DD') equals remove_date."""
+    for dirpath, _dirnames, filenames in os.walk(root_folder):
+        for filename in filenames:
+            file_path = os.path.join(dirpath, filename)
+            modified = datetime.fromtimestamp(os.path.getmtime(file_path))
+            # Format before the date check: the print below runs for every
+            # file, so it must never see an unbound or stale timestamp.
+            formatted_time = modified.strftime('%Y-%m-%d %H:%M:%S')
+            # Deletion is permanent; only files from remove_date are removed.
+            if modified.strftime('%Y-%m-%d') == remove_date:
+                os.remove(file_path)
+            print(f"File: {file_path} | Last Modified: {formatted_time}")
+
+# Example usage (runs at import time and deletes matching files under the CBZ folder)
+root_folder = os.path.join(BASE_OUTPUT, 'CBZ/')  # replace with the actual folder path
+list_files_with_times(root_folder)
\ No newline at end of file