This commit is contained in:
caiwx86 2024-07-22 02:24:50 +08:00
parent 60c4fc2ea5
commit ceac5dbc49
3 changed files with 45 additions and 18 deletions

View File

@ -27,7 +27,7 @@ class ComicsPipeline():
if fu.exists(ComicPath(item).PATH_CBZ()): result_item = ItemExporter().export_obj(item)
# 不存在漫画CBZ文件
else: result_item = JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True)
oldUtils().clean_old_files(files=result_item["chapters"], folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR))
#oldUtils().clean_old_files(files=result_item["chapters"], folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR))
return result_item
class BaseImagesPipeline(ImagesPipeline):

View File

@ -3,6 +3,7 @@ from Comics.items import ComicItem
from Comics.loader import ComicLoader
from Comics.utils import ComicPath
from Comics.utils import Conf
from Comics.utils import oldUtils
class RmComicSpider(scrapy.Spider):
name = 'rm_comic'
@ -29,20 +30,27 @@ class RmComicSpider(scrapy.Spider):
def parse_comic(self, response):
    """Parse a comic's index page and schedule downloads for new chapters.

    Builds the comic item from the per-project config, moves CBZ files that
    no longer correspond to any parsed chapter into the old-files folder,
    then yields either the finished item (CBZ already on disk) or a request
    to ``self.parse_chapter`` for each chapter that still needs fetching.

    NOTE(review): this is reconstructed from a diff in which the old,
    unfiltered loop body and the new ``if chapter in new_chapter`` version
    were superimposed; the post-commit (filtered) version is kept.
    """
    # Initialize the comic item; the project config (keyed by spider name)
    # drives automatic parsing of the response.
    comic_item = Conf().comic(self.name, ComicLoader(ComicItem(), response))
    path_comic = comic_item.load_item()
    cbz_dir = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_CBZ_DIR)
    move_folder = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR)
    # Chapters parsed from the page that have no CBZ file on disk yet.
    new_chapter = oldUtils().new_files(files=comic_item.get_chapters(), folder=cbz_dir)
    # Move stale CBZ files (no matching parsed chapter) out of the way.
    oldUtils().clean_old_files(files=comic_item.get_chapters(), folder=cbz_dir, move_folder=move_folder)
    # Iterate chapter names in lockstep with their links (config-injected).
    for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
        if chapter in new_chapter:
            # Export the item for this chapter.
            item = comic_item.load_item(chapter=chapter)
            # Final CBZ path for the chapter; presumably None when it
            # cannot be resolved — TODO confirm against ComicPath.PATH_CBZ.
            cbz_path = ComicPath(item=item).PATH_CBZ()
            # Skip chapters whose CBZ file (simplified/traditional name
            # variants handled upstream) already exists on disk.
            if cbz_path is not None and os.path.exists(cbz_path):
                logging.info(f"漫画 {cbz_path} 已存在, 跳过中...")
                yield item
            else:
                # Follow the chapter link; response handled by self.parse_chapter.
                yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
# 读取某章节下的所有图片
def parse_chapter(self, response):

View File

@ -862,7 +862,18 @@ class DBUtils:
db.remove(Query().name == name)
class oldUtils:
def clean_old_files(self, files, folder, move_folder, suffix="CBZ"):
def new_files(self, files, folder, suffix="CBZ", result_type="new"):
    """Return the chapter names that are not yet present in *folder*.

    Delegates to ``old_files()``; when that returns a list it is passed
    straight through. When it returns None (per ``old_files``, the folder
    does not exist), every requested name is treated as new: each name is
    normalized with ``ComicPath.fix_file_name`` + ``ComicPath.chinese_convert``
    — the same normalization ``old_files()`` applies before comparing.

    Args:
        files: a single name (str) or an iterable of names.
        folder: directory scanned for existing files.
        suffix: file extension compared against (default "CBZ").
        result_type: forwarded to ``old_files`` (default "new").
    """
    result_files = self.old_files(files=files, folder=folder, suffix=suffix, result_type=result_type)
    if result_files is not None:
        return result_files
    # Folder missing: everything counts as new. Wrap a bare string so a
    # single name and a list of names take the same path.
    names = [files] if isinstance(files, str) else files
    return [ComicPath.chinese_convert(ComicPath.fix_file_name(name)) for name in names]
def old_files(self, files, folder, suffix="CBZ", result_type="old"):
result = None
# 方法三使用pathlib模块的iterdir方法获取文件夹下的所有文件和文件夹
# 如果只需要文件名而不是文件的绝对路径可以使用name属性获取文件名
if os.path.exists(folder):
@ -878,7 +889,7 @@ class oldUtils:
old_item.append(file_prefix)
new_item = []
if isinstance(files, str): new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(file)))
if isinstance(files, str): new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(files)))
else:
for file in files: new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(file)))
only_in_new_item = [item for item in new_item if item not in old_item]
@ -887,8 +898,16 @@ class oldUtils:
logging.debug(f"只在new_item中: {only_in_new_item}")
logging.debug(f"只在old_item中: {only_in_old_item}")
logging.debug(f"在new_item和old_item中都有: {in_new_item_and_old_item}")
logging.debug(f"在new_item和old_item中都有: {in_new_item_and_old_item}")
if result_type == "old": result = only_in_old_item
if result_type == "new": result = only_in_new_item
return result
def clean_old_files(self, files, folder, move_folder, suffix="CBZ"):
# 方法三使用pathlib模块的iterdir方法获取文件夹下的所有文件和文件夹
# 如果只需要文件名而不是文件的绝对路径可以使用name属性获取文件名
only_in_old_item = self.old_files(files=files, folder=folder, suffix=suffix)
def move_file():
"""移动文件
@ -906,4 +925,4 @@ class oldUtils:
except:
print(f"Error: move old_file={new_move_file} --> {old_move_file}")
move_file()
if only_in_old_item != None: move_file()