diff --git a/Comics/pipelines.py b/Comics/pipelines.py index 4698f83..9ccb0b4 100644 --- a/Comics/pipelines.py +++ b/Comics/pipelines.py @@ -27,7 +27,7 @@ class ComicsPipeline(): if fu.exists(ComicPath(item).PATH_CBZ()): result_item = ItemExporter().export_obj(item) # 不存在漫画CBZ文件 else: result_item = JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True) - oldUtils().clean_old_files(files=result_item["chapters"], folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR)) + #oldUtils().clean_old_files(files=result_item["chapters"], folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR)) return result_item class BaseImagesPipeline(ImagesPipeline): diff --git a/Comics/spiders/rm_comic.py b/Comics/spiders/rm_comic.py index 038473c..ad75478 100644 --- a/Comics/spiders/rm_comic.py +++ b/Comics/spiders/rm_comic.py @@ -3,6 +3,7 @@ from Comics.items import ComicItem from Comics.loader import ComicLoader from Comics.utils import ComicPath from Comics.utils import Conf +from Comics.utils import oldUtils class RmComicSpider(scrapy.Spider): name = 'rm_comic' @@ -29,20 +30,27 @@ class RmComicSpider(scrapy.Spider): def parse_comic(self, response): # 初始化Comic数据并根据工程名称读取配置文件并自动解析 comic_item = Conf().comic(self.name, ComicLoader(ComicItem(), response)) + path_comic = comic_item.load_item() + cbz_dir = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_CBZ_DIR) + move_folder = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR) # 循环遍历根据配置文件自动解析并注入的章节名和章节链接 + new_chapter = oldUtils().new_files(files=comic_item.get_chapters(), folder=cbz_dir) + # 清理多余章节 + oldUtils().clean_old_files(files=comic_item.get_chapters(), folder=cbz_dir, move_folder=move_folder) for chapter, link in 
zip(comic_item.get_chapters(), comic_item.get_chapter_href()): - # 打包导出item数据 - item = comic_item.load_item(chapter=chapter) - # 获取最终存放CBZ的路径 - cbz_path = ComicPath(item=item).PATH_CBZ() - # 校验繁体和简体中文CBZ路径是否存在 - # if not checkUtils().is_error(item) and os.path.exists(cbz_path): - if cbz_path !=None and os.path.exists(cbz_path): - logging.info(f"漫画 {cbz_path} 已存在, 跳过中...") - yield item - else: - # 开始访问章节链接并跳转到self.parse_chapter - yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter) + if chapter in new_chapter: + # 打包导出item数据 + item = comic_item.load_item(chapter=chapter) + # 获取最终存放CBZ的路径 + cbz_path = ComicPath(item=item).PATH_CBZ() + # 校验繁体和简体中文CBZ路径是否存在 + # if not checkUtils().is_error(item) and os.path.exists(cbz_path): + if cbz_path is not None and os.path.exists(cbz_path): + logging.info(f"漫画 {cbz_path} 已存在, 跳过中...") + yield item + else: + # 开始访问章节链接并跳转到self.parse_chapter + yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter) # 读取某章节下的所有图片 def parse_chapter(self, response): diff --git a/Comics/utils.py b/Comics/utils.py index 35d2570..8d761e7 100644 --- a/Comics/utils.py +++ b/Comics/utils.py @@ -862,7 +862,18 @@ class DBUtils: db.remove(Query().name == name) class oldUtils: - def clean_old_files(self, files, folder, move_folder, suffix="CBZ"): + def new_files(self, files, folder, suffix="CBZ", result_type="new"): + result_files = self.old_files(files=files, folder=folder, suffix=suffix, result_type=result_type) + new_files = [] + if result_files is None: + if isinstance(files, str): new_files.append(ComicPath.chinese_convert(ComicPath.fix_file_name(files))) + else: + for file in files: new_files.append(ComicPath.chinese_convert(ComicPath.fix_file_name(file))) + return new_files + else: return result_files + + def old_files(self, files, folder, suffix="CBZ", result_type="old"): + result = None # 方法三:使用pathlib模块的iterdir方法获取文件夹下的所有文件和文件夹 # 如果只需要文件名而不是文件的绝对路径,可以使用name属性获取文件名 if
os.path.exists(folder): @@ -878,7 +889,7 @@ class oldUtils: old_item.append(file_prefix) new_item = [] - if isinstance(files, str): new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(file))) + if isinstance(files, str): new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(files))) else: for file in files: new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(file))) only_in_new_item = [item for item in new_item if item not in old_item] @@ -887,8 +898,16 @@ logging.debug(f"只在new_item中: {only_in_new_item}") logging.debug(f"只在old_item中: {only_in_old_item}") - logging.debug(f"在new_item和old_item中都有: {in_new_item_and_old_item}") - + logging.debug(f"在new_item和old_item中都有: {in_new_item_and_old_item}") + if result_type == "old": result = only_in_old_item + if result_type == "new": result = only_in_new_item + return result + + def clean_old_files(self, files, folder, move_folder, suffix="CBZ"): + # 方法三:使用pathlib模块的iterdir方法获取文件夹下的所有文件和文件夹 + # 如果只需要文件名而不是文件的绝对路径,可以使用name属性获取文件名 + + only_in_old_item = self.old_files(files=files, folder=folder, suffix=suffix) def move_file(): """移动文件 @@ -906,4 +925,4 @@ class oldUtils: except: print(f"Error: move old_file={new_move_file} --> {old_move_file}") - move_file() \ No newline at end of file + if only_in_old_item is not None: move_file() \ No newline at end of file