From e45f173bbbb990b38fac43aaa7c7b1de0a263482 Mon Sep 17 00:00:00 2001 From: caiwx86 Date: Mon, 22 Jul 2024 00:52:50 +0800 Subject: [PATCH] fix --- Comics/loader.py | 6 +++-- Comics/pipelines.py | 10 ++++++--- Comics/settings.py | 1 + Comics/utils.py | 55 ++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/Comics/loader.py b/Comics/loader.py index 4eaa77a..f9fefc9 100644 --- a/Comics/loader.py +++ b/Comics/loader.py @@ -103,10 +103,12 @@ class ComicLoader(ItemLoader): # 图像链接 def image_urls(self, value=None, xpath=None, index=None, sexec=None): self.set_properties('image_urls', value, xpath, index, sexec) - def get_output_value(self, field_name): + def get_output_value(self, field_name, skip_field=["chapter"]): value = super().get_output_value(field_name) try: - if isinstance(value, list) and len(value) == 1 : value = value[0] + if isinstance(value, list) and len(value) == 1: + if field_name not in skip_field: value = value[0] + else: value = "".join(value) except: print(f"get_output_value value={value} type={type(value)}") return value diff --git a/Comics/pipelines.py b/Comics/pipelines.py index 914e71f..5002603 100644 --- a/Comics/pipelines.py +++ b/Comics/pipelines.py @@ -11,7 +11,7 @@ from Comics.items import ComicItem from Comics.loader import ComicLoader from Comics.utils import CBZUtils,fileUtils as fu from Comics.utils import ComicPath -from Comics.utils import checkUtils +from Comics.utils import checkUtils,oldUtils from Comics.exporters import JsonExport,ItemExporter from scrapy.pipelines.images import ImagesPipeline from Comics._utils.ComicInfo import ComicInfoXml @@ -23,9 +23,13 @@ class ComicsPipeline(): if isinstance(item, ComicItem): # 'output/rm_comic/json/壞X/第1話 壞X' # 已存在漫画CBZ文件 调用转换 - if fu.exists(ComicPath(item).PATH_CBZ()): return ItemExporter().export_obj(item) + result_item = None + if fu.exists(ComicPath(item).PATH_CBZ()): result_item = ItemExporter().export_obj(item) # 不存在漫画CBZ文件 - else: return JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True) + else: result_item = JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True) + c_chapter = ComicPath.chinese_convert(result_item["chapters"]) + oldUtils().clean_old_files(files=c_chapter, folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR)) + return result_item class BaseImagesPipeline(ImagesPipeline): diff --git a/Comics/settings.py b/Comics/settings.py index 8e85c21..d1c8882 100644 --- a/Comics/settings.py +++ b/Comics/settings.py @@ -124,5 +124,6 @@ LOG_LEVEL = "INFO" # 日志等级 LOG_STDOUT = True # 标准化输出 CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"CBZ") +OLD_CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"Old_CBZ") #数据导出类 排序 COMIC_INFO_XML_FILE = "ComicInfo.xml" \ No newline at end of file diff --git a/Comics/utils.py b/Comics/utils.py index 06a4573..4b4276c 100644 --- a/Comics/utils.py +++ b/Comics/utils.py @@ -1,4 +1,4 @@ -import base64,hashlib,os,shutil,os.path +import base64,hashlib,os,shutil,os.path,pathlib import math,time,json,datetime,logging import re,requests,time,xmlschema from datetime import date @@ -629,6 +629,8 @@ class ComicPath: MAPPING_ICON = "icon" MAPPING_ICON_CACHE = "icon_cache" MAPPING_CBZ = "cbz" + MAPPING_CBZ_DIR = "cbz_dir" + MAPPING_OLD_CBZ_DIR = "old_cbz_dir" MAPPING_IMAGES_DIR = "images_dir" MAPPING_COMIC_JSON = "comic_json" @@ -644,7 +646,9 @@ class ComicPath: self.MAPPING_ICON_CACHE: os.path.join(self.project, "icons", ".cache", self.name+".jpg"), self.MAPPING_CBZ: os.path.join(settings.CBZ_EXPORT_PATH, self.project, self.name, self.chapter+".CBZ"), self.MAPPING_IMAGES_DIR: os.path.join(settings.IMAGES_STORE, self.project, "images", self.name, self.chapter), - self.MAPPING_COMIC_JSON: os.path.join(settings.IMAGES_STORE, self.project, "json", self.name, self.chapter+".json") + self.MAPPING_COMIC_JSON: os.path.join(settings.IMAGES_STORE, self.project, "json", self.name, self.chapter+".json"), + self.MAPPING_CBZ_DIR: os.path.join(settings.CBZ_EXPORT_PATH, self.project, self.name), + self.MAPPING_OLD_CBZ_DIR: os.path.join(settings.OLD_CBZ_EXPORT_PATH, self.project, self.name) } def PATH_CBZ(self, result_type=MAPPING_CBZ): return self.file_path(result_type=result_type) @@ -691,6 +695,8 @@ class ComicPath: #处理成符合规定的文件名 @classmethod def fix_file_name(cls, filename, replace=None): + if isinstance(filename, list): + for file in filename: cls.fix_file_name(file) if not isinstance(filename, str): return filename in_tab = r'[?*/\|.:><]' @@ -855,4 +861,47 @@ class DBUtils: @classmethod def remove(cls,name,db_name=None): db = cls.init_db(db_name) - db.remove(Query().name == name) \ No newline at end of file + db.remove(Query().name == name) + +class oldUtils: + def clean_old_files(self, files, folder, move_folder, suffix="CBZ"): + # 方法三:使用pathlib模块的iterdir方法获取文件夹下的所有文件和文件夹 + # 如果只需要文件名而不是文件的绝对路径,可以使用name属性获取文件名 + if os.path.exists(folder): + file_names = [f.name for f in pathlib.Path(folder).iterdir() if f.is_file()] + else: + return None + old_item = [] + for file_name in file_names: + file_split = file_name.split(".") + file_suffix = file_split[-1] + file_prefix = file_split[0] + if file_suffix == suffix: + old_item.append(file_prefix) + new_item = ComicPath.fix_file_name(files) + only_in_new_item = [item for item in new_item if item not in old_item] + only_in_old_item = [item for item in old_item if item not in new_item] + in_new_item_and_old_item = [item for item in new_item if item in old_item] + + print(f"只在new_item中: {only_in_new_item}") + print(f"只在old_item中: {only_in_old_item}") + print(f"在new_item和old_item中都有: {in_new_item_and_old_item}") + + + def move_file(): + """移动文件 + """ + if not os.path.exists(move_folder): os.makedirs(move_folder) + for old_file in only_in_old_item: + try: + suffixs = [ suffix, "jpg" ] + for suf in suffixs: + new_move_file = os.path.join(folder, old_file)+"."+suf + old_move_file = os.path.join(move_folder, old_file)+"."+suf + if os.path.exists(new_move_file): + shutil.move(new_move_file, old_move_file) + print(f"move old_file={new_move_file} --> {old_move_file}") + except: + print(f"Error: move old_file={new_move_file} --> {old_move_file}") + + move_file() \ No newline at end of file