This commit is contained in:
caiwx86 2024-07-22 02:24:50 +08:00
parent 60c4fc2ea5
commit ceac5dbc49
3 changed files with 45 additions and 18 deletions

View File

@ -27,7 +27,7 @@ class ComicsPipeline():
if fu.exists(ComicPath(item).PATH_CBZ()): result_item = ItemExporter().export_obj(item)
# 不存在漫画CBZ文件
else: result_item = JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True)
oldUtils().clean_old_files(files=result_item["chapters"], folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR))
#oldUtils().clean_old_files(files=result_item["chapters"], folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR))
return result_item
class BaseImagesPipeline(ImagesPipeline):

View File

@ -3,6 +3,7 @@ from Comics.items import ComicItem
from Comics.loader import ComicLoader
from Comics.utils import ComicPath
from Comics.utils import Conf
from Comics.utils import oldUtils
class RmComicSpider(scrapy.Spider):
name = 'rm_comic'
@ -29,20 +30,27 @@ class RmComicSpider(scrapy.Spider):
def parse_comic(self, response):
    """Parse a comic's index page and schedule downloads for new chapters.

    Builds the comic item from the per-project config, moves CBZ files that
    no longer correspond to any parsed chapter into the old-files folder,
    then yields either the finished item (CBZ already on disk) or a request
    to ``self.parse_chapter`` for each chapter that still needs fetching.

    NOTE(review): this is reconstructed from a diff in which the old,
    unfiltered loop body and the new ``if chapter in new_chapter`` version
    were superimposed; the post-commit (filtered) version is kept.
    """
    # Initialize the comic item; the project config (keyed by spider name)
    # drives automatic parsing of the response.
    comic_item = Conf().comic(self.name, ComicLoader(ComicItem(), response))
    path_comic = comic_item.load_item()
    cbz_dir = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_CBZ_DIR)
    move_folder = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR)
    # Chapters parsed from the page that have no CBZ file on disk yet.
    new_chapter = oldUtils().new_files(files=comic_item.get_chapters(), folder=cbz_dir)
    # Move stale CBZ files (no matching parsed chapter) out of the way.
    oldUtils().clean_old_files(files=comic_item.get_chapters(), folder=cbz_dir, move_folder=move_folder)
    # Iterate chapter names in lockstep with their links (config-injected).
    for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
        if chapter in new_chapter:
            # Export the item for this chapter.
            item = comic_item.load_item(chapter=chapter)
            # Final CBZ path for the chapter; presumably None when it
            # cannot be resolved — TODO confirm against ComicPath.PATH_CBZ.
            cbz_path = ComicPath(item=item).PATH_CBZ()
            # Skip chapters whose CBZ file (simplified/traditional name
            # variants handled upstream) already exists on disk.
            if cbz_path is not None and os.path.exists(cbz_path):
                logging.info(f"漫画 {cbz_path} 已存在, 跳过中...")
                yield item
            else:
                # Follow the chapter link; response handled by self.parse_chapter.
                yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
# 读取某章节下的所有图片
def parse_chapter(self, response):

View File

@ -862,7 +862,18 @@ class DBUtils:
db.remove(Query().name == name)
class oldUtils:
def clean_old_files(self, files, folder, move_folder, suffix="CBZ"):
def new_files(self, files, folder, suffix="CBZ", result_type="new"):
    """Return the chapter names that are not yet present in *folder*.

    Delegates to ``old_files()``; when that returns a list it is passed
    straight through. When it returns None (per ``old_files``, the folder
    does not exist), every requested name is treated as new: each name is
    normalized with ``ComicPath.fix_file_name`` + ``ComicPath.chinese_convert``
    — the same normalization ``old_files()`` applies before comparing.

    Args:
        files: a single name (str) or an iterable of names.
        folder: directory scanned for existing files.
        suffix: file extension compared against (default "CBZ").
        result_type: forwarded to ``old_files`` (default "new").
    """
    result_files = self.old_files(files=files, folder=folder, suffix=suffix, result_type=result_type)
    if result_files is not None:
        return result_files
    # Folder missing: everything counts as new. Wrap a bare string so a
    # single name and a list of names take the same path.
    names = [files] if isinstance(files, str) else files
    return [ComicPath.chinese_convert(ComicPath.fix_file_name(name)) for name in names]
def old_files(self, files, folder, suffix="CBZ", result_type="old"):
result = None
# 方法三使用pathlib模块的iterdir方法获取文件夹下的所有文件和文件夹
# 如果只需要文件名而不是文件的绝对路径可以使用name属性获取文件名
if os.path.exists(folder):
@ -878,7 +889,7 @@ class oldUtils:
old_item.append(file_prefix)
new_item = []
if isinstance(files, str): new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(file)))
if isinstance(files, str): new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(files)))
else:
for file in files: new_item.append(ComicPath.chinese_convert(ComicPath.fix_file_name(file)))
only_in_new_item = [item for item in new_item if item not in old_item]
@ -887,8 +898,16 @@ class oldUtils:
logging.debug(f"只在new_item中: {only_in_new_item}")
logging.debug(f"只在old_item中: {only_in_old_item}")
logging.debug(f"在new_item和old_item中都有: {in_new_item_and_old_item}")
logging.debug(f"在new_item和old_item中都有: {in_new_item_and_old_item}")
if result_type == "old": result = only_in_old_item
if result_type == "new": result = only_in_new_item
return result
def clean_old_files(self, files, folder, move_folder, suffix="CBZ"):
# 方法三使用pathlib模块的iterdir方法获取文件夹下的所有文件和文件夹
# 如果只需要文件名而不是文件的绝对路径可以使用name属性获取文件名
only_in_old_item = self.old_files(files=files, folder=folder, suffix=suffix)
def move_file():
"""移动文件
@ -906,4 +925,4 @@ class oldUtils:
except:
print(f"Error: move old_file={new_move_file} --> {old_move_file}")
move_file()
if only_in_old_item != None: move_file()