This commit is contained in:
caiwx86 2024-03-31 18:18:16 +08:00
parent 1c85177352
commit 7499cd78c7
3 changed files with 37 additions and 39 deletions

View File

@ -23,11 +23,11 @@ class ComicsPipeline():
if isinstance(item, ComicItem): if isinstance(item, ComicItem):
# 'output/rm_comic/json/壞X/第1話 壞X' # 'output/rm_comic/json/壞X/第1話 壞X'
# 已存在漫画CBZ文件 调用转换 # 已存在漫画CBZ文件 调用转换
if fu.exists(ComicPath(item).path_cbz()): return ItemExporter().export_obj(item) if fu.exists(ComicPath(item).PATH_CBZ()): return ItemExporter().export_obj(item)
# 不存在漫画CBZ文件 # 不存在漫画CBZ文件
else: return JsonExport(file=ComicPath(item).getDirJosnComicChapter(item)).export_json(ComicLoader(item).load_item(), if_return=True) else: return JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True)
class BaseComicImagesPipeline(ImagesPipeline): class BaseImagesPipeline(ImagesPipeline):
def image_scramble_exits(self, item,image_path): def image_scramble_exits(self, item,image_path):
en_image_path = ComicPath(item).getFileScrambleImageSave(image_path, relative="fullpath") en_image_path = ComicPath(item).getFileScrambleImageSave(image_path, relative="fullpath")
@ -39,6 +39,15 @@ class BaseComicImagesPipeline(ImagesPipeline):
# 封面路径 # 封面路径
def file_path(self, request, response=None, info=None, *, item=None): return request.meta['path'] def file_path(self, request, response=None, info=None, *, item=None): return request.meta['path']
# 判断是否需要更新封面
def update_icon(self, item):
# 下载后的缓存封面路径
image_path = self.get_file_path(item, result_type="down_cache_icon")
# 最终封面保存路径
save_path = self.get_file_path(item=item, result_type="down_icon")
fu.update_icon(image_path, save_path)
def success_completed(self, item, results): def success_completed(self, item, results):
is_success = False is_success = False
fail_data = [] fail_data = []
@ -49,34 +58,26 @@ class BaseComicImagesPipeline(ImagesPipeline):
return is_success return is_success
# 封面下载操作类 # 封面下载操作类
class IconDownloadPipeline(BaseComicImagesPipeline): class IconDownloadPipeline(BaseImagesPipeline):
# 数据处理 # 数据处理
def get_media_requests(self, item, info): def get_media_requests(self, item, info):
comic = ComicLoader(item=item) comic = ComicLoader(item=item)
# 获取封面链接和封面保存路径 # 获取封面链接和封面保存路径
icon_url, icon_cache_path = [ comic.get_icon(), self.get_file_path(item, result_type="icon_cache") ] icon_url, icon_cache_path = [ comic.get_icon(), super().get_file_path(item, result_type="icon_cache") ]
# 封面已存在 # 封面已存在
if fu.exists(icon_cache_path): return False if fu.exists(icon_cache_path): return False
else: yield scrapy.Request(url=icon_url, meta={'path': icon_cache_path }) else: yield scrapy.Request(url=icon_url, meta={'path': icon_cache_path })
# 判断是否需要更新封面
def update_icon(self, item):
# 下载后的缓存封面路径
image_path = self.get_file_path(item, result_type="down_cache_icon")
# 最终封面保存路径
save_path = self.get_file_path(item=item, result_type="down_icon")
fu.update_icon(image_path, save_path)
def item_completed(self, results, item, info): def item_completed(self, results, item, info):
if self.success_completed(item, results): if super().success_completed(item, results):
print(" icon download success") print(" icon download success")
# 更新封面到Icon文件夹内 # 更新封面到Icon文件夹内
self.update_icon(item) super().update_icon(item)
return item
class ImgDownloadPipeline(BaseComicImagesPipeline): class ImgDownloadPipeline(BaseImagesPipeline):
def get_media_requests(self, item, info): def get_media_requests(self, item, info):
@ -85,9 +86,9 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
# 添加封面下载信息至下载列表中 # 添加封面下载信息至下载列表中
# self.add_download_icon(item) # self.add_download_icon(item)
for image_url,image in zip(self.image_urls,self.images): for image_url,image in zip(self.image_urls,self.images):
if_down, image_path = [ True, self.get_file_path(item, image)] if_down, image_path = [ True, super().get_file_path(item, image)]
# 图像(含加密图像)已存在 # 图像(含加密图像)已存在
if self.image_scramble_exits(item, image_path): if super().image_scramble_exits(item, image_path):
#if image_path == self.get_file_path(item, result_type="icon_cache"): #if image_path == self.get_file_path(item, result_type="icon_cache"):
# logging.info(f"icon file exists: IMAGE_STORE {image_path}") # logging.info(f"icon file exists: IMAGE_STORE {image_path}")
#else: #else:
@ -99,8 +100,8 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
# 打包cbz封面 # 打包cbz封面
def pack_icon(self, item): def pack_icon(self, item):
cbz_icon = self.get_file_path(item=item, result_type="cbz_icon") cbz_icon = super().get_file_path(item=item, result_type="cbz_icon")
dwn_icon = self.get_file_path(item=item, result_type="down_icon") dwn_icon = super().get_file_path(item=item, result_type="down_icon")
base_dir = fu.dirname(dwn_icon) base_dir = fu.dirname(dwn_icon)
name = fu.basename(dwn_icon).split(".")[0] name = fu.basename(dwn_icon).split(".")[0]
for dirname in os.listdir(base_dir): for dirname in os.listdir(base_dir):
@ -112,7 +113,7 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
def item_completed(self, results, item, info): def item_completed(self, results, item, info):
# return item # return item
# 打包 # 打包
cbz_path = self.get_file_path(item, result_type="cbz") cbz_path = super().get_file_path(item, result_type="cbz")
success_data = [] success_data = []
for result in results: for result in results:
if result[0]: success_data.append(result[1]) if result[0]: success_data.append(result[1])
@ -122,11 +123,11 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
self.pack_icon(item) self.pack_icon(item)
else: else:
# ComicInfoXml 生成 # ComicInfoXml 生成
comic_info = ComicInfoXmlItemExporter(dir=self.get_file_path(item=item, result_type="comic_info")).export_xml(item) comic_info = ComicInfoXmlItemExporter(dir=super().get_file_path(item=item, result_type="comic_info")).export_xml(item)
if CBZUtils.packComicChapterCBZ(src_dir= self.get_file_path(item, result_type="images_dir"), if CBZUtils.packComicChapterCBZ(src_dir= super().get_file_path(item, result_type="images_dir"),
dts_path= cbz_path, dts_path= cbz_path,
comic_info_images= comic_info['Pages'], remove=True): comic_info_images= comic_info['Pages'], remove=True):
self.update_icon(item) super().update_icon(item)
self.pack_icon(item) self.pack_icon(item)
# CBZ校验失败 # CBZ校验失败
else: else:

View File

@ -34,7 +34,6 @@ class RmComicSpider(scrapy.Spider):
# 打包导出item数据 # 打包导出item数据
item = comic_item.load_item(chapter=chapter) item = comic_item.load_item(chapter=chapter)
# 获取最终存放CBZ的路径 # 获取最终存放CBZ的路径
#cbz_path = ComicPath(item=item).file_path(ComicPath.PATH_CBZ, convert=True, chapter=chapter)
cbz_path = ComicPath(item=item).PATH_CBZ() cbz_path = ComicPath(item=item).PATH_CBZ()
# 校验繁体和简体中文CBZ路径是否存在 # 校验繁体和简体中文CBZ路径是否存在
# if not checkUtils().is_error(item) and os.path.exists(cbz_path): # if not checkUtils().is_error(item) and os.path.exists(cbz_path):
@ -65,7 +64,7 @@ class RmComicSpider(scrapy.Spider):
# 加密数据API处理 # 加密数据API处理
def parse_chapter_api(self, response): def parse_chapter_api(self, response):
comic_item = ComicLoader(item=response.meta['item'], response=response) comic_item = ComicLoader(item=response.meta['item'], response=response)
item: ComicLoader = Conf().parse_chapter(item=comic_item, value=response.text) item: ComicLoader = Conf().parse_chapter_api(item=comic_item, value=response.text)
yield item.load_item() yield item.load_item()

View File

@ -59,6 +59,9 @@ class Conf():
def parse_chapter(self,item: ComicLoader, value): def parse_chapter(self,item: ComicLoader, value):
return self.comic(item.get_project_name(), item, "parse_chapter", value) return self.comic(item.get_project_name(), item, "parse_chapter", value)
def parse_chapter_api(self, item: ComicLoader, value):
return self.comic(item.get_project_name(), item, "parse_chapter_api", value)
# 文件操作类 # 文件操作类
class fileUtils: class fileUtils:
@ -627,6 +630,7 @@ class ComicPath:
MAPPING_ICON_CACHE = "icon_cache" MAPPING_ICON_CACHE = "icon_cache"
MAPPING_CBZ = "cbz" MAPPING_CBZ = "cbz"
MAPPING_IMAGES_DIR = "images_dir" MAPPING_IMAGES_DIR = "images_dir"
MAPPING_COMIC_JSON = "comic_json"
def PATH_MAPPING(self): def PATH_MAPPING(self):
if self.project == None or self.name == None or self.chapter == None: return None if self.project == None or self.name == None or self.chapter == None: return None
@ -639,11 +643,14 @@ class ComicPath:
self.MAPPING_ICON: os.path.join(self.project, "icons", self.name, self.name+".jpg"), self.MAPPING_ICON: os.path.join(self.project, "icons", self.name, self.name+".jpg"),
self.MAPPING_ICON_CACHE: os.path.join(self.project, "icons", ".cache", self.name+".jpg"), self.MAPPING_ICON_CACHE: os.path.join(self.project, "icons", ".cache", self.name+".jpg"),
self.MAPPING_CBZ: os.path.join(settings.CBZ_EXPORT_PATH, self.project, self.name, self.chapter+".CBZ"), self.MAPPING_CBZ: os.path.join(settings.CBZ_EXPORT_PATH, self.project, self.name, self.chapter+".CBZ"),
self.MAPPING_IMAGES_DIR: os.path.join(settings.IMAGES_STORE, self.project, "images", self.name, self.chapter) self.MAPPING_IMAGES_DIR: os.path.join(settings.IMAGES_STORE, self.project, "images", self.name, self.chapter),
self.MAPPING_COMIC_JSON: os.path.join(settings.IMAGES_STORE, self.project, "json", self.name, self.chapter+".json")
} }
def PATH_CBZ(self, result_type=MAPPING_CBZ): return self.file_path(result_type=result_type) def PATH_CBZ(self, result_type=MAPPING_CBZ): return self.file_path(result_type=result_type)
def getDirJosnComicChapter(self, result_type=MAPPING_COMIC_JSON): return self.file_path(result_type=result_type)
def file_path(self, result_type=MAPPING_IMAGE, file=None, convert=True, chapter=None): def file_path(self, result_type=MAPPING_IMAGE, file=None, convert=True, chapter=None):
if chapter != None: self.chapter = chapter if chapter != None: self.chapter = chapter
path = self.PATH_MAPPING().get(result_type, None) path = self.PATH_MAPPING().get(result_type, None)
@ -659,15 +666,9 @@ class ComicPath:
# 格式化为年-月-日 # 格式化为年-月-日
return today.strftime("%Y%m%d") return today.strftime("%Y%m%d")
@classmethod #@classmethod
def getDirComicChapter(cls, item, categorize=""): #def getDirComicChapter(cls, result_type=): return cls.file_path(result_type=result_type)
comic = ComicLoader(item=item)
return os.path.join(OUTPUT_DIR, comic.get_project_name(), categorize, comic.get_name(), comic.get_chapter())
@classmethod
def getDirJosnComicChapter(cls, item):
return cls.getDirComicChapter(item=item, categorize="json")
@classmethod @classmethod
def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix
@ -710,9 +711,6 @@ class ComicPath:
@classmethod @classmethod
def new_file_name(cls, name): return cls.fix_file_name(cls.chinese_convert(name)) def new_file_name(cls, name): return cls.fix_file_name(cls.chinese_convert(name))
def path_cbz(self):
return self.file_path(result_type=self.PATH_CBZ, convert=True)
@classmethod @classmethod
def images_icon(cls, file, count): def images_icon(cls, file, count):
if count == 0: return file if count == 0: return file