caiwx86 2024-03-31 18:18:16 +08:00
parent 1c85177352
commit 7499cd78c7
3 changed files with 37 additions and 39 deletions

View File

@@ -23,11 +23,11 @@ class ComicsPipeline():
         if isinstance(item, ComicItem):
             # 'output/rm_comic/json/壞X/第1話 壞X'
             # The comic CBZ file already exists: hand the item straight to the exporter
-            if fu.exists(ComicPath(item).path_cbz()): return ItemExporter().export_obj(item)
+            if fu.exists(ComicPath(item).PATH_CBZ()): return ItemExporter().export_obj(item)
             # No comic CBZ file yet
-            else: return JsonExport(file=ComicPath(item).getDirJosnComicChapter(item)).export_json(ComicLoader(item).load_item(), if_return=True)
+            else: return JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True)
-class BaseComicImagesPipeline(ImagesPipeline):
+class BaseImagesPipeline(ImagesPipeline):
     def image_scramble_exits(self, item,image_path):
         en_image_path = ComicPath(item).getFileScrambleImageSave(image_path, relative="fullpath")
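
The hunk above switches ComicsPipeline to the new PATH_CBZ() mapping call and drops the stray item argument from getDirJosnComicChapter(); the pipeline still branches on whether the chapter's CBZ already exists (export the loaded object directly) or not (dump the chapter to JSON). A minimal standalone sketch of that branch, with hypothetical helper paths standing in for the project's ComicPath API:

import json
import os

def process_chapter(item: dict, cbz_path: str, json_path: str) -> dict:
    # Sketch only: cbz_path/json_path stand in for ComicPath(item).PATH_CBZ()
    # and ComicPath(item).getDirJosnComicChapter().
    if os.path.exists(cbz_path):
        # Chapter is already packed as a CBZ: pass the item through untouched
        return item
    # No CBZ yet: persist the chapter metadata as JSON for the download pipelines
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, "w", encoding="utf-8") as fp:
        json.dump(item, fp, ensure_ascii=False, indent=2)
    return item
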
@@ -39,6 +39,15 @@ class BaseComicImagesPipeline(ImagesPipeline):
     # Cover path
     def file_path(self, request, response=None, info=None, *, item=None): return request.meta['path']
+    # Decide whether the cover needs to be updated
+    def update_icon(self, item):
+        # Cached cover path right after download
+        image_path = self.get_file_path(item, result_type="down_cache_icon")
+        # Final cover save path
+        save_path = self.get_file_path(item=item, result_type="down_icon")
+        fu.update_icon(image_path, save_path)
     def success_completed(self, item, results):
         is_success = False
         fail_data = []
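
update_icon now lives on the shared BaseImagesPipeline (it is deleted from IconDownloadPipeline further down), and success_completed inspects the results list that Scrapy's media pipelines pass to item_completed. In Scrapy, each entry of results is a (success, info) tuple, where info is a dict with 'url', 'path' and 'checksum' on success and a Failure on error. A rough sketch of that kind of check, not the project's exact code:

import logging

def success_completed(results) -> bool:
    # results comes from Scrapy's ImagesPipeline.item_completed
    ok_paths, failures = [], []
    for success, info in results:
        if success:
            ok_paths.append(info["path"])   # path relative to IMAGES_STORE
        else:
            failures.append(info)           # twisted Failure describing the error
    if failures:
        logging.warning("%d downloads failed", len(failures))
    return bool(ok_paths) and not failures
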
@@ -49,34 +58,26 @@ class BaseComicImagesPipeline(ImagesPipeline):
         return is_success
 # Cover download pipeline
-class IconDownloadPipeline(BaseComicImagesPipeline):
+class IconDownloadPipeline(BaseImagesPipeline):
     # Data processing
     def get_media_requests(self, item, info):
         comic = ComicLoader(item=item)
         # Get the cover URL and the cover save path
-        icon_url, icon_cache_path = [ comic.get_icon(), self.get_file_path(item, result_type="icon_cache") ]
+        icon_url, icon_cache_path = [ comic.get_icon(), super().get_file_path(item, result_type="icon_cache") ]
         # Cover already exists
         if fu.exists(icon_cache_path): return False
         else: yield scrapy.Request(url=icon_url, meta={'path': icon_cache_path })
-    # Decide whether the cover needs to be updated
-    def update_icon(self, item):
-        # Cached cover path right after download
-        image_path = self.get_file_path(item, result_type="down_cache_icon")
-        # Final cover save path
-        save_path = self.get_file_path(item=item, result_type="down_icon")
-        fu.update_icon(image_path, save_path)
     def item_completed(self, results, item, info):
-        if self.success_completed(item, results):
+        if super().success_completed(item, results):
             print(" icon download success")
             # Move the updated cover into the Icon folder
-            self.update_icon(item)
+            super().update_icon(item)
         return item
-class ImgDownloadPipeline(BaseComicImagesPipeline):
+class ImgDownloadPipeline(BaseImagesPipeline):
     def get_media_requests(self, item, info):
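
IconDownloadPipeline now routes get_file_path and the shared helpers through the base class, but the pattern itself is unchanged: decide the target path up front, skip the request when the cached cover is already on disk, otherwise carry the path in request.meta so the base class's file_path can return it. A self-contained sketch of that pattern against the real Scrapy ImagesPipeline API (the item fields and store root are assumptions):

import os
import scrapy
from scrapy.pipelines.images import ImagesPipeline

IMAGES_STORE = "output"  # assumed images store root, mirrors the IMAGES_STORE setting

class CoverPipeline(ImagesPipeline):
    def get_media_requests(self, item, info):
        icon_url = item.get("icon_url")                      # hypothetical item field
        cache_path = os.path.join("icons", ".cache", item["name"] + ".jpg")
        if not icon_url or os.path.exists(os.path.join(IMAGES_STORE, cache_path)):
            return  # nothing to download, or the cached cover already exists
        yield scrapy.Request(icon_url, meta={"path": cache_path})

    def file_path(self, request, response=None, info=None, *, item=None):
        # Store the download exactly where get_media_requests asked for it
        return request.meta["path"]
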
@@ -85,9 +86,9 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
         # Add the cover download info to the download list
         # self.add_download_icon(item)
         for image_url,image in zip(self.image_urls,self.images):
-            if_down, image_path = [ True, self.get_file_path(item, image)]
+            if_down, image_path = [ True, super().get_file_path(item, image)]
             # The image (including its scrambled variant) already exists
-            if self.image_scramble_exits(item, image_path):
+            if super().image_scramble_exits(item, image_path):
                 #if image_path == self.get_file_path(item, result_type="icon_cache"):
                 # logging.info(f"icon file exists: IMAGE_STORE {image_path}")
                 #else:
@@ -99,8 +100,8 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
     # Pack the CBZ cover
     def pack_icon(self, item):
-        cbz_icon = self.get_file_path(item=item, result_type="cbz_icon")
-        dwn_icon = self.get_file_path(item=item, result_type="down_icon")
+        cbz_icon = super().get_file_path(item=item, result_type="cbz_icon")
+        dwn_icon = super().get_file_path(item=item, result_type="down_icon")
         base_dir = fu.dirname(dwn_icon)
         name = fu.basename(dwn_icon).split(".")[0]
         for dirname in os.listdir(base_dir):
@@ -112,7 +113,7 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
     def item_completed(self, results, item, info):
         # return item
         # Packing
-        cbz_path = self.get_file_path(item, result_type="cbz")
+        cbz_path = super().get_file_path(item, result_type="cbz")
         success_data = []
         for result in results:
             if result[0]: success_data.append(result[1])
@@ -122,11 +123,11 @@ class ImgDownloadPipeline(BaseComicImagesPipeline):
             self.pack_icon(item)
         else:
             # Generate the ComicInfoXml
-            comic_info = ComicInfoXmlItemExporter(dir=self.get_file_path(item=item, result_type="comic_info")).export_xml(item)
-            if CBZUtils.packComicChapterCBZ(src_dir= self.get_file_path(item, result_type="images_dir"),
+            comic_info = ComicInfoXmlItemExporter(dir=super().get_file_path(item=item, result_type="comic_info")).export_xml(item)
+            if CBZUtils.packComicChapterCBZ(src_dir= super().get_file_path(item, result_type="images_dir"),
                                             dts_path= cbz_path,
                                             comic_info_images= comic_info['Pages'], remove=True):
-                self.update_icon(item)
+                super().update_icon(item)
                 self.pack_icon(item)
             # CBZ verification failed
             else:
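
On a fully successful chapter this hunk writes a ComicInfo.xml and hands the image directory to CBZUtils.packComicChapterCBZ, which produces the archive at cbz_path. A CBZ is simply a ZIP of the page images, usually with a ComicInfo.xml inside; a rough standalone sketch of that packing step (the real helper also verifies the archive and can remove the source directory):

import os
import zipfile
from typing import Optional

def pack_chapter_cbz(src_dir: str, cbz_path: str, comic_info_xml: Optional[str] = None) -> str:
    # Pack every page image of a chapter directory into a CBZ (ZIP) archive.
    os.makedirs(os.path.dirname(cbz_path), exist_ok=True)
    with zipfile.ZipFile(cbz_path, "w", zipfile.ZIP_STORED) as zf:
        for name in sorted(os.listdir(src_dir)):              # keep page order stable
            if name.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
                zf.write(os.path.join(src_dir, name), arcname=name)
        if comic_info_xml is not None:                        # reader-visible metadata
            zf.writestr("ComicInfo.xml", comic_info_xml)
    return cbz_path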

View File

@@ -34,7 +34,6 @@ class RmComicSpider(scrapy.Spider):
             # Pack and export the item data
             item = comic_item.load_item(chapter=chapter)
             # Get the final CBZ storage path
-            #cbz_path = ComicPath(item=item).file_path(ComicPath.PATH_CBZ, convert=True, chapter=chapter)
             cbz_path = ComicPath(item=item).PATH_CBZ()
             # Check whether the Traditional/Simplified Chinese CBZ path exists
             # if not checkUtils().is_error(item) and os.path.exists(cbz_path):
@@ -65,7 +64,7 @@ class RmComicSpider(scrapy.Spider):
     # Handle the encrypted-data API response
     def parse_chapter_api(self, response):
         comic_item = ComicLoader(item=response.meta['item'], response=response)
-        item: ComicLoader = Conf().parse_chapter(item=comic_item, value=response.text)
+        item: ComicLoader = Conf().parse_chapter_api(item=comic_item, value=response.text)
         yield item.load_item()

View File

@@ -59,6 +59,9 @@ class Conf():
     def parse_chapter(self,item: ComicLoader, value):
         return self.comic(item.get_project_name(), item, "parse_chapter", value)
+    def parse_chapter_api(self, item: ComicLoader, value):
+        return self.comic(item.get_project_name(), item, "parse_chapter_api", value)
 # File utility class
 class fileUtils:
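
Conf gains a parse_chapter_api hook that mirrors parse_chapter: both delegate to self.comic with the project name and the hook name, so the spider's new API callback resolves to a per-site parser. A generic sketch of that kind of string-keyed dispatch (the registry and the parser class here are invented for illustration):

class RmComicParser:
    # Hypothetical per-site parser with one method per hook name
    def parse_chapter(self, item, value):
        item["pages"] = value          # parse an HTML chapter page (placeholder)
        return item
    def parse_chapter_api(self, item, value):
        item["pages"] = value          # parse the decrypted API payload (placeholder)
        return item

PARSERS = {"rm_comic": RmComicParser()}

def dispatch(project: str, item: dict, hook: str, value):
    # Look up the project's parser and call the named hook on it,
    # similar in spirit to Conf.comic(project, item, hook, value).
    return getattr(PARSERS[project], hook)(item, value)

# Example: dispatch("rm_comic", {}, "parse_chapter_api", response_text)
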
@@ -627,6 +630,7 @@ class ComicPath:
     MAPPING_ICON_CACHE = "icon_cache"
     MAPPING_CBZ = "cbz"
     MAPPING_IMAGES_DIR = "images_dir"
+    MAPPING_COMIC_JSON = "comic_json"
     def PATH_MAPPING(self):
         if self.project == None or self.name == None or self.chapter == None: return None
@@ -639,11 +643,14 @@ class ComicPath:
             self.MAPPING_ICON: os.path.join(self.project, "icons", self.name, self.name+".jpg"),
             self.MAPPING_ICON_CACHE: os.path.join(self.project, "icons", ".cache", self.name+".jpg"),
             self.MAPPING_CBZ: os.path.join(settings.CBZ_EXPORT_PATH, self.project, self.name, self.chapter+".CBZ"),
-            self.MAPPING_IMAGES_DIR: os.path.join(settings.IMAGES_STORE, self.project, "images", self.name, self.chapter)
+            self.MAPPING_IMAGES_DIR: os.path.join(settings.IMAGES_STORE, self.project, "images", self.name, self.chapter),
+            self.MAPPING_COMIC_JSON: os.path.join(settings.IMAGES_STORE, self.project, "json", self.name, self.chapter+".json")
         }
+    def PATH_CBZ(self, result_type=MAPPING_CBZ): return self.file_path(result_type=result_type)
+    def getDirJosnComicChapter(self, result_type=MAPPING_COMIC_JSON): return self.file_path(result_type=result_type)
     def file_path(self, result_type=MAPPING_IMAGE, file=None, convert=True, chapter=None):
         if chapter != None: self.chapter = chapter
         path = self.PATH_MAPPING().get(result_type, None)
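
Every output location in ComicPath is now resolved from this one mapping keyed by result_type, and the commit adds a comic_json entry plus two thin wrappers, PATH_CBZ() and getDirJosnComicChapter(), on top of file_path. A standalone sketch of the same scheme, with assumed roots standing in for the project's settings:

import os

CBZ_EXPORT_PATH = "output/cbz"     # assumed, mirrors settings.CBZ_EXPORT_PATH
IMAGES_STORE = "output"            # assumed, mirrors settings.IMAGES_STORE

class ChapterPaths:
    def __init__(self, project: str, name: str, chapter: str):
        self.project, self.name, self.chapter = project, name, chapter

    def mapping(self) -> dict:
        # One dict keyed by a result_type string, like ComicPath.PATH_MAPPING()
        return {
            "cbz": os.path.join(CBZ_EXPORT_PATH, self.project, self.name, self.chapter + ".CBZ"),
            "images_dir": os.path.join(IMAGES_STORE, self.project, "images", self.name, self.chapter),
            "comic_json": os.path.join(IMAGES_STORE, self.project, "json", self.name, self.chapter + ".json"),
        }

    def file_path(self, result_type: str) -> str:
        return self.mapping()[result_type]

    # Thin wrappers, analogous to PATH_CBZ() and getDirJosnComicChapter()
    def cbz(self) -> str: return self.file_path("cbz")
    def chapter_json(self) -> str: return self.file_path("comic_json")

# Example: ChapterPaths("rm_comic", "壞X", "第1話").chapter_json()
# -> "output/rm_comic/json/壞X/第1話.json"
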
@@ -659,15 +666,9 @@ class ComicPath:
         # Format as year-month-day
         return today.strftime("%Y%m%d")
     @classmethod
     def getDirComicChapter(cls, item, categorize=""):
         comic = ComicLoader(item=item)
         return os.path.join(OUTPUT_DIR, comic.get_project_name(), categorize, comic.get_name(), comic.get_chapter())
-    #@classmethod
-    #def getDirComicChapter(cls, result_type=): return cls.file_path(result_type=result_type)
-    @classmethod
-    def getDirJosnComicChapter(cls, item):
-        return cls.getDirComicChapter(item=item, categorize="json")
     @classmethod
     def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix
@@ -710,9 +711,6 @@ class ComicPath:
     @classmethod
     def new_file_name(cls, name): return cls.fix_file_name(cls.chinese_convert(name))
-    def path_cbz(self):
-        return self.file_path(result_type=self.PATH_CBZ, convert=True)
     @classmethod
     def images_icon(cls, file, count):
         if count == 0: return file