This commit is contained in:
caiwx86 2024-10-28 04:41:46 +08:00
parent e9a65d1be1
commit 4f75e2eab9
2 changed files with 12 additions and 3 deletions

View File

@ -49,6 +49,13 @@ def serialize_to_images(value): return _serialize_to_images(value)
# 图像链接处理方法
def serialize_to_image_urls(value):
    """Serialize *value* through ``_serialize_to_images`` in ``"image_urls"`` mode.

    Thin wrapper: *value* is forwarded unchanged and the helper's result is
    returned as-is.
    """
    image_urls = _serialize_to_images(value, result_type="image_urls")
    return image_urls
# 序列化-作者
def serializer_to_writer(value):
    """Normalize an author field into a comma-separated string.

    ``value`` is converted with ``str()``; ``&`` is treated the same as a
    space, so both act as separators between names. Duplicate names are
    dropped and the remaining names are joined with ``","`` before being
    passed through ``ComicPath.chinese_convert``.

    Unlike a plain ``set``-based de-duplication, the names keep the order in
    which they first appear, so the result is identical from run to run, and
    empty tokens produced by consecutive separators (e.g. ``"A & B"``) are
    discarded instead of leaking into the output as stray commas.

    Args:
        value: Raw author value; anything accepted by ``str()``.

    Returns:
        Whatever ``ComicPath.chinese_convert`` returns for the joined names
        (presumably a script-normalized string -- confirm against ComicPath).
    """
    normalized = str(value).replace("&", " ")
    # Split on a literal space only (as before) and skip the empty pieces
    # that appear wherever two separators are adjacent.
    names = [name for name in normalized.split(" ") if name]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_names = dict.fromkeys(names)
    return ComicPath.chinese_convert(",".join(unique_names))
# ComicItem
class ComicItem(Item):
# 工程
@ -70,7 +77,7 @@ class ComicItem(Item):
# 图片链接
list_img = Field(serializer=serialize_to_images)
# 作者
author = Field(serialize_to_chinese=serialize_to_chinese, output_processor=TakeFirst())
author = Field(serializer=serializer_to_writer, output_processor=TakeFirst())
# 封面链接
icon = Field(output_processor=TakeFirst())
# 标签

View File

@ -17,7 +17,9 @@ from scrapy.pipelines.images import ImagesPipeline
from Comics._utils.ComicInfo import ComicInfoXml
class ComicsPipeline():
'''
解析前端传入的item数据
'''
# item就是yield后面的对象
def process_item(self, item: ComicItem, spider):
if isinstance(item, ComicItem):
@ -29,7 +31,7 @@ class ComicsPipeline():
return result_item
class BaseImagesPipeline(ImagesPipeline):
def image_scramble_exits(self, item, image_path):
    """Check ``fu.exists`` for the scramble-image file of *image_path*.

    The scramble path comes from
    ``ComicPath(item).getFileScrambleImageSave(image_path, relative="fullpath")``;
    it is mapped through ``self.get_file_path`` and joined onto
    ``settings.IMAGES_STORE`` before the existence check.

    Returns:
        The result of ``fu.exists`` for the joined path.
    """
    scramble_path = ComicPath(item).getFileScrambleImageSave(image_path, relative="fullpath")
    stored_path = fu.join(settings.IMAGES_STORE, self.get_file_path(item, scramble_path))
    return fu.exists(stored_path)