diff --git a/Comics/_utils/items.py b/Comics/_utils/items.py
index 6272242..b73bd96 100644
--- a/Comics/_utils/items.py
+++ b/Comics/_utils/items.py
@@ -49,6 +49,19 @@ def serialize_to_images(value): return _serialize_to_images(value)
 # 图像链接处理方法
 def serialize_to_image_urls(value): return _serialize_to_images(value, result_type="image_urls")
 
+# Serializer: author
+def serializer_to_writer(value):
+    """Return the author names in value as a comma-separated string.
+
+    value is converted with str(); "&" and runs of whitespace are treated as
+    separators. Duplicate names are dropped while the first-seen order is
+    kept (iterating a set would make the order vary between runs), and
+    empty tokens are skipped so "A & B" gives "A,B" with no stray comma.
+    The joined string is passed through ComicPath.chinese_convert.
+    """
+    names = dict.fromkeys(str(value).replace("&", " ").split())
+    return ComicPath.chinese_convert(",".join(names))
+
 # ComicItem
 class ComicItem(Item):
     # 工程
@@ -70,7 +83,7 @@ class ComicItem(Item):
     # 图片链接
     list_img = Field(serializer=serialize_to_images)
     # 作者
-    author = Field(serialize_to_chinese=serialize_to_chinese, output_processor=TakeFirst())
+    author = Field(serializer=serializer_to_writer, output_processor=TakeFirst())
     # 封面链接
     icon = Field(output_processor=TakeFirst())
     # 标签
diff --git a/Comics/pipelines.py b/Comics/pipelines.py
index ad8ebc1..7e5e569 100644
--- a/Comics/pipelines.py
+++ b/Comics/pipelines.py
@@ -17,7 +17,9 @@ from scrapy.pipelines.images import ImagesPipeline
 from Comics._utils.ComicInfo import ComicInfoXml
 
 class ComicsPipeline():
-
+    '''
+    Parse the item data passed in from the front end
+    '''
     # item就是yield后面的对象
     def process_item(self, item: ComicItem, spider):
         if isinstance(item, ComicItem):
@@ -29,7 +31,7 @@ class ComicsPipeline():
         return result_item
 
 class BaseImagesPipeline(ImagesPipeline):
-
+    
     def image_scramble_exits(self, item,image_path):
         en_image_path = ComicPath(item).getFileScrambleImageSave(image_path, relative="fullpath")
         return fu.exists(fu.join(settings.IMAGES_STORE, self.get_file_path(item, en_image_path)))