This commit is contained in:
caiwx86 2024-10-28 04:41:46 +08:00
parent e9a65d1be1
commit 4f75e2eab9
2 changed files with 12 additions and 3 deletions

View File

@ -49,6 +49,13 @@ def serialize_to_images(value): return _serialize_to_images(value)
# 图像链接处理方法
def serialize_to_image_urls(value):
    """Serialize *value* through ``_serialize_to_images`` with ``result_type="image_urls"``."""
    return _serialize_to_images(value, result_type="image_urls")
# 序列化-作者
def serializer_to_writer(value):
    """Normalize an author field into a comma-separated string.

    ``&`` is treated as a name separator in addition to the space character,
    repeated names are dropped, and the joined result is passed through
    ``ComicPath.chinese_convert``.

    Args:
        value: Raw author value; it is coerced with ``str()`` before parsing.

    Returns:
        Whatever ``ComicPath.chinese_convert`` returns for the joined names.
    """
    text = str(value).replace("&", " ")
    # "A & B" becomes "A   B"; drop the empty tokens produced by consecutive
    # spaces so the output never contains stray commas such as "A,,B".
    names = [name for name in text.split(" ") if name]
    # dict.fromkeys de-duplicates while keeping first-seen order; a plain set
    # would emit the authors in an arbitrary order that can change between runs.
    unique_names = dict.fromkeys(names)
    return ComicPath.chinese_convert(",".join(unique_names))
# ComicItem
class ComicItem(Item):
# 工程
@ -70,7 +77,7 @@ class ComicItem(Item):
# 图片链接
list_img = Field(serializer=serialize_to_images)
# 作者
author = Field(serialize_to_chinese=serialize_to_chinese, output_processor=TakeFirst())
author = Field(serializer=serializer_to_writer, output_processor=TakeFirst())
# 封面链接
icon = Field(output_processor=TakeFirst())
# 标签

View File

@ -17,7 +17,9 @@ from scrapy.pipelines.images import ImagesPipeline
from Comics._utils.ComicInfo import ComicInfoXml
class ComicsPipeline():
'''
解析前端传入的item数据
'''
# item就是yield后面的对象
def process_item(self, item: ComicItem, spider):
if isinstance(item, ComicItem):