This commit is contained in:
caiwx86 2023-05-16 07:36:12 +08:00
parent 3c3f8846cb
commit fe8699cc12
2 changed files with 27 additions and 1 deletions

View File

@ -13,6 +13,8 @@ from Comics.items import ComicItem
from Comics.items import ImageItem
from scrapy.pipelines.images import ImagesPipeline
from scrapy.exporters import XmlItemExporter
from itemadapter import ItemAdapter
class ComicsPipeline:
def open_spider(self,spider):
@ -71,5 +73,28 @@ class ImgDownloadPipeline(ImagesPipeline):
img_path = os.path.join(settings.IMAGES_STORE,img['path'])
#解密图片
imageUtils.deScrambleImagesByPath(img_path)
XmlItemExporter()
return item
class ComicInfoXmlPipeline:
    """Item pipeline that exports every processed item to ``ComicInfo.xml``.

    One ``XmlItemExporter`` (and its backing file) is created lazily on the
    first item and then reused for all later items, so the XML document is
    started once and finished once when the spider closes.
    """

    def open_spider(self, spider):
        """Initialise the exporter cache when the spider starts.

        ``self.xml_exporter`` maps an output file name to an
        ``(exporter, file_object)`` pair.
        """
        self.xml_exporter = {}

    def close_spider(self, spider):
        """Finish every open export and close its file."""
        for exporter, xml_file in self.xml_exporter.values():
            try:
                exporter.finish_exporting()
            finally:
                # Close the file even if finishing the XML document fails.
                xml_file.close()
        self.xml_exporter.clear()

    def _exporter_for_item(self, item):
        """Return the exporter used for *item*, creating it on first use.

        All items currently share a single output file; *item* is accepted
        so the target file can later be chosen per item without changing
        the callers.
        """
        file_name = "ComicInfo.xml"
        if file_name not in self.xml_exporter:
            # Open the file only once: reopening in "wb" mode for each item
            # would truncate previously exported data and leak the handle.
            xml_file = open(file_name, "wb")
            exporter = XmlItemExporter(xml_file)
            exporter.start_exporting()
            self.xml_exporter[file_name] = (exporter, xml_file)
        return self.xml_exporter[file_name][0]

    def process_item(self, item, spider):
        """Export *item* to XML and pass it on unchanged to later pipelines."""
        exporter = self._exporter_for_item(item)
        exporter.export_item(item)
        return item

View File

@ -82,6 +82,7 @@ ITEM_PIPELINES = {
'Comics.pipelines.ComicsPipeline': 300,
'Comics.pipelines.ImageParsePipeline': 400,
'Comics.pipelines.ImgDownloadPipeline': 500,
'Comics.pipelines.ComicInfoXmlPipeline': 600,
}
# Enable and configure the AutoThrottle extension (disabled by default)