ComicScrapy/Comics/loader.py
2023-06-20 02:52:51 +08:00

56 lines
2.1 KiB
Python

import json
from scrapy.loader import ItemLoader
class ComicLoader(ItemLoader):
    """Item loader with JSON key-path lookup and replace-on-add semantics.

    Two behaviours are layered on top of :class:`scrapy.loader.ItemLoader`:

    * a value can be narrowed with a dotted key path (``exec``) that is
      resolved against a dict or a JSON document, see :meth:`parseExec`;
    * :meth:`add_value` overwrites a field whose current output value is
      not ``None`` instead of appending to it.
    """

    @classmethod
    def parseExec(cls, data, exec):
        """Resolve a dotted key path against a dict or a JSON document.

        :param data: a ``dict``, or anything :func:`json.loads` accepts
            (it is parsed first); ``None`` is returned unchanged
        :param exec: dotted key path such as ``"a.b.c"``; when ``None``,
            *data* is returned unchanged
        :returns: the value found at the path, or ``None`` when a key is
            missing or an intermediate value has no ``get`` method
        :raises json.JSONDecodeError: if *data* is text that is not valid
            JSON
        """
        if data is None or exec is None:
            return data
        if not isinstance(data, dict):
            data = json.loads(data)
        for key in str(exec).split("."):
            # A missing key (None) or a non-mapping such as a list cannot
            # be traversed further; report "not found" instead of raising
            # AttributeError, matching what a missing final key returns.
            if not hasattr(data, "get"):
                return None
            data = data.get(key)
        return data

    def add_xpath(self, field_name, xpath, *processors, index=None,
                  exec=None, re=None, **kw):
        """Extract values with an XPath and store them under *field_name*.

        Similar to :meth:`ItemLoader.add_value` but receives an XPath
        instead of a value, which is used to extract a list of strings
        from the selector associated with this loader. Two optional
        narrowing steps run before the result is passed to
        :meth:`add_value`:

        * ``index`` keeps only the extracted entry at that position;
        * ``exec`` resolves a dotted key path (see :meth:`parseExec`) in
          the selected entry, or in every entry when no single one was
          selected.

        :param field_name: name of the field to populate
        :param xpath: the XPath to extract data from
        :type xpath: str
        :param processors: extra processors forwarded to :meth:`add_value`
        :param index: position (or slice) to pick from the extracted list
        :param exec: dotted key path applied to the extracted JSON text
        :param re: regular expression forwarded to :meth:`add_value`
        :param kw: forwarded to the XPath extraction and to
            :meth:`add_value`
        :returns: whatever :meth:`add_value` returns
        :raises IndexError: if *index* is out of range for the extracted
            values

        Examples::

            # HTML snippet: <p class="product-name">Color TV</p>
            loader.add_xpath('name', '//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
            # HTML snippet: <script id="d">{"comic": {"name": "Foo"}}</script>
            loader.add_xpath('name', '//script[@id="d"]/text()',
                             index=0, exec='comic.name')
        """
        values = self._get_xpathvalues(xpath, **kw)
        # Select before resolving the key path: parseExec expects a single
        # dict/JSON document, and handing it the whole list of extracted
        # strings makes json.loads raise TypeError.
        if index is not None:
            values = values[index]
        if exec is not None:
            if isinstance(values, (list, tuple)):
                values = [self.parseExec(entry, exec) for entry in values]
            else:
                values = self.parseExec(values, exec)
        return self.add_value(field_name, values, *processors, re=re, **kw)

    def add_exec(self, field_name, value, str_exec=None, *processors,
                 re=None, **kw):
        """Store *value*, optionally narrowed by a dotted key path.

        :param field_name: name of the field to populate
        :param value: a dict or JSON document when *str_exec* is given,
            otherwise any value accepted by :meth:`add_value`
        :param str_exec: dotted key path resolved with :meth:`parseExec`;
            ``None`` stores *value* as-is
        :param processors: extra processors forwarded to :meth:`add_value`
        :param re: regular expression forwarded to :meth:`add_value`
        :returns: whatever :meth:`add_value` returns
        """
        if str_exec is not None:
            value = self.parseExec(value, str_exec)
        return self.add_value(field_name, value, *processors, re=re, **kw)

    def get_exec(self, value, str_exec):
        """Return the result of resolving *str_exec* in *value*.

        Thin wrapper around :meth:`parseExec`; nothing is stored.
        """
        return self.parseExec(value, str_exec)

    def add_value(self, field_name, value, *processors, re=None, **kw):
        """Add *value* to *field_name*, replacing any existing value.

        *processors* and *re* are applied up front through
        :meth:`get_value` so that a replacement receives the same
        processing as a first-time add (previously they were dropped when
        the field already had a value).

        :returns: the base class ``add_value`` result when the value was
            added; this loader when an existing value was replaced
        """
        value = self.get_value(value, *processors, re=re, **kw)
        if self.auto_replace_value(field_name, value):
            # Already processed above, so no processors/re are passed on.
            return super().add_value(field_name, value)
        return self

    def auto_replace_value(self, field_name, value):
        """Replace the field's value if it currently has an output value.

        :returns: ``True`` when the field's output value is ``None`` and
            nothing was changed (the caller should add the value);
            ``False`` when the existing value was replaced with *value*
        """
        if self.get_output_value(field_name) is None:
            return True
        self._replace_value(field_name, value)
        return False