56 lines
2.1 KiB
Python
56 lines
2.1 KiB
Python
import json
|
|
from scrapy.loader import ItemLoader
|
|
|
|
class ComicLoader(ItemLoader):
    """Item loader with JSON key-path extraction and replace-on-add semantics.

    Two behaviours are layered on top of :class:`ItemLoader`:

    * ``exec`` / ``str_exec`` arguments name a dot-separated key path that is
      resolved against a ``dict`` (or a JSON document) by :meth:`parseExec`.
    * :meth:`add_value` overwrites a field whose current output value is not
      ``None`` instead of appending to it.
    """

    def parseExec(cls, data, exec):
        """Resolve a dot-separated key path against a JSON object.

        The receiver is named ``cls`` for backward compatibility, but this is
        a plain instance method and is invoked as ``self.parseExec(...)``.

        :param data: a ``dict``, or anything :func:`json.loads` accepts
            (``str``/``bytes``/``bytearray``), which is decoded first
        :param exec: key path such as ``"props.comic.name"``; it is converted
            with ``str()`` and split on ``"."``
        :return: the value found at the path, or ``None`` when any key along
            the path is missing; ``data`` is returned untouched when either
            argument is ``None``
        :raises json.JSONDecodeError: if ``data`` is a string that is not
            valid JSON
        """
        if data is None or exec is None:
            return data

        node = data if isinstance(data, dict) else json.loads(data)
        for key in str(exec).split("."):
            if node is None:
                # A previous key was absent: report the whole path as missing
                # instead of raising AttributeError on ``None.get``.
                return None
            node = node.get(key)
        return node

    def add_xpath(self, field_name, xpath, *processors, index=None, exec=None, re=None, **kw):
        """
        Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
        value, which is used to extract a list of strings from the
        selector associated with this :class:`ItemLoader`.

        See :meth:`get_xpath` for ``kwargs``.

        :param xpath: the XPath to extract data from
        :type xpath: str
        :param index: when not ``None``, only ``values[index]`` is kept; it is
            applied after ``exec``
        :param exec: when not ``None``, a dot-separated key path resolved
            against the extracted values with :meth:`parseExec`
        :param re: regular expression forwarded to :meth:`add_value`

        Examples::

            # HTML snippet: <p class="product-name">Color TV</p>
            loader.add_xpath('name', '//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')

        """
        values = self._get_xpathvalues(xpath, **kw)
        if exec is not None:
            # NOTE(review): upstream ``_get_xpathvalues`` appears to return a
            # list of strings, which ``json.loads`` inside ``parseExec`` would
            # reject -- confirm how ``exec`` is meant to be used with XPath.
            values = self.parseExec(values, exec)
        if index is not None:
            values = values[index]
        self.add_value(field_name, values, *processors, re=re, **kw)

    def add_exec(self, field_name, value, str_exec=None, *processors, re=None, **kw):
        """Add ``value`` to ``field_name``, optionally resolving a key path first.

        :param field_name: name of the field to populate
        :param value: raw value; a ``dict`` or JSON document when ``str_exec``
            is given
        :param str_exec: dot-separated key path handed to :meth:`parseExec`;
            ``None`` leaves ``value`` as it is
        :param re: regular expression forwarded to :meth:`add_value`
        """
        if str_exec is not None:
            value = self.parseExec(value, str_exec)
        self.add_value(field_name, value, *processors, re=re, **kw)

    def get_exec(self, value, str_exec):
        """Return the result of :meth:`parseExec` without storing it."""
        return self.parseExec(value, str_exec)

    def add_value(self, field_name, value, *processors, re=None, **kw):
        """Add ``value`` to ``field_name``, replacing any value already set.

        ``processors`` and ``re`` are applied once up front through
        :meth:`get_value`, so the replace path stores the same extracted
        value as the add path (previously the replace path stored the raw,
        unprocessed value).

        :return: whatever :meth:`ItemLoader.add_value` returns when the value
            is added; ``None`` when an existing value was replaced
        """
        extracted = self.get_value(value, *processors, re=re, **kw)
        if self.auto_replace_value(field_name, extracted):
            # Already extracted above, so no processors/re are passed again.
            return super().add_value(field_name, extracted)

    def auto_replace_value(self, field_name, value):
        """Replace the field's collected values when it already has an output.

        :param field_name: name of the field to inspect
        :param value: value stored via ``_replace_value`` when the field's
            current output value is not ``None``
        :return: ``False`` when the existing value was replaced, ``True`` when
            nothing was stored and the caller should add the value itself
        """
        if self.get_output_value(field_name) is None:
            return True
        self._replace_value(field_name, value)
        return False