131 lines
5.3 KiB
Python
131 lines
5.3 KiB
Python
import os.path,json,ast
|
|
|
|
from Comics.settings import COMIC_INFO_FIELDS_TO_EXPORT
|
|
from scrapy.exporters import XmlItemExporter
|
|
from scrapy.exporters import PythonItemExporter
|
|
from scrapy.exporters import JsonItemExporter
|
|
from Comics.items import ComicInfoItem
|
|
from Comics.items import ComicItem
|
|
from Comics.settings import COMIC_INFO_XML_STORE
|
|
from Comics.utils.Constant import ComicPath
|
|
from scrapy.utils.python import is_listlike, to_bytes, to_unicode
|
|
from itemadapter import ItemAdapter
|
|
|
|
class CommonExporter():
    """Helper that prepares output file paths for the exporters in this module."""

    def getPath(self, file, sufix=None):
        """Return ``file`` with its extension applied, creating its directory.

        Args:
            file: Target file path.
            sufix: Extension without the leading dot (for example ``"json"``).
                When ``None`` the path is returned without any extension
                being added.

        Returns:
            ``file`` itself, or ``file + "." + sufix`` when ``sufix`` is given
            and the path does not already end with that extension.
        """
        dirname = os.path.dirname(file)
        # A bare file name has an empty dirname, and os.makedirs("") raises.
        if dirname:
            # exist_ok avoids the check-then-create race between processes.
            os.makedirs(dirname, exist_ok=True)
        if sufix is not None:
            extension = "." + sufix
            # Compare against the end of the path: a substring test would be
            # fooled by a directory name that happens to contain the extension.
            if not file.endswith(extension):
                file = file + extension
        return file
|
|
|
|
class ItemExporter(PythonItemExporter):
    """Python-object exporter that additionally decodes bytes to ``str``."""

    def convert(self, data):
        """Recursively decode ``bytes`` inside ``data`` as UTF-8.

        Dicts (keys and values), tuples and lists are walked recursively and
        rebuilt as the same container type; any other value is returned
        unchanged.

        Args:
            data: Arbitrary exported value.

        Returns:
            A copy of ``data`` with every ``bytes`` object replaced by ``str``.
        """
        if isinstance(data, bytes):
            return data.decode("utf-8")
        if isinstance(data, dict):
            return {self.convert(key): self.convert(value)
                    for key, value in data.items()}
        if isinstance(data, tuple):
            # Build a real tuple; a bare map() would hand callers a lazy,
            # single-use iterator instead of the container they passed in.
            return tuple(self.convert(element) for element in data)
        if isinstance(data, list):
            return [self.convert(element) for element in data]
        return data

    def export_obj(self, obj_item):
        """Export ``obj_item`` through the base exporter and normalise bytes.

        Args:
            obj_item: Item to export.

        Returns:
            The result of ``export_item`` after passing through ``convert``.
        """
        self.start_exporting()
        exported = self.convert(self.export_item(obj_item))
        self.finish_exporting()
        return exported
|
|
|
|
class JsonExport(JsonItemExporter):
    """JSON exporter that opens its own output file from a path."""

    def __init__(self, file, **kwargs):
        """Open the output file and initialise the JSON exporter on it.

        Args:
            file: Output path; ``CommonExporter.getPath`` is called with the
                ``"json"`` suffix to obtain the path that is actually opened.
            **kwargs: Forwarded unchanged to ``JsonItemExporter.__init__``.
        """
        file = CommonExporter().getPath(file=file, sufix="json")
        self.file = open(file, "wb")
        try:
            super(JsonExport, self).__init__(self.file, **kwargs)
        except Exception:
            # Do not leak the handle when the base initialiser rejects kwargs.
            self.file.close()
            raise

    def export_json(self, json_object, if_return=False):
        """Write ``json_object`` to the file and close it.

        The exporter is single-use: the file is closed when this method
        returns, whether or not the export succeeded.

        Args:
            json_object: Item to export.
            if_return: When true, also return the item as exported by
                ``ItemExporter().export_obj``.

        Returns:
            The exported Python object when ``if_return`` is true,
            otherwise ``None``.
        """
        try:
            self.start_exporting()
            self.export_item(json_object)
            self.finish_exporting()
        finally:
            # Always release the handle, even if serialisation fails midway.
            self.file.close()
        if if_return:
            return ItemExporter().export_obj(json_object)
        return None
|
|
|
|
|
|
class ComicInfoXmlItemExporter(XmlItemExporter):
    """Write the metadata of one comic chapter to a ``ComicInfo.xml`` file.

    The file lives at ``COMIC_INFO_XML_STORE/<comic>/<chapter>/ComicInfo.xml``
    and is opened as soon as the exporter is constructed. ``export_xml`` is
    the intended entry point; it writes one item and closes the file.
    """

    custom_root_element = "ComicInfo"

    def __init__(self, comic, chapter):
        """Create the target directory, open the XML file and set up the exporter.

        Args:
            comic: Comic directory name below ``COMIC_INFO_XML_STORE``.
            chapter: Chapter directory name below the comic directory.
        """
        file_path = os.path.join(COMIC_INFO_XML_STORE, comic,
                                 chapter, f"{self.custom_root_element}.xml")
        dir_path = os.path.dirname(file_path)
        if dir_path:
            # exist_ok avoids the check-then-create race between processes.
            os.makedirs(dir_path, exist_ok=True)
        self.xml_file = open(file_path, "wb")
        try:
            super(ComicInfoXmlItemExporter, self).__init__(
                self.xml_file,
                root_element=self.custom_root_element,
                indent=1,
                fields_to_export=COMIC_INFO_FIELDS_TO_EXPORT,
            )
        except Exception:
            # Do not leak the handle when the base initialiser fails.
            self.xml_file.close()
            raise

    def serialize_field(self, field, name, value):
        """Pass ``value`` through ``ComicPath.chinese_convert``, then serialize it."""
        # NOTE(review): presumably normalises Chinese text variants; confirm
        # against ComicPath.chinese_convert.
        value = ComicPath.chinese_convert(value)
        return super().serialize_field(field, name, value)

    def start_exporting(self):
        """Open the XML document and the ``ComicInfo`` root element."""
        self.xg.startDocument()
        self.xg.startElement(self.custom_root_element, {})

    def comic_to_info_item(self, comic_item):
        """Map a ``ComicItem`` onto the fields of a ``ComicInfoItem``.

        Each ``ComicInfoItem`` field may carry an ``info`` metadata entry
        naming the ``ComicItem`` key that feeds it; keys without such a
        mapping are dropped.

        Args:
            comic_item: Data accepted by the ``ComicItem`` constructor.

        Returns:
            The populated ``ComicInfoItem`` as exported by
            ``ItemExporter().export_obj``.
        """
        info_item = ItemAdapter(ComicInfoItem())
        # Source key on ComicItem -> destination field on ComicInfoItem.
        source_to_field = {}
        for field in info_item.field_names():
            meta_info = info_item.get_field_meta(field).get('info')
            if meta_info is not None:
                source_to_field[meta_info] = field
        comic_info = {}
        for key, value in ComicItem(comic_item).items():
            new_key = source_to_field.get(key)
            if new_key is not None:
                comic_info[new_key] = value
        return ItemExporter().export_obj(ComicInfoItem(comic_info))

    def export_item(self, item):
        """Write the fields of ``item`` as children of the root element.

        Fields whose serialized value is ``None`` or an empty string are
        omitted. The ``Pages`` field is parsed from its string form with
        ``ast.literal_eval`` so that a list value is written as one ``Page``
        element per entry.

        Args:
            item: Data accepted by ``comic_to_info_item``.

        Returns:
            The exported ``ComicInfoItem`` data.

        Raises:
            ValueError, SyntaxError: If a non-empty ``Pages`` string is not a
                valid Python literal.
        """
        comic_info = self.comic_to_info_item(item)
        child_element = "Page"
        self._beautify_indent(depth=1)
        self._beautify_newline()
        for name, value in self._get_serialized_fields(comic_info, default_value=""):
            # Only parse real text: the "" default for a missing Pages field
            # (or an already-parsed value) must not reach literal_eval.
            if name == "Pages" and isinstance(value, str) and value.strip():
                value = ast.literal_eval(value)
            # Both conditions must hold; with "or" this test was always true
            # and empty elements were written for every missing field.
            if value is not None and value != "":
                self._export_xml_field(name, value, depth=2, child_element=child_element)
        return comic_info

    def _export_xml_field(self, name, serialized_value, depth, child_element="value"):
        """Write one element named ``name``, recursing into mappings and lists.

        Args:
            name: Element name.
            serialized_value: Mapping (one child per key), list-like (one
                ``child_element`` child per entry) or scalar (written as text).
            depth: Indentation depth of the element.
            child_element: Element name used for the entries of a list-like
                value at this level; nested levels use the default ``"value"``.
        """
        self._beautify_indent(depth=depth)
        self.xg.startElement(name, {})
        if hasattr(serialized_value, "items"):
            self._beautify_newline()
            for sub_name, value in serialized_value.items():
                self._export_xml_field(sub_name, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif is_listlike(serialized_value):
            self._beautify_newline()
            for value in serialized_value:
                self._export_xml_field(child_element, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif isinstance(serialized_value, str):
            self.xg.characters(serialized_value)
        else:
            self.xg.characters(str(serialized_value))
        self.xg.endElement(name)
        self._beautify_newline()

    def finish_exporting(self):
        """Close the root element, end the document and close the file."""
        self.xg.endElement(self.custom_root_element)
        self.xg.endDocument()
        self.xml_file.close()

    def export_xml(self, item):
        """Write ``item`` as a complete ``ComicInfo.xml`` document.

        The exporter is single-use: the file is closed when this method
        returns, whether or not the export succeeded.

        Args:
            item: Data accepted by ``comic_to_info_item``.

        Returns:
            The exported ``ComicInfoItem`` data.
        """
        try:
            self.start_exporting()
            comic_info = self.export_item(item)
            self.finish_exporting()
        finally:
            # Closing an already-closed file is a no-op, so this is safe after
            # finish_exporting and still releases the handle on failure.
            self.xml_file.close()
        return comic_info
|