# ComicScrapy/Comics/exporters.py

import os.path,json,ast

from Comics.settings import COMIC_INFO_FIELDS_TO_EXPORT
from scrapy.exporters import XmlItemExporter
from scrapy.exporters import PythonItemExporter
from scrapy.exporters import JsonItemExporter
from Comics.items import ComicInfoItem
from Comics.items import ComicItem
from Comics.settings import COMIC_INFO_XML_STORE
from Comics.utils.Constant import ComicPath
from scrapy.utils.python import is_listlike, to_bytes, to_unicode
from itemadapter import ItemAdapter

class CommonExporter():
    """Helper that prepares output file paths for the exporters."""

    def getPath(self, file, sufix=None):
        """Return ``file`` with its extension applied and its directory created.

        Args:
            file: Target file path (absolute or relative).
            sufix: Extension without the leading dot (e.g. ``"json"``).
                When ``None`` or empty, ``file`` is returned unchanged.

        Returns:
            The path, ending in ``"." + sufix`` when a suffix was given.
        """
        dirname = os.path.dirname(file)
        # A bare file name has an empty dirname; os.makedirs("") would raise.
        if dirname:
            # exist_ok avoids the race between an existence check and creation.
            os.makedirs(dirname, exist_ok=True)
        if sufix:
            extension = "." + sufix
            # Only the end of the path matters: a directory or stem that
            # merely contains the extension must not suppress it.
            if not file.endswith(extension):
                file = file + extension
        return file

class ItemExporter(PythonItemExporter):
    """Python-object exporter that also decodes bytes inside the result."""

    def convert(self, data):
        """Recursively decode UTF-8 ``bytes`` found inside ``data``.

        Dicts (keys and values), tuples and lists are rebuilt with every
        member converted; any other value is returned unchanged.

        Args:
            data: The value to convert.

        Returns:
            A structure of the same shape with ``bytes`` replaced by ``str``.

        Raises:
            UnicodeDecodeError: If a ``bytes`` value is not valid UTF-8.
        """
        if isinstance(data, bytes):
            return data.decode("utf-8")
        if isinstance(data, dict):
            return {self.convert(key): self.convert(value)
                    for key, value in data.items()}
        if isinstance(data, tuple):
            # A bare map() is a lazy, one-shot iterator on Python 3; build a
            # real tuple so the result can be indexed and read repeatedly.
            return tuple(self.convert(member) for member in data)
        if isinstance(data, list):
            return [self.convert(member) for member in data]
        return data

    def export_obj(self, obj_item):
        """Export ``obj_item`` and return the converted result.

        Runs ``start_exporting``, passes the item through ``export_item`` and
        ``convert``, then runs ``finish_exporting``.

        Args:
            obj_item: The item to export.

        Returns:
            The output of ``export_item`` after ``convert`` has been applied.
        """
        self.start_exporting()
        obj_item = self.convert(self.export_item(obj_item))
        self.finish_exporting()
        return obj_item

class JsonExport(JsonItemExporter):
    """JSON exporter that opens its own output file from a path."""

    def __init__(self, file, **kwargs):
        """Open the target file and initialise the JSON exporter on it.

        Args:
            file: Output path; ``CommonExporter.getPath`` is called with
                ``sufix="json"`` to obtain the final path.
            **kwargs: Forwarded unchanged to ``JsonItemExporter.__init__``.
        """
        file = CommonExporter().getPath(file=file, sufix="json")
        self.file = open(file, "wb")
        try:
            super(JsonExport, self).__init__(self.file, **kwargs)
        except Exception:
            # Do not leak the handle if the parent constructor rejects kwargs.
            self.file.close()
            raise

    def export_json(self, json_object, if_return=False):
        """Write ``json_object`` to the file, then close the file.

        Args:
            json_object: The item to export.
            if_return: When true, also return the item as exported by
                ``ItemExporter().export_obj``.

        Returns:
            The ``ItemExporter`` result when ``if_return`` is true,
            otherwise ``None``.
        """
        try:
            self.start_exporting()
            self.export_item(json_object)
            self.finish_exporting()
        finally:
            # Close the handle even when exporting fails part-way.
            self.file.close()
        if if_return:
            return ItemExporter().export_obj(json_object)


class ComicInfoXmlItemExporter(XmlItemExporter):
    """XML exporter that writes a comic item as a ``ComicInfo.xml`` file."""

    # Name of the XML root element and of the output file (without ".xml").
    custom_root_element = "ComicInfo"

    def __init__(self, comic, chapter):
        """Open ``<COMIC_INFO_XML_STORE>/<comic>/<chapter>/ComicInfo.xml``.

        Missing parent directories are created. The exporter is configured
        with ``indent=1`` and ``COMIC_INFO_FIELDS_TO_EXPORT``.

        Args:
            comic: Path component for the comic.
            chapter: Path component for the chapter.
        """
        file_path = os.path.join(COMIC_INFO_XML_STORE, comic,
                                 chapter, f"{self.custom_root_element}.xml")
        dir_path = os.path.dirname(file_path)
        if dir_path:
            # exist_ok avoids the race between an existence check and creation.
            os.makedirs(dir_path, exist_ok=True)
        self.xml_file = open(file_path, "wb")
        try:
            super(ComicInfoXmlItemExporter, self).__init__(
                self.xml_file,
                root_element=self.custom_root_element,
                indent=1,
                fields_to_export=COMIC_INFO_FIELDS_TO_EXPORT,
            )
        except Exception:
            # Do not leak the handle if the parent constructor fails.
            self.xml_file.close()
            raise

    def serialize_field(self, field, name, value):
        """Serialize one field after passing it through ``chinese_convert``."""
        # The value goes through ComicPath.chinese_convert before the default
        # serialization (original note: "via serialization").
        # NOTE(review): presumably a Chinese script conversion - confirm.
        value = ComicPath.chinese_convert(value)
        return super().serialize_field(field, name, value)

    def start_exporting(self):
        """Start the XML document and open the root element."""
        self.xg.startDocument()
        self.xg.startElement(self.custom_root_element, {})

    def comic_to_info_item(self, comic_item):
        """Map a comic item onto ``ComicInfoItem`` fields and export it.

        Each ``ComicInfoItem`` field whose metadata has an ``info`` entry
        names the ``ComicItem`` key it is filled from; keys without such a
        mapping are dropped.

        Args:
            comic_item: Data accepted by the ``ComicItem`` constructor.

        Returns:
            The result of ``ItemExporter().export_obj`` for the built
            ``ComicInfoItem``.
        """
        info_item = ItemAdapter(ComicInfoItem())
        # Reverse lookup: ComicItem key -> ComicInfoItem field name.
        source_to_field = {}
        for field in info_item.field_names():
            meta_info = info_item.get_field_meta(field).get('info')
            if meta_info is not None:
                source_to_field[meta_info] = field
        comic_info = {}
        for key, value in ComicItem(comic_item).items():
            new_key = source_to_field.get(key)
            if new_key is not None:
                comic_info[new_key] = value
        return ItemExporter().export_obj(ComicInfoItem(comic_info))

    def export_item(self, item):
        """Write the fields of ``item`` as children of the root element.

        Fields whose serialized value is ``None`` or ``""`` are skipped.
        List entries (e.g. under ``Pages``) are written as ``Page`` elements.

        Args:
            item: The comic item to export.

        Returns:
            The converted comic-info data produced by ``comic_to_info_item``.
        """
        comic_info = self.comic_to_info_item(item)
        child_element = "Page"
        self._beautify_indent(depth=1)
        self._beautify_newline()
        for name, value in self._get_serialized_fields(comic_info, default_value=""):
            # "Pages" presumably arrives as the string form of a Python
            # literal (TODO confirm); parse it only when there is text to
            # parse, so empty or already-parsed values do not raise.
            if name == "Pages" and isinstance(value, str) and value.strip():
                value = ast.literal_eval(value)
            # The original test used "or", which is always true and never
            # skipped anything; empty values are skipped here as intended.
            if value is None or value == "":
                continue
            self._export_xml_field(name, value, depth=2, child_element=child_element)
        return comic_info

    def _export_xml_field(self, name, serialized_value, depth, child_element="value"):
        """Write one element, recursing into mappings and list-like values.

        Args:
            name: Element name.
            serialized_value: Mapping, list-like, or scalar to write.
            depth: Indentation depth of the element.
            child_element: Element name used for entries of a list-like
                value. Not forwarded when recursing, so nested lists use
                the default ``"value"``.
        """
        self._beautify_indent(depth=depth)
        self.xg.startElement(name, {})
        if hasattr(serialized_value, "items"):
            self._beautify_newline()
            for sub_name, value in serialized_value.items():
                self._export_xml_field(sub_name, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif is_listlike(serialized_value):
            self._beautify_newline()
            for value in serialized_value:
                self._export_xml_field(child_element, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif isinstance(serialized_value, str):
            self.xg.characters(serialized_value)
        else:
            self.xg.characters(str(serialized_value))
        self.xg.endElement(name)
        self._beautify_newline()

    def finish_exporting(self):
        """Close the root element, end the document and close the file."""
        self.xg.endElement(self.custom_root_element)
        self.xg.endDocument()
        self.xml_file.close()

    def export_xml(self, item):
        """Export ``item`` as a complete XML document and close the file.

        Args:
            item: The comic item to export.

        Returns:
            The converted comic-info data returned by ``export_item``.
        """
        try:
            self.start_exporting()
            comic_info = self.export_item(item)
            self.finish_exporting()
        finally:
            # Closing twice is harmless; this releases the handle when the
            # export fails before finish_exporting runs.
            self.xml_file.close()
        return comic_info