import os.path
import json
import ast
from Comics.settings import COMIC_INFO_FIELDS_TO_EXPORT
from scrapy.exporters import XmlItemExporter
from scrapy.exporters import PythonItemExporter
from scrapy.exporters import JsonItemExporter
from Comics.items import ComicInfoItem
from Comics.items import ComicItem
from Comics.settings import COMIC_INFO_XML_STORE
from Comics.utils.Constant import ComicPath
from scrapy.utils.python import is_listlike, to_bytes, to_unicode
from itemadapter import ItemAdapter


class CommonExporter():
    """Small helper for preparing output file paths."""

    def getPath(self, file, sufix=None):
        """Return ``file`` with an extension appended, creating its directory.

        Args:
            file: Target file path.
            sufix: Extension without the leading dot (e.g. ``"json"``).
                ``None`` leaves the path unchanged.

        Returns:
            The path, with ``"." + sufix`` appended when that text does not
            already occur in ``file``.
        """
        dirname = os.path.dirname(file)
        # A bare file name has an empty dirname; os.makedirs("") would raise.
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        if sufix is not None:
            extension = "." + sufix
            # Substring test (not endswith) is kept on purpose so existing
            # callers get the same paths as before.
            if extension not in file:
                file = file + extension
        return file


class ItemExporter(PythonItemExporter):
    """Python exporter that also decodes bytes found in the exported data."""

    def convert(self, data):
        """Recursively decode ``bytes`` to ``str`` inside dicts, tuples and lists.

        Args:
            data: Any value; containers are walked recursively.

        Returns:
            A structure of the same shape with every ``bytes`` value decoded
            as UTF-8. Other values are returned unchanged.
        """
        if isinstance(data, bytes):
            return data.decode("utf-8")
        if isinstance(data, dict):
            return {self.convert(key): self.convert(value) for key, value in data.items()}
        if isinstance(data, tuple):
            # Build a real tuple: a lazy ``map`` object can only be consumed
            # once and is not a usable replacement for the original value.
            return tuple(self.convert(element) for element in data)
        if isinstance(data, list):
            return [self.convert(element) for element in data]
        return data

    def export_obj(self, obj_item):
        """Export ``obj_item`` and return the converted result of ``export_item``."""
        self.start_exporting()
        obj_item = self.convert(self.export_item(obj_item))
        self.finish_exporting()
        return obj_item


class JsonExport(JsonItemExporter):
    """JSON exporter that opens (and later closes) its own output file."""

    def __init__(self, file, **kwargs):
        """Open ``file`` (with a ``.json`` extension ensured) for binary writing.

        Args:
            file: Output path; its directory is created when missing.
            **kwargs: Forwarded to ``JsonItemExporter``.
        """
        file = CommonExporter().getPath(file=file, sufix="json")
        self.file = open(file, "wb")
        try:
            super(JsonExport, self).__init__(self.file, **kwargs)
        except Exception:
            # Do not leak the handle when the base constructor rejects kwargs.
            self.file.close()
            raise

    def export_json(self, json_object, if_return=False):
        """Write ``json_object`` to the file and close it.

        Args:
            json_object: The item to export.
            if_return: When true, also return the item exported through
                ``ItemExporter``.

        Returns:
            The ``ItemExporter().export_obj`` result when ``if_return`` is
            true, otherwise ``None``.
        """
        try:
            self.start_exporting()
            self.export_item(json_object)
            self.finish_exporting()
        finally:
            # Close the file even when exporting fails part-way.
            self.file.close()
        if if_return:
            return ItemExporter().export_obj(json_object)
        return None


class ComicInfoXmlItemExporter(XmlItemExporter):
    """XML exporter that writes a ``ComicInfo.xml`` file for one chapter."""

    # Name of the XML root element and of the output file (without ".xml").
    custom_root_element = "ComicInfo"

    def __init__(self, comic, chapter):
        """Open ``<COMIC_INFO_XML_STORE>/<comic>/<chapter>/ComicInfo.xml`` for writing.

        Args:
            comic: Path component placed under ``COMIC_INFO_XML_STORE``.
            chapter: Path component placed under ``comic``.
        """
        file_path = os.path.join(
            COMIC_INFO_XML_STORE, comic, chapter, f"{self.custom_root_element}.xml"
        )
        dir_path = os.path.dirname(file_path)
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        self.xml_file = open(file_path, "wb")
        try:
            super(ComicInfoXmlItemExporter, self).__init__(
                self.xml_file,
                root_element=self.custom_root_element,
                indent=1,
                fields_to_export=COMIC_INFO_FIELDS_TO_EXPORT,
            )
        except Exception:
            # Do not leak the handle when the base constructor fails.
            self.xml_file.close()
            raise

    def serialize_field(self, field, name, value):
        """Pass ``value`` through ``ComicPath.chinese_convert`` before serializing.

        NOTE(review): ``chinese_convert`` is defined outside this file; it
        presumably normalizes Chinese text -- confirm against ``ComicPath``.
        """
        value = ComicPath.chinese_convert(value)
        return super().serialize_field(field, name, value)

    def start_exporting(self):
        """Start the XML document and open the root element."""
        self.xg.startDocument()
        self.xg.startElement(self.custom_root_element, {})

    def comic_to_info_item(self, comic_item):
        """Map a comic item onto ``ComicInfoItem`` fields.

        Each ``ComicInfoItem`` field whose metadata has an ``info`` entry
        names the source key it is filled from. Keys of ``comic_item``
        without such a mapping are dropped.

        Args:
            comic_item: Data accepted by the ``ComicItem`` constructor.

        Returns:
            The result of exporting the populated ``ComicInfoItem`` through
            ``ItemExporter``.
        """
        info_item = ItemAdapter(ComicInfoItem())
        # Source key (the field's ``info`` metadata) -> ComicInfoItem field name.
        source_to_field = {}
        for field in info_item.field_names():
            meta_info = info_item.get_field_meta(field).get('info')
            if meta_info is not None:
                source_to_field[meta_info] = field
        comic_info = {}
        for key, value in ComicItem(comic_item).items():
            new_key = source_to_field.get(key)
            if new_key is not None:
                comic_info[new_key] = value
        return ItemExporter().export_obj(ComicInfoItem(comic_info))

    def export_item(self, item):
        """Write the fields of ``item`` as child elements of the root.

        Empty values (``None`` or ``""``) are skipped. The ``Pages`` field is
        parsed from its string form with ``ast.literal_eval`` and its list
        entries are written as ``Page`` elements.

        Args:
            item: The comic item to convert and export.

        Returns:
            The converted info item produced by ``comic_to_info_item``.
        """
        comic_info = self.comic_to_info_item(item)
        child_element = "Page"
        self._beautify_indent(depth=1)
        self._beautify_newline()
        for name, value in self._get_serialized_fields(comic_info, default_value=""):
            # The original test used ``or`` and was therefore always true;
            # skipping first also keeps literal_eval away from empty strings.
            if value is None or value == "":
                continue
            if name == "Pages" and isinstance(value, str):
                value = ast.literal_eval(value)
            self._export_xml_field(name, value, depth=2, child_element=child_element)
        return comic_info

    def _export_xml_field(self, name, serialized_value, depth, child_element="value"):
        """Write one element, recursing into mappings and list-like values.

        Args:
            name: Element name.
            serialized_value: Mapping, list-like, string or other value; other
                values are written via ``str()``.
            depth: Indentation depth for this element.
            child_element: Element name used for the entries of a list-like
                value at this level (nested levels use the default).
        """
        self._beautify_indent(depth=depth)
        self.xg.startElement(name, {})
        if hasattr(serialized_value, "items"):
            self._beautify_newline()
            for sub_name, value in serialized_value.items():
                self._export_xml_field(sub_name, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif is_listlike(serialized_value):
            self._beautify_newline()
            for value in serialized_value:
                self._export_xml_field(child_element, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif isinstance(serialized_value, str):
            self.xg.characters(serialized_value)
        else:
            self.xg.characters(str(serialized_value))
        self.xg.endElement(name)
        self._beautify_newline()

    def finish_exporting(self):
        """Close the root element, end the document and close the file."""
        try:
            self.xg.endElement(self.custom_root_element)
            self.xg.endDocument()
        finally:
            # Always release the handle, even if ending the document fails.
            self.xml_file.close()

    def export_xml(self, item):
        """Export ``item`` as a complete XML document and close the file.

        Args:
            item: The comic item to export.

        Returns:
            The converted info item returned by ``export_item``.
        """
        try:
            self.start_exporting()
            comic_info = self.export_item(item)
        except Exception:
            # finish_exporting() is not reached on failure; close here instead.
            self.xml_file.close()
            raise
        self.finish_exporting()
        return comic_info