重构ComicInfo
This commit is contained in:
parent
d6cb8e4251
commit
dca89dccc7
127
ComicInfo_2.1.xsd
Normal file
127
ComicInfo_2.1.xsd
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<xs:schema elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
|
||||||
|
<xs:element name="ComicInfo" nillable="true" type="ComicInfo"/>
|
||||||
|
<xs:complexType name="ComicInfo">
|
||||||
|
<xs:sequence>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Title" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Series" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Number" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Count" type="xs:int"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Volume" type="xs:int"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="AlternateSeries" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="AlternateNumber" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="AlternateCount" type="xs:int"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Summary" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Notes" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Year" type="xs:int"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Month" type="xs:int"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Day" type="xs:int"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Writer" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Penciller" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Inker" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Colorist" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Letterer" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="CoverArtist" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Editor" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Translator" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Publisher" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Imprint" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Genre" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Tags" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Web" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="0" name="PageCount" type="xs:int"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="LanguageISO" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Format" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="Unknown" name="BlackAndWhite" type="YesNo"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="Unknown" name="Manga" type="Manga"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Characters" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Teams" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Locations" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="ScanInformation" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="StoryArc" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="StoryArcNumber" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="SeriesGroup" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="Unknown" name="AgeRating" type="AgeRating"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" name="Pages" type="ArrayOfComicPageInfo"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" name="CommunityRating" type="Rating"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="MainCharacterOrTeam" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="Review" type="xs:string"/>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="1" default="" name="GTIN" type="xs:string"/>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:complexType>
|
||||||
|
<xs:simpleType name="YesNo">
|
||||||
|
<xs:restriction base="xs:string">
|
||||||
|
<xs:enumeration value="Unknown"/>
|
||||||
|
<xs:enumeration value="No"/>
|
||||||
|
<xs:enumeration value="Yes"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
<xs:simpleType name="Manga">
|
||||||
|
<xs:restriction base="xs:string">
|
||||||
|
<xs:enumeration value="Unknown"/>
|
||||||
|
<xs:enumeration value="No"/>
|
||||||
|
<xs:enumeration value="Yes"/>
|
||||||
|
<xs:enumeration value="YesAndRightToLeft"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
<xs:simpleType name="Rating">
|
||||||
|
<xs:restriction base="xs:decimal">
|
||||||
|
<xs:minInclusive value="0"/>
|
||||||
|
<xs:maxInclusive value="5"/>
|
||||||
|
<xs:fractionDigits value="1"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
<xs:simpleType name="AgeRating">
|
||||||
|
<xs:restriction base="xs:string">
|
||||||
|
<xs:enumeration value="Unknown"/>
|
||||||
|
<xs:enumeration value="Adults Only 18+"/>
|
||||||
|
<xs:enumeration value="Early Childhood"/>
|
||||||
|
<xs:enumeration value="Everyone"/>
|
||||||
|
<xs:enumeration value="Everyone 10+"/>
|
||||||
|
<xs:enumeration value="G"/>
|
||||||
|
<xs:enumeration value="Kids to Adults"/>
|
||||||
|
<xs:enumeration value="M"/>
|
||||||
|
<xs:enumeration value="MA15+"/>
|
||||||
|
<xs:enumeration value="Mature 17+"/>
|
||||||
|
<xs:enumeration value="PG"/>
|
||||||
|
<xs:enumeration value="R18+"/>
|
||||||
|
<xs:enumeration value="Rating Pending"/>
|
||||||
|
<xs:enumeration value="Teen"/>
|
||||||
|
<xs:enumeration value="X18+"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
<xs:complexType name="ArrayOfComicPageInfo">
|
||||||
|
<xs:sequence>
|
||||||
|
<xs:element minOccurs="0" maxOccurs="unbounded" name="Page" nillable="true" type="ComicPageInfo"/>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:complexType>
|
||||||
|
<xs:complexType name="ComicPageInfo">
|
||||||
|
<xs:attribute name="Image" type="xs:int" use="required"/>
|
||||||
|
<xs:attribute default="Story" name="Type" type="ComicPageType"/>
|
||||||
|
<xs:attribute default="false" name="DoublePage" type="xs:boolean"/>
|
||||||
|
<xs:attribute default="0" name="ImageSize" type="xs:long"/>
|
||||||
|
<xs:attribute default="" name="Key" type="xs:string"/>
|
||||||
|
<xs:attribute default="" name="Bookmark" type="xs:string"/>
|
||||||
|
<xs:attribute default="-1" name="ImageWidth" type="xs:int"/>
|
||||||
|
<xs:attribute default="-1" name="ImageHeight" type="xs:int"/>
|
||||||
|
</xs:complexType>
|
||||||
|
<xs:simpleType name="ComicPageType">
|
||||||
|
<xs:list>
|
||||||
|
<xs:simpleType>
|
||||||
|
<xs:restriction base="xs:string">
|
||||||
|
<xs:enumeration value="FrontCover"/>
|
||||||
|
<xs:enumeration value="InnerCover"/>
|
||||||
|
<xs:enumeration value="Roundup"/>
|
||||||
|
<xs:enumeration value="Story"/>
|
||||||
|
<xs:enumeration value="Advertisement"/>
|
||||||
|
<xs:enumeration value="Editorial"/>
|
||||||
|
<xs:enumeration value="Letters"/>
|
||||||
|
<xs:enumeration value="Preview"/>
|
||||||
|
<xs:enumeration value="BackCover"/>
|
||||||
|
<xs:enumeration value="Other"/>
|
||||||
|
<xs:enumeration value="Deleted"/>
|
||||||
|
</xs:restriction>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:list>
|
||||||
|
</xs:simpleType>
|
||||||
|
</xs:schema>
|
||||||
168
Comics/_utils/ComicInfo.py
Normal file
168
Comics/_utils/ComicInfo.py
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from xml.dom import minidom
|
||||||
|
from typing import List
|
||||||
|
import json,os
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
# Define the ComicInfo and ComicPageInfo classes
|
||||||
|
class ComicInfo:
    """In-memory model of a ``ComicInfo.xml`` document (ComicInfo 2.1 schema).

    Attribute names equal the XML element names.  The attributes are created
    in the same order as the elements of the schema's ``xs:sequence`` because
    the serializer walks ``__dict__`` in insertion order; do not reorder them
    and do not add schema fields ad hoc from the outside.

    Sentinel values meaning "not set" (such fields are left out of the XML):
    ``""`` for text fields and ``-1`` for numeric fields.
    """

    def __init__(self):
        # Title of this book / chapter.
        self.Title: str = ""
        self.Series: str = ""
        self.Number: str = ""
        self.Count: int = -1
        self.Volume: int = -1
        self.AlternateSeries: str = ""
        self.AlternateNumber: str = ""
        self.AlternateCount: int = -1
        self.Summary: str = ""
        self.Notes: str = ""
        self.Year: int = -1
        self.Month: int = -1
        self.Day: int = -1
        self.Writer: str = ""
        self.Penciller: str = ""
        self.Inker: str = ""
        self.Colorist: str = ""
        self.Letterer: str = ""
        self.CoverArtist: str = ""
        self.Editor: str = ""
        # Present in the 2.1 schema between Editor and Publisher.
        self.Translator: str = ""
        self.Publisher: str = ""
        self.Imprint: str = ""
        self.Genre: str = ""
        self.Tags: str = ""
        self.Web: str = ""
        self.PageCount: int = -1
        self.LanguageISO: str = ""
        self.Format: str = ""
        # Schema enum YesNo: "Unknown", "No", "Yes".
        self.BlackAndWhite: str = ""
        # Schema enum Manga: "Unknown", "No", "Yes", "YesAndRightToLeft".
        self.Manga: str = ""
        self.Characters: str = ""
        self.Teams: str = ""
        self.Locations: str = ""
        self.ScanInformation: str = ""
        self.StoryArc: str = ""
        # Present in the 2.1 schema between StoryArc and SeriesGroup.
        self.StoryArcNumber: str = ""
        self.SeriesGroup: str = ""
        # Must be one of the schema's AgeRating enumeration values.
        self.AgeRating: str = ""
        self.Pages: List[ComicPageInfo] = []
        # Elements that follow Pages in the 2.1 schema.
        # CommunityRating is a decimal in the range 0..5; -1 means "not set".
        self.CommunityRating: float = -1
        self.MainCharacterOrTeam: str = ""
        self.Review: str = ""
        self.GTIN: str = ""
|
||||||
|
|
||||||
|
class ComicPageInfo:
    """Attributes of one ``<Page>`` element of a ``ComicInfo.xml`` document.

    ``-1`` (numbers) and ``""`` (text) mean "not set"; ``Type`` defaults to
    ``"Story"`` and ``DoublePage`` to ``False``.
    """

    def __init__(self):
        self.Image: int = -1
        self.Type: str = "Story"
        self.DoublePage: bool = False
        self.ImageSize: int = -1
        self.Key: str = ""
        self.Bookmark: str = ""
        self.ImageWidth: int = -1
        self.ImageHeight: int = -1

    def toString(self):
        """Return the page as a dict of XML attribute name -> string value.

        Despite its name this returns a ``dict`` (usable directly as the
        attribute mapping of an ElementTree element), not a string.  Unset
        values (``None``, ``-1``, ``""``) and values equal to their default
        (``Type == "Story"``, ``DoublePage == False``) are omitted.
        """
        data = {}

        def add(key, value):
            if value is not None and value != -1 and value != "":
                data[key] = str(value)

        add("Image", self.Image)
        add("ImageSize", self.ImageSize)
        add("ImageWidth", self.ImageWidth)
        add("ImageHeight", self.ImageHeight)
        add("Key", self.Key)
        add("Bookmark", self.Bookmark)
        if self.Type != "Story":
            add("Type", self.Type)
        if self.DoublePage:
            # str(True) would give "True"; lowercase "true" is the usual
            # XML boolean spelling.
            data["DoublePage"] = "true"
        return data
|
||||||
|
|
||||||
|
class ComicInfoXml:
    """Build a ``ComicInfo.xml`` file from scraped data and validate it."""

    # (ComicInfo attribute, key looked up in the scraped item)
    _JSON_FIELD_MAP = (
        ("Title", "chapter"),
        ("Series", "name"),
        ("Writer", "author"),
        ("AgeRating", "age_rating"),
        ("Tags", "tags"),
        ("Summary", "dep"),
        ("Genre", "genre"),
        ("Number", "index"),
        ("PageCount", "count"),
    )

    def save_xml_to_file(self, xml_string, filename):
        """Write ``xml_string`` to ``filename`` as UTF-8.

        The parent directory is created when it does not exist yet.

        Args:
            xml_string (str): Complete XML document text.
            filename (str): Target path; may be a bare file name.
        """
        base_dir = os.path.dirname(filename)
        # A bare file name has an empty directory part and os.makedirs("")
        # raises, so only create a directory when there is one to create.
        if base_dir:
            os.makedirs(base_dir, exist_ok=True)
        with open(filename, "w", encoding="utf-8") as file:
            file.write(xml_string)

    def validate_xml_with_xsd_file(self, xml_file, xsd_file, remove=True):
        """Validate ``xml_file`` against the schema in ``xsd_file``.

        Args:
            xml_file (str): Path of the XML document to check.
            xsd_file (str): Path of the XSD schema.
            remove (bool): Delete ``xml_file`` when it is not valid.

        Returns:
            bool: True when the document is valid, False otherwise.
        """
        xml_doc = etree.parse(xml_file)
        with open(xsd_file, 'r', encoding="utf-8") as file:
            xsd_doc = etree.XMLSchema(etree.parse(file))
        try:
            xsd_doc.assertValid(xml_doc)
        except etree.DocumentInvalid as e:
            print("XML is not valid:")
            print(e)
            # NOTE(review): removal is applied to invalid documents only;
            # confirm this matches the intended behavior.
            if remove:
                os.remove(xml_file)
            return False
        print("XML is valid according to the XSD.")
        return True

    def _serialize_comic_info(self, comic):
        """Return ``comic`` as a pretty-printed XML document string.

        Every instance attribute of ``comic`` becomes a child element named
        after the attribute, in attribute-definition order.  Attributes equal
        to ``None``, ``-1`` or ``""`` are skipped.  ``Pages`` becomes a
        ``<Pages>`` element holding one ``<Page>`` per entry, whose XML
        attributes come from the entry's ``toString()`` mapping.
        """
        comic_elem = ET.Element('ComicInfo')
        comic_elem.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
        comic_elem.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
        for attr, value in vars(comic).items():
            # None would otherwise end up in the file as the text "None".
            if value is None or value == -1 or value == '':
                continue
            if attr == 'Pages':
                pages_elem = ET.SubElement(comic_elem, 'Pages')
                for page in value:
                    ET.SubElement(pages_elem, 'Page', page.toString())
            else:
                ET.SubElement(comic_elem, attr).text = str(value)

        # Round-trip through minidom to get indented output together with
        # the XML declaration.
        xml_bytes = ET.tostring(comic_elem, encoding='utf-8', method='xml')
        pretty = minidom.parseString(xml_bytes).toprettyxml(indent=" ", encoding="utf-8")
        return pretty.decode('utf-8')

    def parse_comicinfo(self, comic: "ComicInfo", save_dir=None, xml_filename="ComicInfo.xml", xsd_filename="ComicInfo_2.1.xsd"):
        """Serialize ``comic`` to XML, save it and validate the saved file.

        Args:
            comic (ComicInfo): Metadata to write.
            save_dir (str, optional): Directory for the XML file.  When None,
                ``xml_filename`` is used as given.
            xml_filename (str, optional): Name of the XML file.
            xsd_filename (str, optional): Path of the XSD used for validation.

        Returns:
            bool: Result of the schema validation.
        """
        serialized_xml = self._serialize_comic_info(comic)
        print(serialized_xml)

        # Save the XML data to a file, then check it against the schema.
        if save_dir is not None:
            xml_filename = os.path.join(save_dir, xml_filename)
        self.save_xml_to_file(serialized_xml, xml_filename)
        return self.validate_xml_with_xsd_file(xml_filename, xsd_filename)

    def scrapy_xml_by_json(self, json_data, save_dir=None):
        """Create ``ComicInfo.xml`` from a scraped item.

        Args:
            json_data: Mapping-like object providing ``get``; the keys read
                are "chapter", "name", "author", "age_rating", "tags", "dep",
                "genre", "index", "count" and "images".
            save_dir (str, optional): Directory the XML file is written to.

        Returns:
            list: The page identifier taken from every image name, in order.
        """
        comic = ComicInfo()
        for attr, key in self._JSON_FIELD_MAP:
            setattr(comic, attr, json_data.get(key, ""))

        pages = []
        # "images" may be missing or None; treat both as "no pages".
        for image_name in json_data.get("images", "") or []:
            page = ComicPageInfo()
            # The identifier is the text after the last "_" of the part of
            # the name that precedes the first ".".
            page.Image = image_name.split(".")[0].split("_")[-1]
            pages.append(page.Image)
            comic.Pages.append(page)
        self.parse_comicinfo(comic, save_dir=save_dir)
        return pages
|
||||||
@ -1,14 +1,6 @@
|
|||||||
import os.path,json,ast
|
import os.path
|
||||||
from Comics.settings import COMIC_INFO_FIELDS_TO_EXPORT
|
|
||||||
from scrapy.exporters import XmlItemExporter
|
|
||||||
from scrapy.exporters import PythonItemExporter
|
from scrapy.exporters import PythonItemExporter
|
||||||
from scrapy.exporters import JsonItemExporter
|
from scrapy.exporters import JsonItemExporter
|
||||||
from Comics.items import ComicInfoItem
|
|
||||||
from Comics.items import ComicItem
|
|
||||||
from Comics.settings import COMIC_INFO_XML_STORE
|
|
||||||
from Comics.utils import ComicPath
|
|
||||||
from scrapy.utils.python import is_listlike, to_bytes, to_unicode
|
|
||||||
from itemadapter import ItemAdapter
|
|
||||||
|
|
||||||
class CommonExporter():
|
class CommonExporter():
|
||||||
def getPath(self, file , sufix=None):
|
def getPath(self, file , sufix=None):
|
||||||
@ -47,83 +39,3 @@ class JsonExport(JsonItemExporter):
|
|||||||
self.file.close()
|
self.file.close()
|
||||||
if if_return:
|
if if_return:
|
||||||
return ItemExporter().export_obj(json_object)
|
return ItemExporter().export_obj(json_object)
|
||||||
|
|
||||||
|
|
||||||
class ComicInfoXmlItemExporter(XmlItemExporter):
|
|
||||||
custom_root_element = "ComicInfo"
|
|
||||||
def __init__(self, dir):
|
|
||||||
file_path = os.path.join(COMIC_INFO_XML_STORE, dir,
|
|
||||||
f"{self.custom_root_element}.xml")
|
|
||||||
dir_path = os.path.dirname(file_path)
|
|
||||||
if not os.path.exists(dir_path): os.makedirs(dir_path)
|
|
||||||
self.xml_file = open(file_path, "wb")
|
|
||||||
super(ComicInfoXmlItemExporter, self).__init__(self.xml_file,
|
|
||||||
root_element=self.custom_root_element,
|
|
||||||
indent=1,fields_to_export=COMIC_INFO_FIELDS_TO_EXPORT)
|
|
||||||
|
|
||||||
def serialize_field(self, field, name, value):
|
|
||||||
#通过序列化
|
|
||||||
value = ComicPath.chinese_convert(value)
|
|
||||||
return super().serialize_field(field, name, value)
|
|
||||||
|
|
||||||
def start_exporting(self):
|
|
||||||
self.xg.startDocument()
|
|
||||||
self.xg.startElement(self.custom_root_element, {})
|
|
||||||
|
|
||||||
def comic_to_info_item(self, comic_item):
|
|
||||||
comic_info = {}
|
|
||||||
info_item = ItemAdapter(ComicInfoItem())
|
|
||||||
comic_info_dict = {}
|
|
||||||
for field in info_item.field_names():
|
|
||||||
meta_info = info_item.get_field_meta(field).get('info')
|
|
||||||
if meta_info is not None:
|
|
||||||
comic_info_dict[meta_info] = field
|
|
||||||
for key, value in ComicItem(comic_item).items():
|
|
||||||
new_key = comic_info_dict.get(key)
|
|
||||||
if new_key is not None:
|
|
||||||
comic_info[new_key] = value
|
|
||||||
return ItemExporter().export_obj(ComicInfoItem(comic_info))
|
|
||||||
|
|
||||||
def export_item(self, item):
|
|
||||||
comic_info = self.comic_to_info_item(item)
|
|
||||||
child_element = "Page"
|
|
||||||
self._beautify_indent(depth=1)
|
|
||||||
self._beautify_newline()
|
|
||||||
for name, value in self._get_serialized_fields(comic_info, default_value=""):
|
|
||||||
if name == "Pages":
|
|
||||||
value = ast.literal_eval(value)
|
|
||||||
if value is not None or value != "":
|
|
||||||
self._export_xml_field(name, value, depth=2, child_element=child_element)
|
|
||||||
#self._beautify_indent(depth=1)
|
|
||||||
return comic_info
|
|
||||||
|
|
||||||
def _export_xml_field(self, name, serialized_value, depth, child_element="value"):
|
|
||||||
self._beautify_indent(depth=depth)
|
|
||||||
self.xg.startElement(name, {})
|
|
||||||
if hasattr(serialized_value, "items"):
|
|
||||||
self._beautify_newline()
|
|
||||||
for sub_name, value in serialized_value.items():
|
|
||||||
self._export_xml_field(sub_name, value, depth=depth + 1)
|
|
||||||
self._beautify_indent(depth=depth)
|
|
||||||
elif is_listlike(serialized_value):
|
|
||||||
self._beautify_newline()
|
|
||||||
for value in serialized_value:
|
|
||||||
self._export_xml_field(child_element, value, depth=depth + 1)
|
|
||||||
self._beautify_indent(depth=depth)
|
|
||||||
elif isinstance(serialized_value, str):
|
|
||||||
self.xg.characters(serialized_value)
|
|
||||||
else:
|
|
||||||
self.xg.characters(str(serialized_value))
|
|
||||||
self.xg.endElement(name)
|
|
||||||
self._beautify_newline()
|
|
||||||
|
|
||||||
def finish_exporting(self):
|
|
||||||
self.xg.endElement(self.custom_root_element)
|
|
||||||
self.xg.endDocument()
|
|
||||||
self.xml_file.close()
|
|
||||||
|
|
||||||
def export_xml(self, item):
|
|
||||||
self.start_exporting()
|
|
||||||
comic_info = self.export_item(item)
|
|
||||||
self.finish_exporting()
|
|
||||||
return comic_info
|
|
||||||
|
|||||||
@ -9,12 +9,12 @@ import os,scrapy,logging
|
|||||||
from Comics import settings
|
from Comics import settings
|
||||||
from Comics.items import ComicItem
|
from Comics.items import ComicItem
|
||||||
from Comics.loader import ComicLoader
|
from Comics.loader import ComicLoader
|
||||||
from Comics.exporters import ComicInfoXmlItemExporter
|
|
||||||
from Comics.utils import CBZUtils,fileUtils as fu
|
from Comics.utils import CBZUtils,fileUtils as fu
|
||||||
from Comics.utils import ComicPath
|
from Comics.utils import ComicPath
|
||||||
from Comics.utils import checkUtils
|
from Comics.utils import checkUtils
|
||||||
from Comics.exporters import JsonExport,ItemExporter
|
from Comics.exporters import JsonExport,ItemExporter
|
||||||
from scrapy.pipelines.images import ImagesPipeline
|
from scrapy.pipelines.images import ImagesPipeline
|
||||||
|
from Comics._utils.ComicInfo import ComicInfoXml
|
||||||
|
|
||||||
class ComicsPipeline():
|
class ComicsPipeline():
|
||||||
|
|
||||||
@ -123,10 +123,14 @@ class ImgDownloadPipeline(BaseImagesPipeline):
|
|||||||
self.pack_icon(item)
|
self.pack_icon(item)
|
||||||
else:
|
else:
|
||||||
# ComicInfoXml 生成
|
# ComicInfoXml 生成
|
||||||
comic_info = ComicInfoXmlItemExporter(dir=super().get_file_path(item=item, result_type="comic_info")).export_xml(item)
|
#comic_info = ComicInfoXmlItemExporter(dir=super().get_file_path(item=item, result_type="comic_info")).export_xml(item)
|
||||||
|
comic_pages = ComicInfoXml().scrapy_xml_by_json(item, save_dir=super().get_file_path(item=item, result_type="images_dir"))
|
||||||
|
#if CBZUtils.packComicChapterCBZ(src_dir= super().get_file_path(item, result_type="images_dir"),
|
||||||
|
# dts_path= cbz_path,
|
||||||
|
# comic_info_images= comic_info['Pages'], remove=True):
|
||||||
if CBZUtils.packComicChapterCBZ(src_dir= super().get_file_path(item, result_type="images_dir"),
|
if CBZUtils.packComicChapterCBZ(src_dir= super().get_file_path(item, result_type="images_dir"),
|
||||||
dts_path= cbz_path,
|
dts_path= cbz_path,
|
||||||
comic_info_images= comic_info['Pages'], remove=True):
|
comic_info_images= comic_pages, remove=True):
|
||||||
super().update_icon(item)
|
super().update_icon(item)
|
||||||
self.pack_icon(item)
|
self.pack_icon(item)
|
||||||
# CBZ校验失败
|
# CBZ校验失败
|
||||||
|
|||||||
@ -126,22 +126,3 @@ LOG_STDOUT = True # 标准化输出
|
|||||||
CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"CBZ")
|
CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"CBZ")
|
||||||
#数据导出类 排序
|
#数据导出类 排序
|
||||||
COMIC_INFO_XML_FILE = "ComicInfo.xml"
|
COMIC_INFO_XML_FILE = "ComicInfo.xml"
|
||||||
COMIC_INFO_FIELDS_TO_EXPORT = [
|
|
||||||
"Title",
|
|
||||||
"Series",
|
|
||||||
"Number",
|
|
||||||
"SeriesGroup",
|
|
||||||
"Summary",
|
|
||||||
"Year",
|
|
||||||
"Month",
|
|
||||||
"Day",
|
|
||||||
"Writer",
|
|
||||||
"Publisher",
|
|
||||||
"Genre",
|
|
||||||
"Tags",
|
|
||||||
"Web",
|
|
||||||
"PageCount",
|
|
||||||
"LanguageISO",
|
|
||||||
"AgeRating",
|
|
||||||
"Pages"
|
|
||||||
]
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user