This commit is contained in:
caiwx86 2024-10-28 02:30:56 +08:00
parent 5f5b6cb6ec
commit ca805fe252
5 changed files with 10 additions and 63 deletions

View File

@ -1,9 +1,9 @@
import xml.etree.ElementTree as ET
from xml.dom import minidom
from typing import List
import json,os
import os
from lxml import etree
from Comics.settings import COMIC_INFO_XML_FILE,COMIC_INFO_XSD_FILE,OUTPUT_DIR,PROJECT_KEY
from Comics.settings import COMIC_INFO_XSD_FILE
# Define the ComicInfo and ComicPageInfo classes
class ComicInfo:

View File

@ -103,59 +103,4 @@ class ComicItem(Item):
# Item declaring three fields: current_project, names and urls.
# NOTE(review): Item and Field are presumably scrapy.Item / scrapy.Field; their
# import is outside this excerpt -- confirm.
class BooksItem(Item):
current_project = Field()
names = Field()
urls = Field()
# Item declaring five per-image fields: image_url, image_name, image_path,
# image_type and isScramble.
# NOTE(review): the value types are not visible here; elsewhere in this change
# the scramble flag is passed around as the strings "True"/"False" -- confirm
# whether isScramble follows the same convention.
class ImageItem(Item):
image_url = Field()
image_name = Field()
image_path = Field()
image_type = Field()
isScramble = Field()
class Image:
    """Builder for plain image-descriptor dictionaries."""

    def setImage(self, url, scramble):
        """Return a dict pairing an image URL with its scramble flag.

        Args:
            url: Stored unchanged under the ``"src"`` key.
            scramble: Stored unchanged under the ``"scramble"`` key.

        Returns:
            ``{"src": url, "scramble": scramble}``.
        """
        descriptor = dict(src=url, scramble=scramble)
        return descriptor
# Serializer - author
def serializer_info_writer(value):
    """Serialize an author value into a comma-separated list of unique names.

    The value is converted to ``str``, every ``"&"`` is treated as a
    separator (replaced by a space), and the text is split on single spaces.
    Note that a multi-word name is therefore split into separate entries.

    Duplicates are removed while keeping first-seen order, and empty tokens
    (produced by consecutive separators such as ``"A & B"``) are dropped, so
    the result is deterministic and never contains stray commas.

    Args:
        value: Author value; anything accepted by ``str()``.

    Returns:
        The unique, non-empty tokens joined with ``","``.
    """
    text = str(value).replace("&", " ")
    # dict.fromkeys de-duplicates like a set but preserves insertion order,
    # unlike iterating a set of strings, whose order varies between runs.
    unique_names = dict.fromkeys(token for token in text.split(" ") if token)
    return ",".join(unique_names)
# (Private) serializer - images
def _serialize_info_images(value, result_type=None):
    """Convert image entries into their saved names, or count them.

    Entries that contain the platform path separator (``os.sep``) are
    skipped. Every remaining entry is passed to
    ``ComicPath().getFileScrambleImageSave(entry, True, False)``.
    NOTE(review): the meaning of the two boolean flags is defined by
    ComicPath, which is outside this block -- confirm.

    Args:
        value: Iterable of image entries (strings).
        result_type: Pass ``"count"`` to get the number of converted entries
            instead of the list itself.

    Returns:
        The list of converted entries, or its length when
        ``result_type == "count"``.
    """
    converted = [
        ComicPath().getFileScrambleImageSave(entry, True, False)
        for entry in value
        if os.sep not in entry
    ]
    return len(converted) if result_type == "count" else converted


# Serializer - images
def serializer_info_images(value):
    """Return the converted image list for ``value``."""
    return _serialize_info_images(value)


# Serializer - total
def serializer_info_images_count(value):
    """Return how many entries of ``value`` get converted."""
    return _serialize_info_images(value, "count")
# Item listing comic metadata fields.
# NOTE(review): the field names look like ComicInfo.xml element names (inferred
# from the names themselves and the marker comment that follows) -- confirm.
# Keyword arguments passed to Field below:
#   desc       - human-readable label (Chinese text; English gloss given above each field)
#   info       - short key string; presumably the source attribute the value is
#                read from -- TODO confirm against the code that consumes it
#   serializer - function attached to the field; presumably applied to the value
#                on export (scrapy Field metadata convention) -- confirm
class ComicInfoItem(Item):
# desc: chapter name
Title = Field(desc="章节名", info='chapter')
# desc: comic (series) name
Series = Field(desc="漫画名", info='name')
# desc: number
Number = Field(desc="编号", info='index')
# desc: alias
SeriesGroup = Field(desc="别名")
# desc: summary
Summary = Field(desc="概述", info='dep')
# Year / Month / Day carry an empty description and no info key.
Year = Field(desc="")
Month = Field(desc="")
Day = Field(desc="")
# desc: author; serialized with serializer_info_writer
Writer = Field(desc="作者", info='author',serializer=serializer_info_writer)
# desc: publisher
Publisher = Field(desc="出版社")
# desc: genre
Genre = Field(desc="流派", info='genre')
# desc: tags
Tags = Field(desc="标签", info='tags')
# desc: home page
Web = Field(desc="主页")
# desc: total page count
# NOTE(review): serializer_info_images_count is defined in this file but is not
# attached here -- confirm whether that is intentional.
PageCount = Field(desc="总页数", info='count')
# desc: language
LanguageISO = Field(desc="语言")
# desc: age rating
AgeRating = Field(desc="年龄分级", info='age_rating')
# desc: page numbers; serialized with serializer_info_images
Pages = Field(desc="页码", info='images', serializer=serializer_info_images)
# ComicInfo.xml and ComicChapter.json end
urls = Field()

View File

@ -182,7 +182,9 @@ class ComicLoader(BaseLoader):
if chapter != None: self.set_chapter(chapter)
self.save_sname_schapter()
return super().load_item()
def setImageItem(self, url, scramble):
    """Return a dict pairing an image URL with its scramble flag.

    Args:
        url: Stored unchanged under the ``"src"`` key.
        scramble: Stored unchanged under the ``"scramble"`` key.

    Returns:
        ``{"src": url, "scramble": scramble}``; no instance state is read.
    """
    return dict(src=url, scramble=scramble)
def set_image_item(self, image_url, image_path, image_name, image_scramble="False", image_type="Image"):
    """Bundle the given image attributes into a dictionary.

    Each argument is stored unchanged under a key equal to its parameter
    name; no instance state is read.

    Args:
        image_url: Value for the ``"image_url"`` key.
        image_path: Value for the ``"image_path"`` key.
        image_name: Value for the ``"image_name"`` key.
        image_scramble: Value for the ``"image_scramble"`` key; defaults to
            the string ``"False"`` (a string, not a bool).
        image_type: Value for the ``"image_type"`` key; defaults to
            ``"Image"``.

    Returns:
        A dict with the five keys above, in that order.
    """
    return dict(
        image_url=image_url,
        image_path=image_path,
        image_name=image_name,
        image_scramble=image_scramble,
        image_type=image_type,
    )

View File

@ -7,7 +7,7 @@
# useful for handling different item types with a single interface
import os,scrapy,logging,shutil
from Comics import settings
from Comics.items import ComicItem,ImageItem
from Comics.items import ComicItem
from Comics.loader import ComicLoader
from Comics.utils import CBZUtils,fileUtils as fu
from Comics.utils import ComicPath

View File

@ -1,5 +1,5 @@
import scrapy,logging,os,skip,json,re
from Comics.items import ComicItem,Image
import scrapy,logging,os,skip,re
from Comics.items import ComicItem
from Comics.items import BooksItem
from Comics.loader import ComicLoader
from Comics.loader import BooksLoader
@ -72,7 +72,7 @@ class RmComicSpider(scrapy.Spider):
sr = sr_value.group(1) # group(1) 返回第一个捕获组,即数字部分
else:
print("No match found")
images_urls.append(Image().setImage(url=link, scramble=sr.replace("0", "False").replace("1", "True")))
images_urls.append(ci.setImageItem(url=link, scramble=sr.replace("0", "False").replace("1", "True")))
ci.image_urls(value=images_urls)
yield ci.load_item()