push
This commit is contained in:
parent
fbcf514bed
commit
2a9820949b
@ -7,14 +7,14 @@ from scrapy.item import Item, Field
|
||||
from Comics.utils.Constant import ComicPath
|
||||
from Comics.utils.FileUtils import imageUtils
|
||||
from itemloaders.processors import TakeFirst, MapCompose, Join
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
def current_project():
    """Return the ``name`` attribute read from the imported ``Spider`` class."""
    # NOTE(review): this reads the attribute on the Spider class itself, not on
    # a running spider instance -- confirm that is what callers expect.
    project_name = Spider.name
    return project_name
|
||||
|
||||
# 繁体中文转为简体中文
|
||||
def serialize_to_chinese(value):
    """Return ``value`` passed through ``ComicPath.chinese_convert``.

    Per the comment accompanying this function in the file, the conversion is
    Traditional Chinese to Simplified Chinese.
    """
    converted = ComicPath.chinese_convert(value)
    return converted
|
||||
|
||||
# 将路径字符串转为合法路径
|
||||
def serialize_to_fix_file(value):
    """Turn ``value`` into a usable file name.

    The value is first run through ``ComicPath.chinese_convert`` and the
    result is then handed to ``ComicPath.fix_file_name``.
    """
    converted = ComicPath.chinese_convert(value)
    return ComicPath.fix_file_name(converted)
|
||||
|
||||
# 图片数据解析(私有方法)
|
||||
def _serialize_to_images(value, result_type=None):
|
||||
(count, images_item, image_urls) = [1,[],[]]
|
||||
for image in value:
|
||||
@ -30,7 +30,6 @@ def _serialize_to_images(value, result_type=None):
|
||||
de_str = str(image_src).split("/")[-1].replace(suffix, "==")
|
||||
blocks_num = imageUtils.encodeImage(de_str)
|
||||
image_name = ComicPath.getFileScrambleImageName(count=count_image, block=blocks_num, suffix=suffix)
|
||||
#images_item.append(ImagesItem(image_name=count_image + suffix, image_url=image_src, image_path=image_name))
|
||||
if str(image_src).startswith('http'):
|
||||
images_item.append(image_name)
|
||||
else:
|
||||
@ -41,11 +40,13 @@ def _serialize_to_images(value, result_type=None):
|
||||
if result_type == "image_urls": return image_urls
|
||||
else: return images_item
|
||||
|
||||
# 图像处理方法
|
||||
def serialize_to_images(value):
    """Serialize ``value`` via ``_serialize_to_images`` using its default ``result_type``."""
    images = _serialize_to_images(value)
    return images
|
||||
|
||||
|
||||
# 图像链接处理方法
|
||||
def serialize_to_image_urls(value):
    """Serialize ``value`` via ``_serialize_to_images`` asking for the ``"image_urls"`` result."""
    urls = _serialize_to_images(value, result_type="image_urls")
    return urls
|
||||
|
||||
# ComicItem
|
||||
class ComicItem(Item):
|
||||
# 工程
|
||||
current_project = Field()
|
||||
@ -75,42 +76,25 @@ class ComicItem(Item):
|
||||
genre = Field(output_processor=TakeFirst())
|
||||
# 年龄分级
|
||||
age_rating = Field(output_processor=TakeFirst())
|
||||
|
||||
# 合计
|
||||
count = Field()
|
||||
# 旧图片
|
||||
images_old = Field(serializer=serialize_to_images)
|
||||
# 章节图像合集
|
||||
images = Field(serializer=serialize_to_images)
|
||||
# 图像链接
|
||||
image_urls = Field(serializer=serialize_to_image_urls)
|
||||
# 图像名
|
||||
images_name = Field()
|
||||
|
||||
class ImagesItem(Item):
|
||||
image_name = Field()
|
||||
image_url = Field()
|
||||
image_path = Field()
|
||||
images = Field()
|
||||
image_urls = Field()
|
||||
comic = Field()
|
||||
|
||||
# 序列化-作者
|
||||
def serializer_info_writer(value):
    """Normalize an author value into a comma-separated string of unique names.

    ``value`` is converted to ``str``; every ``&`` is treated as a space and
    the text is then split on single spaces.

    Args:
        value: The raw author value (any object; ``str(value)`` is used).

    Returns:
        The distinct, non-empty names joined with ``","`` in the order they
        first appear. An empty input yields ``""``.
    """
    tokens = str(value).replace("&", " ").split(" ")
    # "A & B" becomes "A   B", so splitting on " " produces empty tokens;
    # drop them so the result never contains blank entries such as "A,,B".
    # dict.fromkeys de-duplicates while keeping first-seen order, which makes
    # the output deterministic (iterating a set of strings is not).
    unique_names = dict.fromkeys(token for token in tokens if token)
    return ",".join(unique_names)
|
||||
|
||||
# Result_type name
|
||||
def _serializer_info_imagesa(value, result_type=None):
|
||||
info = []
|
||||
for success, img in value:
|
||||
img_path = os.path.join(settings.IMAGES_STORE, img['path'])
|
||||
if result_type == 'name':
|
||||
info.append(ComicPath().getFileScrambleImageSave(img_path,True,False))
|
||||
else:
|
||||
info.append(img_path)
|
||||
if result_type == "len":
|
||||
value = len(info)
|
||||
else:
|
||||
value = info
|
||||
return value
|
||||
|
||||
# (私有)序列化-图像
|
||||
def _serialize_info_images(value, result_type=None):
|
||||
images = []
|
||||
for image in value:
|
||||
@ -121,34 +105,28 @@ def _serialize_info_images(value, result_type=None):
|
||||
else:
|
||||
return images
|
||||
|
||||
|
||||
# 序列化-图像
|
||||
def serializer_info_images(value):
    """Serialize ``value`` via ``_serialize_info_images`` using its default ``result_type``."""
    images = _serialize_info_images(value)
    return images
|
||||
|
||||
# 序列化-合计
|
||||
def serializer_info_images_count(value):
    """Serialize ``value`` via ``_serialize_info_images``, passing ``"count"`` as the second argument."""
    # NOTE(review): a function with this same name is defined again further
    # down with result_type='len' -- confirm which definition is intended.
    mode = "count"
    return _serialize_info_images(value, mode)
|
||||
|
||||
def serializer_info_images_completed(value):
    """Serialize ``value`` via ``_serialize_info_images`` with ``result_type='name'``."""
    names = _serialize_info_images(value, result_type='name')
    return names
|
||||
|
||||
def serializer_info_images_count(value):
    """Serialize ``value`` via ``_serialize_info_images`` with ``result_type='len'``."""
    # NOTE(review): a function with this same name is also defined earlier
    # with "count" as the mode -- confirm which definition is intended.
    total = _serialize_info_images(value, result_type='len')
    return total
|
||||
|
||||
|
||||
class ComicInfoItem(Item):
|
||||
Title = Field(info='chapter')#"章节名",True]
|
||||
Series = Field(info='name')# ","漫画名",True]
|
||||
Number = Field(info='index')# ","编号",True]
|
||||
SeriesGroup = Field()# ","别名",False]
|
||||
Summary = Field(info='dep')# ","概述",True]
|
||||
Year = Field()# ","年",False]
|
||||
Month = Field()# ","月",False]
|
||||
Day = Field()# ","日",False]
|
||||
Writer = Field(info='author',serializer=serializer_info_writer)# "作者",True]
|
||||
Publisher = Field()# ","出版社",False]
|
||||
Genre = Field(info='genre')# ","流派",True]
|
||||
Tags = Field(info='tags')# ","标签",True]
|
||||
Web = Field()# ","主页",False]
|
||||
PageCount = Field(info='count')# ","总页数",True]
|
||||
#PageCount = Field(info='count',serializer=serializer_info_images_count)# ","总页数",True]
|
||||
LanguageISO = Field()#","语言",True]
|
||||
AgeRating = Field(info='age_rating')#","年龄分级",False]
|
||||
#Pages = Field(info='images_name', serializer=serializer_info_images_completed)#","页码",True]
|
||||
Pages = Field(info='images', serializer=serializer_info_images)#","页码",True]
|
||||
Title = Field(desc="章节名", info='chapter')
|
||||
Series = Field(desc="漫画名", info='name')
|
||||
Number = Field(desc="编号", info='index')
|
||||
SeriesGroup = Field(desc="别名")
|
||||
Summary = Field(desc="概述", info='dep')
|
||||
Year = Field(desc="年")
|
||||
Month = Field(desc="月")
|
||||
Day = Field(desc="日")
|
||||
Writer = Field(desc="作者", info='author',serializer=serializer_info_writer)
|
||||
Publisher = Field(desc="出版社")
|
||||
Genre = Field(desc="流派", info='genre')
|
||||
Tags = Field(desc="标签", info='tags')
|
||||
Web = Field(desc="主页")
|
||||
PageCount = Field(desc="总页数", info='count')
|
||||
LanguageISO = Field(desc="语言")
|
||||
AgeRating = Field(desc="年龄分级", info='age_rating')
|
||||
Pages = Field(desc="页码", info='images', serializer=serializer_info_images)
|
||||
# ComicInfo.xml and ComicChapter.json end
|
||||
@ -5,27 +5,27 @@
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
import os, scrapy,logging,time,random,shutil
|
||||
import os,scrapy,logging
|
||||
from Comics import settings
|
||||
from Comics.settings import CBZ_EXPORT_PATH,OUTPUT_DIR,PROJECT_KEY
|
||||
from Comics.utils.Constant import ComicPath
|
||||
from Comics.items import ComicItem
|
||||
from scrapy.pipelines.images import ImagesPipeline
|
||||
from Comics.exporters import ComicInfoXmlItemExporter,JsonExport,ItemExporter, ItemImport
|
||||
from Comics.utils.FileUtils import CBZUtils
|
||||
from Comics.utils.FileUtils import fileUtils as fu
|
||||
from Comics.settings import OUTPUT_DIR
|
||||
from Comics.loader import ComicEntity
|
||||
from Comics.exporters import ComicInfoXmlItemExporter
|
||||
from Comics.utils.FileUtils import CBZUtils,fileUtils as fu
|
||||
from Comics.utils.Constant import ComicPath
|
||||
from Comics.utils.ComicUtils import checkUtils
|
||||
from Comics.exporters import JsonExport,ItemExporter
|
||||
from scrapy.pipelines.images import ImagesPipeline
|
||||
|
||||
class ComicsPipeline:
|
||||
class ComicsPipeline():
|
||||
def open_spider(self, spider):
|
||||
pass
|
||||
|
||||
# item就是yield后面的对象
|
||||
def process_item(self, item, spider):
|
||||
if isinstance(item, ComicItem):
|
||||
# 'output/rm_comic/json/壞X/第1話 壞X'
|
||||
if os.path.exists(ComicPath.CBZ(item=item)):
|
||||
# 'output/rm_comic/json/壞X/第1話 壞X'
|
||||
if fu.exists(ComicPath.path_cbz(item=item)):
|
||||
return ItemExporter().export_obj(item)
|
||||
else:
|
||||
file = os.path.join(OUTPUT_DIR, spider.name, "json", item['name'], item['chapter'])
|
||||
@ -37,18 +37,18 @@ class ComicsPipeline:
|
||||
|
||||
class ImgDownloadPipeline(ImagesPipeline):
|
||||
def get_file_path(self, item, file=None, result_type="image"):
|
||||
return ComicPath.get_file_path(item=item, file=file, result_type= result_type)
|
||||
return ComicPath().get_file_path(item=item, file=file, result_type= result_type)
|
||||
|
||||
def image_scramble_exits(self, item,image_path):
|
||||
en_image_path = ComicPath().getFileScrambleImageSave(image_path, relative="fullpath")
|
||||
return os.path.exists(os.path.join(settings.IMAGES_STORE, self.get_file_path(item, en_image_path)))
|
||||
return fu.exists(fu.join(settings.IMAGES_STORE, self.get_file_path(item, en_image_path)))
|
||||
|
||||
## Icon Path : CBZ/NAME/CHAPTER.jpg
|
||||
def download_icon(self, item, result_type="download"):
|
||||
icon_path = self.get_file_path(item, result_type="icon_cache")
|
||||
if result_type == "fullpath":
|
||||
return os.path.join(settings.IMAGES_STORE, icon_path)
|
||||
if os.path.exists(icon_path):
|
||||
return fu.join(settings.IMAGES_STORE, icon_path)
|
||||
if fu.exists(icon_path):
|
||||
return False
|
||||
else:
|
||||
self.image_urls.append(item['icon'])
|
||||
@ -80,10 +80,10 @@ class ImgDownloadPipeline(ImagesPipeline):
|
||||
def pack_icon(self, item):
|
||||
cbz_icon = self.get_file_path(item=item, result_type="cbz_icon")
|
||||
dwn_icon = self.get_file_path(item=item, result_type="down_icon")
|
||||
base_dir = os.path.dirname(dwn_icon)
|
||||
name = os.path.basename(dwn_icon).split(".")[0]
|
||||
base_dir = fu.dirname(dwn_icon)
|
||||
name = fu.basename(dwn_icon).split(".")[0]
|
||||
for dirname in os.listdir(base_dir):
|
||||
path = os.path.join(base_dir, dirname)
|
||||
path = fu.join(base_dir, dirname)
|
||||
if os.path.isfile(path) and dirname.startswith(name):
|
||||
fu.update_icon(path, cbz_icon)
|
||||
|
||||
@ -97,12 +97,12 @@ class ImgDownloadPipeline(ImagesPipeline):
|
||||
save_path = self.get_file_path(item=item, result_type="down_icon")
|
||||
|
||||
fu.update_icon(image_path, save_path)
|
||||
|
||||
|
||||
def item_completed(self, results, item, info):
|
||||
# return item
|
||||
# 打包
|
||||
cbz_path = self.get_file_path(item, result_type="cbz")
|
||||
if os.path.exists(cbz_path):
|
||||
if fu.exists(cbz_path):
|
||||
self.update_icon(item)
|
||||
self.pack_icon(item)
|
||||
else:
|
||||
|
||||
@ -90,7 +90,7 @@ class ComicPath:
|
||||
return file
|
||||
|
||||
@classmethod
|
||||
def CBZ(cls, item):
|
||||
def path_cbz(cls, item):
    """Return ``cls.get_file_path`` for ``item`` with ``result_type="cbz"`` and ``convert=True``."""
    cbz_options = {"result_type": "cbz", "convert": True}
    return cls.get_file_path(item, **cbz_options)
|
||||
|
||||
|
||||
|
||||
@ -8,6 +8,19 @@ from Comics.settings import COMIC_INFO_XML_FILE,CBZ_EXPORT_PATH,IMAGES_STORE
|
||||
from Comics.utils.Constant import ntfy
|
||||
|
||||
class fileUtils:
|
||||
|
||||
@classmethod
|
||||
def exists(cls, path):
    """Report whether ``path`` exists, as determined by ``os.path.exists``."""
    found = os.path.exists(path)
    return found
|
||||
|
||||
@classmethod
|
||||
def join(cls, path, *paths):
    """Join ``path`` with any further ``paths`` components using ``os.path.join``."""
    components = (path, *paths)
    return os.path.join(*components)
|
||||
|
||||
@classmethod
|
||||
def dirname(cls, path):
    """Return the directory portion of ``path`` as given by ``os.path.dirname``."""
    parent = os.path.dirname(path)
    return parent
|
||||
|
||||
@classmethod
|
||||
def basename(cls, path):
    """Return the final component of ``path`` as given by ``os.path.basename``."""
    leaf = os.path.basename(path)
    return leaf
|
||||
|
||||
@classmethod
|
||||
def save_file(cls,path,data):
|
||||
root_dir = os.path.dirname(path)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user