update
commit 8f55a51140, parent 5884f1e92c
.idea/.gitignore vendored (new file, 3 lines)
@@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml
.idea/ComicScrapy.iml (new file, 8 lines)
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="stable_vscode" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
.idea/inspectionProfiles/profiles_settings.xml (new file, 6 lines)
@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
.idea/misc.xml (new file, 4 lines)
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="stable_vscode" project-jdk-type="Python SDK" />
</project>
.idea/modules.xml (new file, 8 lines)
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/ComicScrapy.iml" filepath="$PROJECT_DIR$/.idea/ComicScrapy.iml" />
    </modules>
  </component>
</project>
.idea/vcs.xml (new file, 6 lines)
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="Git" />
  </component>
</project>
Comics/exporters.py (new file, 98 lines)
@@ -0,0 +1,98 @@
import os.path

from scrapy.exporters import XmlItemExporter
from scrapy.exporters import PythonItemExporter
from scrapy.utils.python import is_listlike

from Comics.items import ComicInfoItem
from Comics.items import ComicItem
from Comics.settings import COMIC_INFO_FIELDS_TO_EXPORT
from Comics.settings import COMIC_INFO_XML_STORE
from Comics.utils.Constant import ComicPath


class ItemExporter(PythonItemExporter):
    def convert(self, data):
        # Recursively decode bytes to str inside nested containers.
        if isinstance(data, bytes): return data.decode("utf-8")
        if isinstance(data, dict): return dict(map(self.convert, data.items()))
        if isinstance(data, tuple): return tuple(map(self.convert, data))
        if isinstance(data, list): return [self.convert(i) for i in data]
        return data

    def export_obj(self, obj_item):
        self.start_exporting()
        obj_item = self.convert(self.export_item(obj_item))
        self.finish_exporting()
        return obj_item


class ComicInfoXmlItemExporter(XmlItemExporter):
    custom_root_element = "ComicInfo"

    def __init__(self, comic, chapter):
        file_path = os.path.join(COMIC_INFO_XML_STORE, comic,
                                 chapter, f"{self.custom_root_element}.xml")
        dir_path = os.path.dirname(file_path)
        if not os.path.exists(dir_path): os.makedirs(dir_path)
        self.xml_file = open(file_path, "wb")
        super(ComicInfoXmlItemExporter, self).__init__(self.xml_file,
                                                       root_element=self.custom_root_element,
                                                       indent=1, fields_to_export=COMIC_INFO_FIELDS_TO_EXPORT)

    def serialize_field(self, field, name, value):
        # convert via the serializer (Traditional -> Simplified Chinese)
        value = ComicPath.chinese_convert(value)
        return super().serialize_field(field, name, value)

    def start_exporting(self):
        self.xg.startDocument()
        self.xg.startElement(self.custom_root_element, {})

    def comic_to_info_item(self, comic_item):
        comic_info = {}
        # The @setinfo decorator stores the ComicItem -> ComicInfoItem key mapping in "data".
        comic_info_dict = getattr(ComicItem, "data", {})
        for key, value in ComicItem(comic_item).items():
            new_key = comic_info_dict.get(key)
            if new_key is not None:
                comic_info[new_key] = value
        return ItemExporter().export_obj(ComicInfoItem(comic_info))

    def export_item(self, item):
        comic_info = self.comic_to_info_item(item)
        child_element = "Page"
        self._beautify_indent(depth=1)
        self._beautify_newline()
        for name, value in self._get_serialized_fields(comic_info, default_value=""):
            if name == "Pages":
                value = str(value).split(',')
            if value is not None and value != "":
                self._export_xml_field(name, value, depth=2, child_element=child_element)
        self._beautify_indent(depth=1)
        return comic_info

    def _export_xml_field(self, name, serialized_value, depth, child_element="value"):
        self._beautify_indent(depth=depth)
        self.xg.startElement(name, {})
        if hasattr(serialized_value, "items"):
            self._beautify_newline()
            for subname, value in serialized_value.items():
                self._export_xml_field(subname, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif is_listlike(serialized_value):
            self._beautify_newline()
            for value in serialized_value:
                self._export_xml_field(child_element, value, depth=depth + 1)
            self._beautify_indent(depth=depth)
        elif isinstance(serialized_value, str):
            self.xg.characters(serialized_value)
        else:
            self.xg.characters(str(serialized_value))
        self.xg.endElement(name)
        self._beautify_newline()

    def finish_exporting(self):
        self.xg.endElement(self.custom_root_element)
        self.xg.endDocument()
        self.xml_file.close()

    def export_xml(self, item):
        self.start_exporting()
        comic_info = self.export_item(item)
        self.finish_exporting()
        return comic_info
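Editor's note: a minimal, self-contained sketch of the recursive bytes-decoding that ItemExporter.convert performs above; the sample data is made up and this runs without Scrapy.

# Sketch: recursively decode bytes inside nested containers, as ItemExporter.convert does.
def convert(data):
    if isinstance(data, bytes):
        return data.decode("utf-8")
    if isinstance(data, dict):
        return {convert(k): convert(v) for k, v in data.items()}
    if isinstance(data, tuple):
        return tuple(convert(i) for i in data)
    if isinstance(data, list):
        return [convert(i) for i in data]
    return data

print(convert({b"Title": [b"ch-001", ("a", b"b")]}))
# -> {'Title': ['ch-001', ('a', 'b')]}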
Comics/items.py (129 lines changed)
@@ -1,52 +1,93 @@
 # Define here the models for your scraped items
 #
 # See documentation in:
 # https://docs.scrapy.org/en/latest/topics/items.html
-
-import scrapy
+from scrapy.item import Item, Field
+from Comics.utils.Constant import ComicPath
+from dataclasses import dataclass
+from scrapy.loader.processors import TakeFirst, MapCompose, Join
-
-class ComicsItem(scrapy.Item):
-    # comic name
-    name = scrapy.Field()
-    # link
-    link = scrapy.Field()
+data = {}
+def setinfo(**kwds):
+    def decorate(f):
+        for k in kwds: data[k] = kwds[k]
+        setattr(f, "data", data)
+        return f
+    return decorate

-class ComicItem(scrapy.Item):
-    name = scrapy.Field()
-    chapter = scrapy.Field()
-    list_img = scrapy.Field()
-    author = scrapy.Field()
-    icon = scrapy.Field()
-    tags = scrapy.Field()
-    dep = scrapy.Field()
-    date = scrapy.Field()
-    chapters = scrapy.Field()
-    chapter_href = scrapy.Field()
-    genre = scrapy.Field()
-    age_rating = scrapy.Field()
+def serialize_to_chinese(value):
+    return ComicPath.chinese_convert(value)

-class ImageItem(scrapy.Item):
-    image_name = scrapy.Field()
-    image_url = scrapy.Field()
-    image_path = scrapy.Field()
+def serialize_to_fix_file(value):
+    file = ComicPath.chinese_convert(value)
+    return ComicPath.fix_file_name(file)

-class ComicInfoItem(scrapy.Item):
-    Title = scrapy.Field()        # chapter name (required)
-    Series = scrapy.Field()       # comic name (required)
-    Number = scrapy.Field()       # number (required)
-    SeriesGroup = scrapy.Field()  # aliases (optional)
-    Summary = scrapy.Field()      # summary (required)
-    Year = scrapy.Field()         # year (optional)
-    Month = scrapy.Field()        # month (optional)
-    Day = scrapy.Field()          # day (optional)
-    Writer = scrapy.Field()       # author (required)
-    Publisher = scrapy.Field()    # publisher (optional)
-    Genre = scrapy.Field()        # genre (required)
-    Tags = scrapy.Field()         # tags (required)
-    Web = scrapy.Field()          # homepage (optional)
-    PageCount = scrapy.Field()    # total pages (required)
-    LanguageISO = scrapy.Field()  # language (required)
-    AgeRating = scrapy.Field()    # age rating (optional)
-    Pages = scrapy.Field()        # page numbers (required)
-# ComicInfo.xml and ComicChapter.json end
+class ComicOItem(Item):
+    name = Field()
+    chapterItem = Field()

+@setinfo(name="Series", chapter="Title",
+         author="Writer", tags="Tags",
+         dep="Summary", genre="Genre",
+         index="Number", images_name="Pages",
+         age_rating="AgeRating")
+class ComicItem(Item):
+    # number
+    index = Field()
+    # comic name
+    name = Field(serializer=serialize_to_fix_file, output_processor=TakeFirst())
+    # chapter name
+    chapter = Field(serializer=serialize_to_fix_file)
+    # image links
+    list_img = Field()
+    # author
+    author = Field(serializer=serialize_to_chinese, output_processor=TakeFirst())
+    # cover link
+    icon = Field()
+    # tags
+    tags = Field(serializer=serialize_to_chinese)
+    # summary
+    dep = Field(serializer=serialize_to_chinese)
+    # date
+    date = Field()
+    # genre
+    genre = Field()
+    # age rating
+    age_rating = Field()
+
+    images = Field()
+    images_name = Field()

+class ImageItem(Item):
+    image_name = Field()
+    image_url = Field()
+    image_path = Field()

+def serializer_info_writer(value):
+    # split author strings on "&"/spaces and re-join with commas
+    value = str(value).replace("&", " ")
+    list_value = []
+    for v in value.split(" "):
+        list_value.append(v)
+    return ",".join(list_value)

+class ComicInfoItem(Item):
+    Title = Field()        # chapter name (required)
+    Series = Field()       # comic name (required)
+    Number = Field()       # number (required)
+    SeriesGroup = Field()  # aliases (optional)
+    Summary = Field()      # summary (required)
+    Year = Field()         # year (optional)
+    Month = Field()        # month (optional)
+    Day = Field()          # day (optional)
+    Writer = Field(serializer=serializer_info_writer)  # author (required)
+    Publisher = Field()    # publisher (optional)
+    Genre = Field()        # genre (required)
+    Tags = Field()         # tags (required)
+    Web = Field()          # homepage (optional)
+    PageCount = Field()    # total pages (required)
+    LanguageISO = Field()  # language (required)
+    AgeRating = Field()    # age rating (optional)
+    Pages = Field()        # page numbers (required)
+    Page = Field()
+# ComicInfo.xml and ComicChapter.json end
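Editor's note: a standalone sketch of how the @setinfo mapping added above is meant to be consumed; the two-field class here is a toy stand-in, not the real item, and this mirrors the decorator in the diff rather than any Scrapy API.

# Sketch: a decorator that attaches a field-name mapping to a class, as @setinfo does.
data = {}
def setinfo(**kwds):
    def decorate(f):
        for k in kwds: data[k] = kwds[k]
        setattr(f, "data", data)
        return f
    return decorate

@setinfo(name="Series", chapter="Title")
class ComicItem:
    pass

# ComicInfoXmlItemExporter later reads this mapping via getattr(ComicItem, "data", {}):
print(ComicItem.data)  # {'name': 'Series', 'chapter': 'Title'}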
Comics/pipelines.py
@@ -8,93 +8,74 @@
 import os,requests,re,scrapy,logging
 from Comics import settings
 from Comics.utils.FileUtils import imageUtils
 from Comics.utils.FileUtils import fileUtils
 from Comics.utils.Constant import ComicPath
 from Comics.items import ComicItem
 from Comics.items import ImageItem
 from scrapy.pipelines.images import ImagesPipeline
-from scrapy.exporters import XmlItemExporter
-from itemadapter import ItemAdapter
+from Comics.exporters import ComicInfoXmlItemExporter
+from Comics.exporters import ItemExporter
+from Comics.utils.CBZUtils import CBZUtils

 class ComicsPipeline:
-    def open_spider(self,spider):
-        self.fp = open('book.json','w',encoding='utf-8')
+    def open_spider(self, spider):
+        pass

     # item is the object yielded by the spider
     def process_item(self, item, spider):
-        self.fp.write(str(item))
+        if isinstance(item, ComicItem):
+            item = ComicItem(ItemExporter().export_obj(item))
+            file = os.path.join("json", item['name'], item['chapter'])
+            fileUtils.save_file(f"{file}.json", item)
         return item
+    # image parsing

     def close_spider(self,spider):
-        self.fp.close()
+        pass

 class ImageParsePipeline:
     def process_item(self, item, spider):
         if isinstance(item, ComicItem):
-            list_img = item['list_img']
             count = 1
-            scramble_count = 0
-            list_image_item = []
-            for image in list_img:
+            images_item = []
+            for image in item['list_img']:
                 (image_src, scramble) = [image.get("src"), image.get("scramble")]
                 count_image = "{:0>3d}".format(count)
-                image_src_suffix = "."+str(image_src).split(".")[-1]
-                image_file_name = count_image+image_src_suffix
+                suffix = "."+str(image_src).split(".")[-1]
+                image_name = count_image + suffix
                 if scramble:
-                    de_str = str(image_src).split("/")[-1].replace(image_src_suffix,"==")
+                    de_str = str(image_src).split("/")[-1].replace(suffix,"==")
                     blocks_num = imageUtils.encodeImage(de_str)
-                    scramble_image_file_name = ComicPath.getFileScrambleImageName(count=count_image,block=blocks_num,suffix=image_src_suffix)
-                    scramble_count += 1
-                image_path = os.path.join(item['name'],item['chapter'],scramble_image_file_name)
-                image_path = ComicPath.ChineseConvert(image_path)
-                list_image_item.append(ImageItem(image_name=image_file_name,image_url=image_src,image_path=image_path))
-                count += 1
-            return list_image_item
+                    image_name = ComicPath.getFileScrambleImageName(count=count_image,block=blocks_num,suffix=suffix)
+                image_path = os.path.join(item['name'],item['chapter'], image_name)
+                images_item.append(ImageItem(image_name=count_image + suffix,image_url=image_src,image_path=image_path))
+                count += 1
+            item['images'] = images_item
         return item

 class ImgDownloadPipeline(ImagesPipeline):
     def file_path(self, request, response=None, info=None, *, item=None):
         image = request.meta['item']
         image_path = image['image_path']
-        en_image_path = os.path.join(os.path.dirname(image_path),image['image_name'])
-        if os.path.exists(en_image_path): return en_image_path
-        else: return image_path
+        en_image_path = os.path.join(os.path.dirname(image_path), image['image_name'])
+        if os.path.exists(os.path.join(settings.IMAGES_STORE, en_image_path)):
+            return en_image_path
+        else:
+            return image_path

     def get_media_requests(self, item, info):
-        for image in item:
-            host = re.sub(r'(http://|https://)', '', image['image_url']).split('/')[0]
+        for image in item['images']:
             yield scrapy.Request(url= image['image_url'], meta= {'item' : image})

     def item_completed(self, results, item, info):
-        if len(results) == len(item):
-            for image in results:
-                success = image[0]
-                img = image[1]
-                img_path = os.path.join(settings.IMAGES_STORE,img['path'])
-                # descramble the image
-                imageUtils.deScrambleImagesByPath(img_path)
-        return item
-
-class ComicInfoXmlPipeline:
-
-    def open_spider(self, spider):
-        self.xml_exporter = {}
-
-    def close_spider(self, spider):
-        for exporter, xml_file in self.xml_exporter.values():
-            exporter.finish_exporting()
-            xml_file.close()
-
-    def _exporter_for_item(self, item):
-        adapter = ItemAdapter(item)
-        xml_file = open("ComicInfo.xml", "wb")
-        exporter = XmlItemExporter(xml_file)
-        exporter.start_exporting()
-        self.xml_exporter = (exporter, xml_file)
-        return self.xml_exporter
-
-    def process_item(self, item, spider):
-        exporter = self._exporter_for_item(item)
-        exporter.export_item(item)
-        return item
+        info_img = []
+        for success, img in results:
+            img_path = os.path.join(settings.IMAGES_STORE, img['path'])
+            # descramble the image
+            img_path = imageUtils.deScrambleImagesByPath(img_path)
+            info_img.append(os.path.basename(img_path).split('.')[0])
+        item['images_name'] = ",".join(info_img)
+        #return item
+        # generate ComicInfo.xml
+        ComicInfoXmlItemExporter(comic=item['name'], chapter=item['chapter']).export_xml(item)
+        # pack into a CBZ archive
+        CBZUtils.packComicChapterCBZ(comic=item['name'], chapter=item['chapter'], remove= False)
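Editor's note: a quick standalone illustration of the zero-padded page-naming scheme ImageParsePipeline uses above; the URLs are placeholders.

# Sketch: build zero-padded page file names from image URLs, as ImageParsePipeline does.
urls = ["https://example.invalid/a/001.jpg", "https://example.invalid/a/002.webp"]
for count, src in enumerate(urls, start=1):
    count_image = "{:0>3d}".format(count)   # 1 -> "001"
    suffix = "." + str(src).split(".")[-1]  # keep the original extension
    print(count_image + suffix)             # 001.jpg, 002.webp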
Comics/settings.py
@@ -18,17 +18,17 @@ NEWSPIDER_MODULE = 'Comics.spiders'
 #USER_AGENT = 'Comics (+http://www.yourdomain.com)'
 USER_AGENT = UserAgent().random
 # Obey robots.txt rules
 ROBOTSTXT_OBEY = False

 HTTPERROR_ALLOWED_CODES = [200, 403]
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
 #CONCURRENT_REQUESTS = 32

 # Configure a delay for requests for the same website (default: 0)
 # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
 # See also autothrottle settings and docs
 IMAGES_URLS_FIELD = "image_url"
 IMAGES_RESULT_FIELD = "image_path"
 IMAGES_STORE = 'images'
+COMIC_INFO_XML_STORE = 'images'
 DOWNLOAD_DELAY = 20
 # retries
 RETRY_ENABLED = True
@@ -66,7 +66,7 @@ COOKIES_ENABLED = False
 DOWNLOADER_MIDDLEWARES = {
    # 'Comics.middlewares.ComicsDownloaderMiddleware': 543,
    # 'scrapy.downloadermiddlewares.retry.RetryMiddleware': 500,
-    'Comics.middlewares.ProxyMiddleware' : 100,
+    'Comics.middlewares.ProxyMiddleware': 100,
     'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 400,
 }
@@ -82,7 +82,6 @@ ITEM_PIPELINES = {
     'Comics.pipelines.ComicsPipeline': 300,
     'Comics.pipelines.ImageParsePipeline': 400,
     'Comics.pipelines.ImgDownloadPipeline': 500,
-    'Comics.pipelines.ComicInfoXmlPipeline': 600,
 }

 # Enable and configure the AutoThrottle extension (disabled by default)
@@ -103,5 +102,28 @@ AUTOTHROTTLE_DEBUG = False
 HTTPCACHE_ENABLED = True
 HTTPCACHE_EXPIRATION_SECS = 0
 HTTPCACHE_DIR = 'httpcache'
-HTTPCACHE_IGNORE_HTTP_CODES = []
+HTTPCACHE_IGNORE_HTTP_CODES = [500, 502, 404, 403]
 HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

+CBZ_EXPORT_PATH = "CBZ"
+# export order of the ComicInfo fields
+COMIC_INFO_XML_FILE = "ComicInfo.xml"
+COMIC_INFO_FIELDS_TO_EXPORT = [
+    "Title",
+    "Series",
+    "Number",
+    "SeriesGroup",
+    "Summary",
+    "Year",
+    "Month",
+    "Day",
+    "Writer",
+    "Publisher",
+    "Genre",
+    "Tags",
+    "Web",
+    "PageCount",
+    "LanguageISO",
+    "AgeRating",
+    "Pages"
+]
@@ -1,26 +1,9 @@
 import urllib.parse

 import scrapy,json,requests
 from Comics.items import ComicItem
 from Comics.utils.FileUtils import CommonUtils
-import threading
-import toml
-
-class ErrorLog:
-    def __init__(self) -> None:
-        self.lock = threading.Lock()
-
-    def err_ls(self, dic):
-        self.lock.acquire()
-        with open('error.toml', 'r+t') as f:
-            data = toml.load('error.toml')
-            f.seek(0, 0)
-            f.truncate()
-            dic_name = f'err_{len(data)}'
-            data[dic_name] = dic
-            _ = toml.dump(data, f)
-        self.lock.release()
-
-
-error_logger = ErrorLog()
+from scrapy.loader import ItemLoader

 class RmComicSpider(scrapy.Spider):
     name = 'rm_comic'
@@ -29,45 +12,51 @@ class RmComicSpider(scrapy.Spider):
     #start_urls = ['https://rm01.xyz/books/63b65185-f798-4c8f-a0b0-8811615908fd/0']

     def start_requests(self):
-        yield scrapy.Request(self.main_url + '/books/0a7e8bd1-4cfa-481a-b067-1df663fb2017', callback=self.parse_comic)
+        yield scrapy.Request('https://rm01.xyz'
+                             '/books/306ec1e2-f701-4fda-bb78-041ad6ec4020', callback=self.parse_comic)

     def parse_comic(self, response):
         comic = ComicItem()
+        # comic_item = ItemLoader(item=ComicItem(), response=response)
         comic['name'] = response.xpath('//div[@class="col"]/h5/text()').extract_first()
         comic['icon'] = response.xpath('//img[@class="img-thumbnail"]/@src').extract_first()
         comic['author'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()').extract()[1]
         comic['tags'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()').extract_first()
         comic['dep'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()').extract()[1]
         comic['date'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()').extract()[1]
-        comic['chapters'] = response.xpath('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()').extract()
-        comic['chapter_href'] = response.xpath('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href').extract()
-        for link in comic['chapter_href']:
-            yield scrapy.Request(self.main_url+link,meta={'item' : comic} , callback=self.parse_chapter,errback=self.err)
-
-    def err(self):
-        print("Error=====")
-
+        comic['genre'] = "韩漫"
+        comic['age_rating'] = "R18+"
+        chapters = response.xpath('//div[contains(@class,"bookid_chapterBox")]'
+                                  '//div[contains(@class,"bookid_chapter")]/a/text()').extract()
+        chapter_href = response.xpath('//div[contains(@class,"bookid_chapterBox")]'
+                                      '//div[contains(@class,"bookid_chapter")]/a/@href').extract()
+        #for chapter, link in zip(chapters, chapter_href):
+        for i, link in enumerate(chapter_href, start=1):
+            yield scrapy.Request(self.main_url+link, meta={'item' : comic, 'number': i}, callback=self.parse_chapter)

     def parse_chapter(self, response):
         item = response.meta['item']
+        number = response.meta['number']
         data = response.xpath('//script[@id="__NEXT_DATA__"]/text()').extract_first()
-        str_exec="props.pageProps."
-        comic_name = CommonUtils.parseExec(data,str_exec+"bookName")
-        chapterName = CommonUtils.parseExec(data,str_exec+"chapterName")
-        description = CommonUtils.parseExec(data,str_exec+"description")
-        images = CommonUtils.parseExec(data,str_exec+"images")
-        chapter_api_url = CommonUtils.parseExec(data,str_exec+"chapterAPIPath")
+        str_exec = "props.pageProps."
+        comic_name = CommonUtils.parseExec(data, str_exec+"bookName")
+        chapterName = CommonUtils.parseExec(data, str_exec+"chapterName")
+        description = CommonUtils.parseExec(data, str_exec+"description")
+        images = CommonUtils.parseExec(data, str_exec+"images")
+        chapter_api_url = CommonUtils.parseExec(data, str_exec+"chapterAPIPath")
         item['chapter'] = chapterName
         item['list_img'] = images
+        item['index'] = number
         if chapter_api_url != None:
-            yield scrapy.Request(url=self.main_url+chapter_api_url,meta={'item' : item}, callback=self.parse_chapter_api, errback=self.err)
+            yield scrapy.Request(self.main_url+ chapter_api_url,meta={'item' : item}, callback= self.parse_chapter_api)
         else:
             item['list_img'] = images
             yield item

-    def parse_chapter_api(self,response,item):
-        data = response.meta['item']
-        print(item)
-        return response
-
+    def parse_chapter_api(self, response):
+        item = response.meta['item']
+        item['chapter'] = CommonUtils.parseExec(response.text, "chapter.name")
+        item['list_img'] = CommonUtils.parseExec(response.text, "chapter.images")
+        yield item

     def parse(self, response):
         raise NotImplementedError
Comics/utils/CBZUtils.py (new file, 105 lines)
@@ -0,0 +1,105 @@
import os, shutil, time, logging
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile
from Comics.settings import COMIC_INFO_XML_FILE, CBZ_EXPORT_PATH, IMAGES_STORE


class CBZUtils:

    @classmethod
    def readDirsOrFiles(cls, dir, type):
        data = []
        files = os.listdir(dir)
        for file in files:
            path = os.path.join(dir, file)
            if type == "files" and os.path.isfile(path):
                data.append(path)
            if type == "dirs" and os.path.isdir(path):
                data.append(path)
        return data

    @classmethod
    def zip_compression(cls, source_dir=None, target_file=None, remove=True):
        target_dir = os.path.dirname(target_file)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        if not os.path.exists(target_file) and source_dir is not None:
            with ZipFile(target_file, mode='w') as zf:
                for path, dir_names, filenames in os.walk(source_dir):
                    path = Path(path)
                    arc_dir = path.relative_to(source_dir)
                    y = 0
                    for filename in filenames:
                        y = y + 1
                        print("packing: " + str(y) + "/" + str(len(filenames)), os.path.join(source_dir, filename))
                        zf.write(path.joinpath(filename), arc_dir.joinpath(filename))
                zf.close()
            logging.info(f"packing finished: {target_file}")

    @classmethod
    def packComicChapterCBZ(cls, comic, chapter, remove=True):
        images_chapter_path = os.path.join(IMAGES_STORE, comic, chapter)
        cbz_chapter_path = os.path.join(CBZ_EXPORT_PATH, comic, chapter) + ".CBZ"
        if os.path.exists(images_chapter_path):
            dirs = os.listdir(images_chapter_path)
            for file in dirs:
                # leftover scrambled originals must not end up in the archive
                if file.startswith("scramble="):
                    try:
                        os.remove(os.path.join(images_chapter_path, file))
                    except OSError:
                        print(f"error deleting {file}, skipped")
                        return False
            cls.zip_compression(images_chapter_path, cbz_chapter_path)
            time.sleep(0.1)
            if remove: shutil.rmtree(images_chapter_path)
            return True

    @classmethod
    def replaceZip(cls, filepath, unpack_dir=None):
        if not cls.compareFileDate(filepath): return None
        if unpack_dir == None:
            unpack_dir = str(filepath).split(".")[0]
        fz = ZipFile(filepath, 'r')
        for file in fz.namelist():
            if file.endswith(".jpg"):
                data = fz.read(file)
                if len(data) < 500 and os.path.exists(filepath):
                    os.remove(filepath)
                    print(f"incomplete data, deleted: {filepath}")
        if cls.compareFileDate(filepath):
            os.utime(filepath)
            print(f"file timestamp updated {filepath}")
        if os.path.exists(unpack_dir):
            shutil.rmtree(unpack_dir)
        # delete the main.ftl file
        #delete_filename = ''
        #if os.path.exists(delete_filename):
        #    os.remove(delete_filename)
        #    time.sleep(60)
        # shutil.copy(src, dst); copy main.ftl into the directory to be compressed
        #cls.zip_compression()

    # returns True only when the file timestamp is older than the cutoff
    @classmethod
    def compareFileDate(cls, filepath):
        if os.path.exists(filepath):
            ctime = os.path.getmtime(filepath)
            str_ctime = datetime.fromtimestamp(int(ctime))
            file_ctime = str(str_ctime.year)+"{:0>2d}".format(str_ctime.month)+"{:0>2d}".format(str_ctime.day)+"{:0>2d}".format(str_ctime.hour)
            c_ctime = 2023011603
        else:
            return False
        if int(file_ctime) < c_ctime:
            return True
        return False

    @classmethod
    def zip_info(cls, path, filter=True):
        result = None
        try:
            with ZipFile(path, "r") as zip_file:
                result = zip_file.namelist()
            if filter:
                result.remove(COMIC_INFO_XML_FILE)
        except Exception as e:
            print(e)
        return result
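Editor's note: a minimal standalone sketch of the directory-to-archive packing that zip_compression performs above, assuming only the standard library; the paths in the usage comment are placeholders.

# Sketch: zip every file under a directory, preserving relative paths, as zip_compression does.
import os
from pathlib import Path
from zipfile import ZipFile

def pack(source_dir, target_file):
    target_dir = os.path.dirname(target_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with ZipFile(target_file, mode="w") as zf:
        for path, _dirs, filenames in os.walk(source_dir):
            arc_dir = Path(path).relative_to(source_dir)
            for filename in filenames:
                zf.write(Path(path) / filename, arc_dir / filename)

# pack("images/comic/chapter-01", "CBZ/comic/chapter-01.CBZ")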
Comics/utils/Comic.py (deleted, 248 lines)
@@ -1,248 +0,0 @@
import json,re
from opencc import OpenCC
from queue import Queue
from utils.OldUtils import OldUtils


class Comic:
    # ComicInfo.xml and ComicChapter.json begin
    # layout: [value, origin, node, description, required]
    dict_chapter = [None, None, "Title", "chapter name", True]
    dict_comic_name = [None, None, "Series", "comic name", True]
    dict_number = [None, None, "Number", "number", True]
    dict_comic_names = [None, None, "SeriesGroup", "aliases", False]
    dict_dep = [None, None, "Summary", "summary", True]
    dict_year = [None, None, "Year", "year", False]
    dict_month = [None, None, "Month", "month", False]
    dict_day = [None, None, "Day", "day", False]
    dict_author = [None, None, "Writer", "author", True]
    dict_cbs = [None, None, "Publisher", "publisher", False]
    dict_genre = [None, None, "Genre", "genre", True]
    dict_tags = [None, None, "Tags", "tags", True]
    dict_homepage = [None, None, "Web", "homepage", False]
    dict_page_count = [None, None, "PageCount", "total pages", True]
    dict_language = [None, None, "LanguageISO", "language", True]
    dict_agerating = [None, None, "AgeRating", "age rating", False]
    dict_pages = [None, None, "Pages", "page numbers", True]
    CURRENT_DOWN_LINK = None
    # ComicInfo.xml and ComicChapter.json end
    dict_icon = [None, None, "Icon", "icon", True]
    dict_chapter_imgs = [None, None, "ChapterImgs", "images", True]
    # homepage
    dict_list_chapter = [None, None, "ListChapter", "all chapter names", True]
    (update_at, current_chapter_img, file_chapter_imgs) = [None, None, None]

    # Traditional to Simplified Chinese
    @classmethod
    def ChineseConvert(cls, text, convert='t2s'): return OpenCC(convert).convert(str(text))

    # sanitize into a valid file name
    @classmethod
    def fixFileName(cls, filename, replace=None):
        if not isinstance(filename, str): return filename
        intab = r'[?*/\|.:><]'
        str_replace = ""
        if replace != None: str_replace = replace
        filename = re.sub(intab, str_replace, filename)
        count = 1
        while True:
            str_file = filename[0-count]
            if str_file == " ": count += 1
            else:
                filename = filename[0:len(filename)+1-count]
                break
        return filename

    @classmethod
    def setValue(cls, value):
        if value != None: value = cls.ChineseConvert(value)
        return value

    @classmethod
    def setField(cls, field, value, origin=True, convert=True):
        if value != None:
            if origin:
                field[1] = value
            if convert: value = cls.ChineseConvert(value)
            field[0] = value
        return field

    @classmethod
    def getFieldValue(cls, field):
        if field == None: return None
        return field[0]

    @classmethod
    def setFieldOrigin(cls, filed, origin):
        filed[1] = origin
        return filed

    @classmethod
    def getFieldOrigin(cls, filed): return filed[1]

    @classmethod
    def getFieldNode(cls, filed): return filed[2]

    @classmethod
    def getValue(cls, field, exec=None):
        if exec != None: return cls.parseExec(field, exec=exec)
        return field

    # chapter name
    @classmethod
    def setChapterName(cls, value, exec=None):
        value = cls.fixFileName(cls.parseExec(value, exec=exec))
        OldUtils.setOldChapter(value)
        cls.dict_chapter = cls.setField(cls.dict_chapter, value)

    @classmethod
    def getChapterName(cls): return cls.getFieldValue(cls.dict_chapter)
    @classmethod
    def getOriginChapterName(cls): return cls.getFieldOrigin(cls.dict_chapter)

    # comic name
    @classmethod
    def setComicName(cls, value, exec=None):
        value = cls.fixFileName(cls.parseExec(value, exec=exec))
        OldUtils.setOldComicName(value)
        cls.dict_comic_name = cls.setField(cls.dict_comic_name, value)

    @classmethod
    def getComicName(cls): return cls.getFieldValue(cls.dict_comic_name)
    @classmethod
    def getOriginComicName(cls): return cls.getFieldOrigin(cls.dict_comic_name)
    # number
    @classmethod
    def setNumber(cls, value): cls.dict_number = cls.setField(cls.dict_number, value)
    @classmethod
    def getNumber(cls): return cls.getFieldValue(cls.dict_number)
    # summary
    @classmethod
    def setDep(cls, value, exec=None):
        cls.dict_dep = cls.setField(cls.dict_dep, cls.parseExec(value, exec=exec))
    @classmethod
    def getDep(cls): return cls.getFieldValue(cls.dict_dep)
    # author
    @classmethod
    def setAuthor(cls, value): cls.dict_author = cls.setField(cls.dict_author, value)
    @classmethod
    def getAuthor(cls): return cls.getFieldValue(cls.dict_author)
    # genre
    @classmethod
    def setGenre(cls, value): cls.dict_genre = cls.setField(cls.dict_genre, value)
    @classmethod
    def getGenre(cls): return cls.getFieldValue(cls.dict_genre)
    # language
    @classmethod
    def setLanguage(cls, value): cls.dict_language = cls.setField(cls.dict_language, value)
    @classmethod
    def getLanguage(cls): return cls.getFieldValue(cls.dict_language)
    # age rating
    @classmethod
    def setAgeRating(cls, value): cls.dict_agerating = cls.setField(cls.dict_agerating, value)
    @classmethod
    def getAgeRating(cls): return cls.getFieldValue(cls.dict_agerating)
    # tags
    @classmethod
    def setTags(cls, value): cls.dict_tags = cls.setField(cls.dict_tags, value)
    @classmethod
    def getTags(cls): return cls.getFieldValue(cls.dict_tags)
    # total pages
    @classmethod
    def setPageCount(cls, value): cls.dict_page_count = cls.setField(cls.dict_page_count, value)
    @classmethod
    def getPageCount(cls): return cls.getFieldValue(cls.dict_page_count)

    # ------------------------------------------------------------------------
    @classmethod
    def parseExec(cls, data, exec, item=True):
        if data != None and exec != None:
            dots = str(exec).split(".")
            if not isinstance(data, dict): data = json.loads(data)
            for dot in dots:
                data = data.get(dot)
        return data

    @classmethod
    def setHomePage(cls, value): cls.dict_homepage = cls.setField(cls.dict_homepage, value)
    @classmethod
    def getHomePage(cls): return cls.getFieldValue(cls.dict_homepage)
    @classmethod
    def setIcon(cls, value): cls.dict_icon = cls.setField(cls.dict_icon, value, convert=False)
    @classmethod
    def getIcon(cls): return cls.getFieldValue(cls.dict_icon)
    @classmethod
    def setListChapter(cls, value): cls.dict_list_chapter = cls.setField(cls.dict_list_chapter, value, convert=False)
    @classmethod
    def getListChapter(cls): return cls.getFieldValue(cls.dict_list_chapter)
    @classmethod
    def getLenChapters(cls): return len(cls.getListChapter())
    @classmethod
    def setChapterImgs(cls, value, exec=None, item=None):
        cls.dict_chapter_imgs = cls.setField(cls.dict_chapter_imgs, cls.parseExec(value, exec=exec, item=item), convert=False)
    @classmethod
    def getChapterImgs(cls): return cls.getFieldValue(cls.dict_chapter_imgs)
    @classmethod
    def setUpdateAt(cls, value): cls.update_at = value
    @classmethod
    def getUpdateAt(cls): return cls.update_at
    @classmethod
    def setCurrentChapterImg(cls, value): cls.current_chapter_img = value
    @classmethod
    def getCurrentChapterImg(cls): return cls.current_chapter_img
    @classmethod
    def setChapterFilesName(cls, value): cls.file_chapter_imgs = value
    @classmethod
    def getChapterFilesName(cls): return cls.file_chapter_imgs
    @classmethod
    def setCurrentDownLink(cls, value): cls.CURRENT_DOWN_LINK = value
    @classmethod
    def getCurrentDownLink(cls): return cls.CURRENT_DOWN_LINK


class ListComic:
    LIST_COMIC_QUEUE = Queue()
    (LIST_COMIC_NAME, LIST_COMIC_LINK, LIST_COMIC_UPDATEAT) = [None, None, None]

    @classmethod
    def setListComicsLinksUpdateAt(cls, names, links, update_at):
        if isinstance(names, list) and isinstance(links, list) and isinstance(update_at, list):
            for x in range(0, len(names)):
                cls.LIST_COMIC_QUEUE.put([names[x], links[x], update_at[x]])

    @classmethod
    def getListComicsLinksUpdateAt(cls):
        result = None
        if cls.LIST_COMIC_NAME != None and cls.LIST_COMIC_LINK != None:
            cls.setListComicsLinksUpdateAt(cls.LIST_COMIC_NAME, cls.LIST_COMIC_LINK, cls.LIST_COMIC_UPDATEAT)
            (cls.LIST_COMIC_NAME, cls.LIST_COMIC_LINK, cls.LIST_COMIC_UPDATEAT) = [None, None, None]
        if not cls.LIST_COMIC_QUEUE.empty(): result = cls.LIST_COMIC_QUEUE.get(False)
        return result

    @classmethod
    def addListComicChapterLink(cls, name, link, update_at):
        if name != None and link != None:
            cls.LIST_COMIC_QUEUE.put(name, link, update_at)

    @classmethod
    def getListValue(cls, result, type, start_add=None, result_type="list"):
        if result == None: return None
        if type == None: return result
        if result_type == "list" and type != None:
            data = []
            for x in range(0, len(result)):
                if start_add != None:
                    data.append(start_add+result[x].get(type))
                else:
                    data.append(result[x].get(type))
            return data
        return result

    @classmethod
    def setListComicName(cls, value, type=None): cls.LIST_COMIC_NAME = cls.getListValue(value, type)
    @classmethod
    def getListComicName(cls): return cls.LIST_COMIC_NAME
    @classmethod
    def setListComicChapterLink(cls, value, type=None, start_add=None): cls.LIST_COMIC_LINK = cls.getListValue(value, type, start_add)
    @classmethod
    def getListComicChapterLink(cls): return cls.LIST_COMIC_LINK
    @classmethod
    def setListComicUpdateAt(cls, value, type=None): cls.LIST_COMIC_UPDATEAT = cls.getListValue(value, type)
    @classmethod
    def getListComicUpdateAt(cls): return cls.LIST_COMIC_UPDATEAT
    @classmethod
    def getListComicChapterLink(cls): return cls.LIST_COMIC_QUEUE.get(False)

# domain end....
@@ -1,41 +1,14 @@
 import json,os
+import logging
 from xml.dom.minidom import Document
-from Comics.utils.Comic import Comic
 from Comics.utils.Constant import ComicPath
+from itemadapter import is_item, ItemAdapter

-class ComicInfoEntity:
-    @classmethod
-    def getNodes(cls):
-        return [Comic.dict_chapter,Comic.dict_comic_name,Comic.dict_number,Comic.dict_comic_names,
-                Comic.dict_dep,Comic.dict_year,Comic.dict_month,Comic.dict_day,Comic.dict_author,
-                Comic.dict_cbs,Comic.dict_genre,Comic.dict_tags,Comic.dict_page_count,
-                Comic.dict_language,Comic.dict_agerating,Comic.dict_pages]
-    @classmethod
-    def getJsonNodes(cls):
-        return [Comic.dict_chapter,Comic.dict_comic_name,Comic.dict_icon,Comic.dict_number,
-                Comic.dict_comic_names,
-                Comic.dict_dep,Comic.dict_year,Comic.dict_month,Comic.dict_day,Comic.dict_author,
-                Comic.dict_cbs,Comic.dict_genre,Comic.dict_tags,Comic.dict_page_count,
-                Comic.dict_language,Comic.dict_agerating,Comic.dict_pages,
-                Comic.dict_list_chapter,Comic.dict_chapter_imgs]

 class ComicInfo:
     IS_NEW_ICON = False
     document = Document()
     path_comic_info = None

-    @classmethod
-    def parseExec(cls,data,exec,start_add=None,item=True):
-        if data !=None and exec != None:
-            dots = str(exec).split(".")
-            if not isinstance(data,dict): data = json.loads(data)
-            for dot in dots:
-                data = data.get(dot)
-        if start_add != None and data != None:
-            data = start_add+data
-        return data

     @classmethod
     def setNodeAndValue(cls,node,value):
         if value != None:
@@ -50,12 +23,12 @@ class ComicInfo:
     # page count
     @classmethod
     def setPages(cls,values=None):
-        if values == None: values = Comic.getChapterFilesName()
+        #if values == None: values = Comic.getChapterFilesName()
         if values != None and isinstance(values,list):
            suffix = "."+str(values[0]).split(".")[-1]
            join_list=",".join(values).replace(suffix,"")
            values = join_list.split(",")
-        Comic.setPageCount(len(values)+1 if cls.IS_NEW_ICON else len(values))
+        #Comic.setPageCount(len(values)+1 if cls.IS_NEW_ICON else len(values))
         root_node = cls.document.createElement("Pages")
         if cls.IS_NEW_ICON:
             # add the cover page
@@ -68,12 +41,12 @@
             page = page.split("_")[-1]
             c_node.setAttribute("Image",page)
             root_node.appendChild(c_node)
-        Comic.dict_pages = Comic.setField(Comic.dict_pages,root_node,convert=False)
+        #Comic.dict_pages = Comic.setField(Comic.dict_pages,root_node,convert=False)

     @classmethod
     def getBaseUrl(cls,url=None):
-        if url == None:
-            url = Comic.getHomePage()
+        #if url == None:
+        #    url = Comic.getHomePage()
         (num,index) = [3,0]
         for x in range(0, num):
             index = str(url).find("/",index)+1
@@ -84,24 +57,30 @@
     def root_node(cls,root_value): return cls.document.createElement(root_value)

     @classmethod
-    def add_nodes(cls,root,list_value):
-        if len(list_value) == 0: return list_value
-        for value in list_value:
-            #Comic.chapter
-            if value[0] == None and value[4]:
-                # value[0] is empty although value[4] marks the field as required
-                msg = f"empty data: key={value[3]} value[0]={value[0]}, but required flag value[4]={value[4]}"
-                logger.error(msg)
-                exit()
-            if value[0] != None: root.appendChild(cls.setNodeAndValue(value[2],value[0]))
+    def add_nodes(cls,root,item):
+        item = ItemAdapter(item)
+        keys = item.keys()
+        files = item.field_names()
+        values = item.values()
+        print("test")
+        #if len(list_value) == 0: return list_value
+        #for value in list_value:
+        #    #Comic.chapter
+        #    if value[0] == None and value[4]:
+        #        # value[0] is empty although value[4] marks the field as required
+        #        msg = f"empty data: key={value[3]} value[0]={value[0]}, but required flag value[4]={value[4]}"
+        #        logging.error(msg)
+        #        exit()
+        #    if value[0] != None: root.appendChild(cls.setNodeAndValue(value[2],value[0]))

     @classmethod
     def initComicInfoXML(cls):
         cls.setPages()

     @classmethod
-    def writeComicInfoXML(cls,overlay=False):
-        save_path = ComicPath.getPathComicInfoXML()
+    def writeComicInfoXML(cls,item,overlay=False):
+        #save_path = ComicPath.getPathComicInfoXML()
+        save_path = "ComicInfo.xml"
         if os.path.exists(save_path):
             if overlay:
                 os.remove(save_path)
@@ -113,44 +92,8 @@
         root = cls.root_node("ComicInfo")
         new_document = Document()
         new_document.appendChild(root)
-        cls.add_nodes(root,ComicInfoEntity.getNodes())
+        cls.add_nodes(root, item)
         with open(save_path, "w", encoding="utf-8") as fo:
             new_document.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
             fo.close()
         logging.info(f"file generated... {save_path}")

-    @classmethod
-    def setComicInfo(cls,comicname=None,homepage=None,alias=None,author=None,icon=None,tags=None,
-                     dep=None,genre=None,lang=None,age_rating=None,chapters=None,current_chapter_img=None):
-        author = ",".join(set(str(str(author).replace("&",",").replace(" ",",")).split(",")))
-        Comic.setHomePage(homepage)
-        Comic.setIcon(icon)
-        Comic.setListChapter(chapters)
-        #Comic.setUpdateAt(update_at)
-        Comic.setComicName(str(comicname))
-        #if alias != None: comicInfo.setComicNames(alias)
-        Comic.setAuthor(author)
-        Comic.setTags(tags)
-        Comic.setDep(dep)
-        #comicInfo.setCBS("韩漫")
-        if genre != None: Comic.setGenre(genre)
-        Comic.setLanguage(lang)
-        Comic.setAgeRating(age_rating)
-        Comic.setCurrentChapterImg(current_chapter_img)

-    @classmethod
-    def writeJson(cls):
-        dict_data = {}
-        nodes = ComicInfoEntity.getJsonNodes()
-        for node in nodes:
-            key = Comic.getFieldNode(node)
-            value = Comic.getFieldOrigin(node)
-            if isinstance(value,list):
-                value = ",".join(value)
-            if key != None and isinstance(value,str):
-                child_dict = { key : value}
-                dict_data.update(child_dict)
-        s = json.dumps(dict_data,ensure_ascii=True)
-        logging.debug(f"json={s}")
-        with open(ComicPath.getPathConfComicChapterJson(mkdir=True),"w") as fs:
-            fs.write(s)
-        logging.info(f"file generated... {save_path}")
Comics/utils/Constant.py
@@ -1,5 +1,7 @@
 import os.path
+import re
 from opencc import OpenCC

 from Comics.settings import IMAGES_STORE

 class ComicPath:
     @classmethod
     def getDirComicChapter(cls):
@@ -13,4 +15,24 @@ class ComicPath:

     # Traditional to Simplified Chinese
     @classmethod
-    def ChineseConvert(cls, text,convert='t2s'): return OpenCC(convert).convert(str(text))
+    def chinese_convert(cls, text, convert='t2s'): return OpenCC(convert).convert(str(text))

+    # sanitize into a valid file name
+    @classmethod
+    def fix_file_name(cls, filename, replace=None):
+        if not isinstance(filename, str):
+            return filename
+        in_tab = r'[?*/\|.:><]'
+        str_replace = ""
+        if replace is not None:
+            str_replace = replace
+        filename = re.sub(in_tab, str_replace, filename)
+        # trim trailing spaces left over after substitution
+        count = 1
+        while True:
+            str_file = filename[0-count]
+            if str_file == " ":
+                count += 1
+            else:
+                filename = filename[0:len(filename)+1-count]
+                break
+        return filename
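Editor's note: for illustration, a condensed standalone sketch of what fix_file_name does; the sample input is made up and the rstrip shortcut stands in for the trailing-space loop above.

# Sketch: strip characters invalid in file names, then trim trailing spaces.
import re

def fix_file_name(filename, replace=""):
    filename = re.sub(r'[?*/\|.:><]', replace, filename)
    return filename.rstrip(" ")

print(fix_file_name("Chapter 01: What?! "))  # -> "Chapter 01 What!"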
Comics/utils/FileUtils.py
@@ -2,7 +2,17 @@ import base64,hashlib,os,shutil
 import math,time,json,datetime,logging
 from PIL import Image
 from tinydb import TinyDB, Query
-from Comics.spiders.utils.Constant import ComicPath
+from Comics.utils.Constant import ComicPath

+class fileUtils:
+    @classmethod
+    def save_file(cls, path, data):
+        dir = os.path.dirname(path)
+        if not os.path.exists(dir):
+            os.makedirs(dir)
+        with open(path, 'w', encoding='utf-8') as fs:
+            fs.write(str(data))
+            fs.close()

 class CommonUtils:
     @classmethod
@@ -31,11 +41,9 @@ class imageUtils:
     @classmethod
     def deScrambleImagesByPath(cls, img_path, img_save=None):
         if os.path.basename(img_path).startswith("scramble="):
-            imageUtils.encode_scramble_image(img_path,img_save)
-            return True
-        else:
-            return False
+            img_path = imageUtils.encode_scramble_image(img_path, img_save)
+        return img_path
@@ -223,4 +231,5 @@
         print("descramble succeeded =", save_path)
         if os.path.exists(imgpath):
             os.remove(imgpath)
-            print("remove=",imgpath)
+        print("remove=", imgpath)
+        return save_path