fix

This commit is contained in:
parent 76c7aa69a8
commit 38deb27d7e
Comics/exporters.py:

@@ -1,5 +1,4 @@
 import os.path,json,ast
-
 from Comics.settings import COMIC_INFO_FIELDS_TO_EXPORT
 from scrapy.exporters import XmlItemExporter
 from scrapy.exporters import PythonItemExporter
@@ -11,6 +10,16 @@ from Comics.utils.Constant import ComicPath
 from scrapy.utils.python import is_listlike, to_bytes, to_unicode
 from itemadapter import ItemAdapter

+class ItemImport():
+    def import_obj(self, file):
+        if os.path.exists(file):
+            with open(file, "r", encoding="utf-8") as fs:
+                result = fs.read()
+                fs.close()
+            return result
+        else:
+            return []
+
 class CommonExporter():
     def getPath(self, file , sufix=None):
         sufix = "."+sufix
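As committed, import_obj returns the file's raw text when the file exists but an empty list when it does not, so callers receive two different types. A minimal sketch of a reader that always yields a list, assuming the file stores a JSON array (this helper is illustrative, not part of the commit):

    import os.path, json

    def import_list(file):
        # hypothetical variant of import_obj: parse the file as JSON
        # and normalize every outcome to a list
        if not os.path.exists(file):
            return []
        with open(file, "r", encoding="utf-8") as fs:
            try:
                data = json.load(fs)
            except json.JSONDecodeError:
                return []
        return data if isinstance(data, list) else []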
Comics/items.py:

@@ -9,20 +9,14 @@ from Comics.utils.FileUtils import imageUtils
 from itemloaders.processors import TakeFirst, MapCompose, Join
 from scrapy.spiders import Spider

-def current_project():
-    return Spider.name
+def current_project(): return Spider.name

-def serialize_to_chinese(value):
-    return ComicPath.chinese_convert(value)
+def serialize_to_chinese(value): return ComicPath.chinese_convert(value)

-def serialize_to_fix_file(value):
-    file = ComicPath.chinese_convert(value)
-    return ComicPath.fix_file_name(file)
+def serialize_to_fix_file(value): return ComicPath.fix_file_name(ComicPath.chinese_convert(value))

 def _serialize_to_images(value, result_type=None):
-    count = 1
-    images_item = []
-    image_urls = []
+    (count, images_item, image_urls) = [1,[],[]]
     for image in value:
         try:
             (image_src, scramble) = [image.get("src"), image.get("scramble")]
@@ -52,12 +46,6 @@ def serialize_to_images(value): return _serialize_to_images(value)

 def serialize_to_image_urls(value): return _serialize_to_images(value, result_type="image_urls")

-
-class ListComicItem(Item):
-    name = Field()
-    link = Field()
-
-
 class ComicItem(Item):
     # project
     current_project = Field()
@@ -103,8 +91,7 @@ class ImagesItem(Item):
     comic = Field()

 def serializer_info_writer(value):
-    list_value = []
-    value = str(value).replace("&", " ")
+    (list_value, value) = [[], str(value).replace("&", " ")]
     for v in set(str(value).split(" ")):
         list_value.append(v)
     return ",".join(list_value)
@@ -139,11 +126,9 @@ def serializer_info_images(value): return _serialize_info_images(value)

 def serializer_info_images_count(value): return _serialize_info_images(value, "count")

-def serializer_info_images_completed(value):
-    return _serialize_info_images(value, result_type='name')
+def serializer_info_images_completed(value): return _serialize_info_images(value, result_type='name')

-def serializer_info_images_count(value):
-    return _serialize_info_images(value, result_type='len')
+def serializer_info_images_count(value): return _serialize_info_images(value, result_type='len')


 class ComicInfoItem(Item):
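Scrapy's item exporters pick up a per-field serializer from the Field metadata, which is presumably how the one-liner serializers above get invoked; a minimal sketch (the field names are illustrative, not taken from this commit):

    from scrapy.item import Item, Field

    class ExampleItem(Item):
        # the exporter calls the serializer when writing each field out
        name = Field(serializer=serialize_to_fix_file)
        writer = Field(serializer=serializer_info_writer)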
Comics/pipelines.py:

@@ -11,10 +11,11 @@ from Comics.settings import CBZ_EXPORT_PATH,OUTPUT_DIR,PROJECT_KEY
 from Comics.utils.Constant import ComicPath
 from Comics.items import ComicItem
 from scrapy.pipelines.images import ImagesPipeline
-from Comics.exporters import ComicInfoXmlItemExporter,JsonExport,ItemExporter
+from Comics.exporters import ComicInfoXmlItemExporter,JsonExport,ItemExporter, ItemImport
 from Comics.utils.FileUtils import CBZUtils
 from Comics.utils.FileUtils import fileUtils as fu
 from Comics.loader import ComicEntity
+from Comics.utils.ComicUtils import checkUtils

 class ComicsPipeline:
     def open_spider(self, spider):

@@ -22,6 +23,9 @@ class ComicsPipeline:

     # item is the object that the spider yields
     def process_item(self, item, spider):
+
+        checkUtils().export_error(item)
+
         if isinstance(item, ComicItem):
             # 'output/rm_comic/json/壞X/第1話 壞X'
             if os.path.exists(ComicPath.CBZ(item=item)):
@@ -112,7 +116,9 @@ class ImgDownloadPipeline(ImagesPipeline):
                 comic_info_images= comic_info['Pages'], remove=True):
             self.update_icon(item)
             self.pack_icon(item)
-
+        # CBZ verification failed
+        else:
+            checkUtils().export_error(item)
         #sleep_time = random.randint(3,15)
         #print(f'Waiting {sleep_time} seconds before the next chapter')
         #time.sleep(int(sleep_time))
@@ -1,10 +1,8 @@
 import scrapy,logging,time,os
 from Comics.items import ComicItem
 from Comics.loader import ComicLoader
-from Comics.items import ListComicItem
 from Comics.utils.Constant import ComicPath
 from Comics.settings import PROJECT_KEY
-import skip
-
+from Comics.utils.ComicUtils import checkUtils
 class RmComicSpider(scrapy.Spider):
     name = 'rm_comic'

@@ -47,11 +45,12 @@ class RmComicSpider(scrapy.Spider):
             comic_item.chapter(value=chapter)
             item = comic_item.load_item()
             cbz_path = ComicPath.get_file_path(item=item, result_type="cbz", convert=True)
-            if os.path.exists(cbz_path):
-                logging.info(f"Comic {cbz_path} already exists, skipping...")
-                yield item
-            else:
-                yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
+            if not checkUtils().is_error(item):
+                if os.path.exists(cbz_path):
+                    logging.info(f"Comic {cbz_path} already exists, skipping...")
+                    yield item
+                else:
+                    yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)


 # Read all the images under a chapter
@@ -1,7 +1,6 @@
 import scrapy,logging,time,os
 from Comics.items import ComicItem
 from Comics.loader import ComicLoader
 from Comics.items import ListComicItem
 from Comics.utils.Constant import ComicPath
 from Comics.settings import PROJECT_KEY
-import skip
Comics/utils/ComicUtils.py (new file, 32 lines):
@@ -0,0 +1,32 @@
+import os,json
+from Comics.settings import CBZ_EXPORT_PATH,OUTPUT_DIR,PROJECT_KEY
+from Comics.utils.Constant import ComicPath
+from Comics.exporters import ComicInfoXmlItemExporter,JsonExport,ItemExporter, ItemImport
+from Comics.utils.FileUtils import fileUtils as fu
+from Comics.loader import ComicEntity
+
+class checkUtils:
+    #
+    # Check whether a chapter keeps failing
+    def export_error(self, item):
+        file = os.path.join(OUTPUT_DIR, item[PROJECT_KEY][0], "error_comics.json")
+        error_comic = ItemImport().import_obj(file)
+        error_comic.append({ "name" : item['name'], "chapter" : item['chapter'], "date" : ComicPath().getYearMonthDay()})
+        fu.save_file(file, json.dumps(error_comic))
+
+    def is_error(self, item):
+        file = os.path.join(OUTPUT_DIR, item[PROJECT_KEY][0], "error_comics.json")
+        error_comic = ItemImport().import_obj(file)
+        try:
+            for error_c in eval(error_comic):
+                (name, chatper, date) = [error_c['name'], error_c['chapter'], error_c['date']]
+                if item['name'] == name and item['chapter'] == chatper:
+                    return True
+                else:
+                    return False
+        except:
+            return False
+
+
+
+
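Because import_obj hands back raw file text, export_error's append only succeeds when the file is missing, and is_error has to eval the text back into a list; note also that the for loop returns on its first iteration, so only the first recorded entry is ever compared. A sketch of the same round-trip over plain JSON, assuming the error-record layout shown above (these helpers are illustrative, not part of the commit):

    import os.path, json

    def load_errors(file):
        # parse the persisted error list, tolerating a missing or corrupt file
        if not os.path.exists(file):
            return []
        with open(file, "r", encoding="utf-8") as fs:
            try:
                return json.load(fs)
            except json.JSONDecodeError:
                return []

    def record_error(file, item, today):
        # append one error record and write the whole list back as JSON
        errors = load_errors(file)
        errors.append({"name": item["name"], "chapter": item["chapter"], "date": today})
        with open(file, "w", encoding="utf-8") as fs:
            json.dump(errors, fs, ensure_ascii=False)

    def has_error(file, item):
        # True if this name/chapter pair was recorded before; checks every entry
        return any(e["name"] == item["name"] and e["chapter"] == item["chapter"]
                   for e in load_errors(file))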
Comics/utils/Constant.py:

@@ -1,9 +1,16 @@
 import os.path,logging
 import re,requests,time
+from datetime import date
 from Comics import settings
 from opencc import OpenCC
 class ComicPath:
     PREFIX_SCRAMBLE = "scramble="

+    @classmethod
+    def getYearMonthDay(cls):
+        today = date.today()
+        # format as year-month-day
+        return today.strftime("%Y%m%d")
+
     @classmethod
     def getDirComicChapter(cls):
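For reference, "%Y%m%d" yields a date with no separators, despite the year-month-day comment:

    from datetime import date
    print(date(2024, 1, 31).strftime("%Y%m%d"))  # 20240131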
Comics/utils/FileUtils.py:

@@ -10,9 +10,9 @@ from Comics.utils.Constant import ntfy
 class fileUtils:
     @classmethod
     def save_file(cls,path,data):
-        dir = os.path.dirname(path)
-        if not os.path.exists(dir):
-            os.makedirs(dir)
+        root_dir = os.path.dirname(path)
+        if not os.path.exists(root_dir):
+            os.makedirs(root_dir)
         with open(path,'w',encoding='utf-8') as fs:
             fs.write(str(data))
             fs.close()
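The rename stops save_file from shadowing the built-in dir; the exists-check plus makedirs pair could equally be collapsed with exist_ok=True. A sketch of that variant (illustrative, not what the commit does):

    import os

    def save_file(path, data):
        parent = os.path.dirname(path)
        if parent:
            # one call creates the whole chain; no exists() check needed
            os.makedirs(parent, exist_ok=True)
        with open(path, "w", encoding="utf-8") as fs:
            fs.write(str(data))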