caiwx86 2024-02-20 21:08:13 +08:00
parent 2a9820949b
commit c78fa7e47d
14 changed files with 424 additions and 260 deletions

.gitignore

@@ -1,5 +1,6 @@
.scrapy/*
.vscode/*
.DS_Store
CBZ/*
output/*
/**/__pycache__


@@ -6,20 +6,10 @@ from scrapy.exporters import JsonItemExporter
from Comics.items import ComicInfoItem
from Comics.items import ComicItem
from Comics.settings import COMIC_INFO_XML_STORE
from Comics.utils.Constant import ComicPath
from Comics.utils import ComicPath
from scrapy.utils.python import is_listlike, to_bytes, to_unicode
from itemadapter import ItemAdapter
class ItemImport():
def import_obj(self, file):
if os.path.exists(file):
with open(file, "r", encoding="utf-8") as fs:
result = fs.read()
return result
return result
else:
return []
class CommonExporter():
def getPath(self, file, sufix=None):
if sufix is not None: sufix = "." + sufix


@@ -4,9 +4,9 @@
# https://docs.scrapy.org/en/latest/topics/items.html
import os,Comics.settings as settings,logging
from scrapy.item import Item, Field
from Comics.utils.Constant import ComicPath
from Comics.utils.FileUtils import imageUtils
from itemloaders.processors import TakeFirst, MapCompose, Join
from Comics.utils import ComicPath
from Comics.utils import imageUtils
from itemloaders.processors import TakeFirst
# Convert Traditional Chinese to Simplified Chinese
def serialize_to_chinese(value): return ComicPath.chinese_convert(value)
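For reference, a minimal sketch of the conversion this serializer performs (the title is invented):

from opencc import OpenCC
print(OpenCC('t2s').convert('第1話 壞X'))  # -> '第1话 坏X'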
@@ -87,6 +87,11 @@ class ComicItem(Item):
# Image name
images_name = Field()
# Chapter link
chapter_href = Field()
# Chapter API
chapter_api = Field()
# Serializer: author
def serializer_info_writer(value):
(list_value, value) = [[], str(value).replace("&", " ")]

View File

@@ -8,7 +8,8 @@ class ComicLoader(ItemLoader):
dots = str(exec).split(".")
if not isinstance(data,dict): data = json.loads(data)
for dot in dots:
data = data.get(dot)
if data is not None: data = data.get(dot)
logging.debug(f"data= {data} dot={dot}")
return data
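As a hedged illustration of the dot-path lookup above (the data and keys are invented):

data = {"props": {"pageProps": {"books": [{"id": "42"}]}}}
for dot in "props.pageProps.books".split("."):
    if data is not None:
        data = data.get(dot)
# data is now [{'id': '42'}]; a missing key would yield None instead of raising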
def add_xpath(self, field_name, xpath, *processors, index=None, exec=None, re=None, is_null=None, **kw):
@@ -102,6 +103,29 @@ class ComicLoader(ItemLoader):
# Image URLs
def image_urls(self, value=None, xpath=None, index=None, sexec=None): self.set_properties('image_urls', value, xpath, index, sexec)
def get_output_value(self, field_name):
value = super().get_output_value(field_name)
try:
if isinstance(value, list) and len(value) == 1: value = value[0]
except Exception:
print(f"get_output_value value={value} type={type(value)}")
return value
# Comic name
def get_name(self): return self.get_output_value("name")
# Comic chapter
def get_chapter(self): return self.get_output_value("chapter")
# Project name
def get_project_name(self): return self.get_output_value(PROJECT_KEY)
# Chapter link
def get_chapter_href(self): return self.get_output_value("chapter_href")
# All chapters
def get_chapters(self): return self.get_output_value("chapters")
# Chapter API path
def get_chapter_api(self): return self.get_output_value("chapter_api")
# Image URLs
def get_image_urls(self): return self.get_output_value("image_urls")
class ComicEntity:
ENTITY = None
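A hedged usage sketch of the getters above (the value is invented):

loader = ComicLoader(item=ComicItem())
loader.add_value("name", "壞X")
print(loader.get_name())  # the single collected value, unwrapped from its list by get_output_value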


@@ -9,11 +9,11 @@ import os,scrapy,logging
from Comics import settings
from Comics.items import ComicItem
from Comics.settings import OUTPUT_DIR
from Comics.loader import ComicEntity
from Comics.loader import ComicEntity,ComicLoader
from Comics.exporters import ComicInfoXmlItemExporter
from Comics.utils.FileUtils import CBZUtils,fileUtils as fu
from Comics.utils.Constant import ComicPath
from Comics.utils.ComicUtils import checkUtils
from Comics.utils import CBZUtils,fileUtils as fu
from Comics.utils import ComicPath
from Comics.utils import checkUtils
from Comics.exporters import JsonExport,ItemExporter
from scrapy.pipelines.images import ImagesPipeline
@@ -24,12 +24,14 @@ class ComicsPipeline():
# item is the object yielded from the spider
def process_item(self, item, spider):
if isinstance(item, ComicItem):
# item = ComicEntity(item).item()
# 'output/rm_comic/json/壞X/第1話 壞X'
if fu.exists(ComicPath.path_cbz(item=item)):
return ItemExporter().export_obj(item)
# The comic's CBZ file already exists: hand off to the exporter
if fu.exists(ComicPath.path_cbz(item=item)): return ItemExporter().export_obj(item)
else:
file = os.path.join(OUTPUT_DIR, spider.name, "json", item['name'], item['chapter'])
return JsonExport(file=file).export_json(ComicEntity(item).item(), if_return=True)
# No CBZ file yet: export the chapter as JSON
#file = os.path.join(OUTPUT_DIR, spider.name, "json", item['name'], item['chapter'])
return JsonExport(file=ComicPath.getDirJsonComicChapter(item)).export_json(ComicEntity(item).item(), if_return=True)
# Image handling
def close_spider(self, spider):
@@ -102,6 +104,11 @@ class ImgDownloadPipeline(ImagesPipeline):
# return item
# Pack into CBZ
cbz_path = self.get_file_path(item, result_type="cbz")
success_data = []
for result in results:
if result[0]: success_data.append(result[1])
image_urls = ComicLoader(item=item).get_image_urls()
if len(success_data) != len(image_urls): return
if fu.exists(cbz_path):
self.update_icon(item)
self.pack_icon(item)
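For context, Scrapy's ImagesPipeline hands item_completed a results list of (success, info) two-tuples, where info is a dict with url/path/checksum keys on success and a Failure on error, so the length check above skips packing whenever any image failed to download. A hedged equivalent of the collection loop:

success_data = [info for ok, info in results if ok]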


@@ -26,7 +26,7 @@ ROBOTSTXT_OBEY = False
HTTPERROR_ALLOWED_CODES = [ 200 , 403]
# Configure maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 16
CONCURRENT_REQUESTS = 8
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
@@ -45,7 +45,7 @@ RETRY_HTTP_CODES = [408, 401]
CONCURRENT_REQUESTS_PER_DOMAIN = 16
CONCURRENT_REQUESTS_PER_IP = 16
PROXY_LIST = [
"http://127.0.0.1:7890",
# "http://127.0.0.1:7890",
# "http://10.0.10.117:8123",
]
# Disable cookies (enabled by default)


@@ -1,15 +1,17 @@
import scrapy,logging,time,os,skip
from Comics.items import ComicItem
from Comics.loader import ComicLoader
from Comics.utils.Constant import ComicPath
from Comics.utils.ComicUtils import checkUtils
from Comics.utils import ComicPath
from Comics.utils import checkUtils
from Comics.utils import Conf
class RmComicSpider(scrapy.Spider):
name = 'rm_comic'
allowed_domains = ['roum1.xyz']
allowed_domains = ['roum12.xyz']
main_url = 'https://'+allowed_domains[0]
start_urls = main_url+'/books'
# Walk through the site's listing pages
def start_requests(self):
for x in range(0,60):
yield scrapy.Request(self.start_urls+"?&page="+str(x), callback=self.books_comic)
@@ -17,39 +19,22 @@ class RmComicSpider(scrapy.Spider):
# Collect info for multiple comics
def books_comic(self, response):
comics = ComicLoader(item=ComicItem(), response=response)
data = comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
for book in comics.get_exec(data, str_exec="props.pageProps.books"):
comics.add_value('link', self.start_urls+"/"+book['id'])
for book in comics.get_exec(comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0], str_exec="props.pageProps.books"):
if book['name'] not in skip.skip_comic:
yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
# Fetch one comic's detail page
# After collecting its chapter links, move on to the next stage
def parse_comic(self, response):
comic_item = ComicLoader(item=ComicItem(), response=response)
comic_item.project_name(self.name)
comic_item.name(xpath='//div[@class="col"]/h5/text()')
comic_item.icon(xpath='//img[@class="img-thumbnail"]/@src')
comic_item.author(xpath='//div[contains(@class,"bookid_bookInfo")]/p[1]/text()', index=1)
comic_item.tags(xpath='//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()')
comic_item.dep(xpath='//div[contains(@class,"bookid_bookInfo")]/p[4]/text()', index=1)
comic_item.date(xpath='//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()', index=1)
comic_item.genre(value="韩漫")
comic_item.age_rating(value="R18+")
chapter_href = comic_item.get_xpath('//div[contains(@class,"bookid_chapterBox")]'
'//div[contains(@class,"bookid_chapter")]/a/@href')
chapters = comic_item.get_xpath('//div[contains(@class,"bookid_chapterBox")]'
'//div[contains(@class,"bookid_chapter")]/a/text()')
for chapter, link in zip(chapters, chapter_href):
comic_item.chapters(value=chapters)
comic_item.chapter(value=chapter)
comic_item = Conf().comic(self.name, ComicLoader(ComicItem(), response))
for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
item = comic_item.load_item()
cbz_path = ComicPath.get_file_path(item=item, result_type="cbz", convert=True)
if not checkUtils().is_error(item):
if os.path.exists(cbz_path):
cbz_path = ComicPath.get_file_path(item=item, result_type="cbz", convert=True, chapter=chapter)
if not checkUtils().is_error(item) and os.path.exists(cbz_path):
logging.info(f"漫画 {cbz_path} 已存在, 跳过中...")
yield item
else:
# Request the chapter link and continue in self.parse_chapter
yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
@@ -57,26 +42,22 @@
def parse_chapter(self, response):
comic_item = ComicLoader(item=response.meta['item'], response=response)
data = comic_item.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
str_exec = "props.pageProps."
comic_item.name(value=data, sexec=str_exec+"bookName")
comic_item.dep(value=data, sexec=str_exec+"description")
comic_item.chapter(value=data, sexec=str_exec+"chapterName")
comic_item.image_urls(value=data, sexec=str_exec+"images")
comic_item.images(value=data, sexec=str_exec+"images")
comic = comic_item.load_item()
chapter_api_url = comic_item.get_exec(data, str_exec+"chapterAPIPath")
if chapter_api_url is not None:
item: ComicLoader = Conf().parse_chapter(item=comic_item, value=data)
comic = item.load_item()
chapter_api_url = item.get_chapter_api()
if chapter_api_url:
try:
yield scrapy.Request(self.main_url + chapter_api_url, meta={'item': comic}, callback=self.parse_chapter_api)
except Exception:
logging.warning(f"yield scrapy.Request({self.main_url} + {chapter_api_url}, meta={comic}, callback=self.parse_chapter_api)")
else:
yield comic
# Handle the encrypted chapter-data API
def parse_chapter_api(self, response):
comic_item = ComicLoader(item=response.meta['item'], response=response)
comic_item.chapter(value=response.text, sexec='chapter.name')
comic_item.image_urls(value=response.text, sexec='chapter.images')
comic_item.images(value=response.text, sexec='chapter.images')
yield comic_item.load_item()
item: ComicLoader = Conf().parse_chapter(item=comic_item, value=response.text)
yield item.load_item()
def parse(self, response):


@@ -0,0 +1,41 @@
data:
name: '//div[@class="col"]/h5/text()'
icon: '//img[@class="img-thumbnail"]/@src'
author:
xpath: '//div[contains(@class,"bookid_bookInfo")]/p[1]/text()'
index: 1
tags: '//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()'
dep:
xpath: '//div[contains(@class,"bookid_bookInfo")]/p[4]/text()'
index: 1
date:
xpath: '//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()'
index: 1
genre:
value: "韩漫"
age_rating:
value: "R18+"
chapter_href: '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href'
chapters: '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()'
parse_chapter:
name:
sexec: props.pageProps.bookName
dep:
sexec: props.pageProps.description
chapter:
sexec: props.pageProps.chapterName
image_urls:
sexec: props.pageProps.images
images:
sexec: props.pageProps.images
chapter_api:
sexec: props.pageProps.chapterAPIPath
parse_chapter_api:
chapter:
sexec: chapter.name
image_urls:
sexec: chapter.images
images:
sexec: chapter.images
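A note on the schema above: a bare string is shorthand for an xpath selector, while the dict form may carry xpath, index, value, or sexec keys. A hedged sketch of how one entry is consumed (the entry is invented; the real mapping lives in Conf.comic further down in this commit):

entry = {"xpath": '//div[@class="col"]/h5/text()', "index": 1}
loader.set_properties(name="name", value=None, xpath=entry.get("xpath"), index=entry.get("index"), sexec=entry.get("sexec"))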


@@ -1,7 +1,7 @@ import scrapy,logging,time,os
import scrapy,logging,time,os
from Comics.items import ComicItem
from Comics.loader import ComicLoader
from Comics.utils.Constant import ComicPath
from Comics.utils import ComicPath
from Comics.settings import PROJECT_KEY
import skip


@@ -1,74 +1,166 @@
import base64,hashlib,os,shutil
import base64,hashlib,os,shutil,os.path
import math,time,json,datetime,logging
import re,requests,time,xmlschema
from datetime import date
from Comics import settings
from opencc import OpenCC
from PIL import Image
from Comics.utils.Constant import ComicPath
from pathlib import Path
from zipfile import ZipFile
from Comics.settings import COMIC_INFO_XML_FILE,CBZ_EXPORT_PATH,IMAGES_STORE
from Comics.utils.Constant import ntfy
from Comics.settings import COMIC_INFO_XML_FILE,OUTPUT_DIR,PROJECT_KEY
import yaml
from Comics.loader import ComicLoader
# Configuration class
class Conf():
# Read configuration from a yml file
# @project load the config file named after the project: <project>.yml
# @key return the dict stored under this key (defaults to None)
#def init(self, project, key=None):
# data = None
# if project == None: project = "config"
# with open(os.path.join("Comics","spiders", project)+".yml") as f:
# data = yaml.load(f, Loader=yaml.FullLoader)
# if key != None and data != None:
# return data[key]
def get_config_value(self, project, key=None):
# Use Path to work with the config file path
config_path = Path(os.path.join("Comics","spiders", project)+".yml")
#Path("Comics") / "spiders" / project / (project + ".yml")
# Bail out if the project config does not exist
if not config_path.is_file():
return None
# Open the file and load the config data
try:
with config_path.open('r') as f:
data = yaml.safe_load(f)
except yaml.YAMLError as e:
print(f"Error loading YAML file: {e}")
return None
# Return the requested key if present
if key is not None and key in data:
return data[key]
else:
return None
# Feed the parsed config data into a ComicLoader
def comic(self, project, item: ComicLoader, child_data='data', val=None):
item.project_name(project)
data = self.get_config_value(project, child_data)
for key, xpath_data in data.items():
if isinstance(xpath_data, str): xpath_data = {'xpath': xpath_data}
xpath = xpath_data.get('xpath', None)
index = xpath_data.get('index', None)
value = xpath_data.get('value', None) if val is None else val
sexec = xpath_data.get('sexec', None)
item.set_properties(name=key, value=value, xpath=xpath, index=index, sexec=sexec)
return item
def parse_chapter(self,item: ComicLoader, value):
return self.comic(item.get_project_name(), item, "parse_chapter", value)
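A hedged usage sketch, mirroring how the spider calls it (the spider name and response are placeholders):

loader = Conf().comic("rm_comic", ComicLoader(item=ComicItem(), response=response))
item = loader.load_item()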
# File utilities
class fileUtils:
# Whether the path exists
@classmethod
def exists(cls, path): return os.path.exists(path)
# Join path components
@classmethod
def join(cls, path, *paths): return os.path.join(path, *paths)
# Directory part of a path
@classmethod
def dirname(cls, path): return os.path.dirname(path)
# File-name part of a path
@classmethod
def basename(cls, path): return os.path.basename(path)
# Save data to a file, creating parent directories as needed
@classmethod
def save_file(cls,path,data):
root_dir = os.path.dirname(path)
if not os.path.exists(root_dir):
os.makedirs(root_dir)
if not os.path.exists(root_dir): os.makedirs(root_dir)
with open(path, 'w', encoding='utf-8') as fs:
fs.write(str(data))
# Ensure the parent directory exists and return the path
@classmethod
def path(cls, file):
base_dir = os.path.dirname(file)
if not os.path.exists(base_dir): os.makedirs(base_dir)
return file
# Compare two files' sizes
@classmethod
def compare_size(cls, dst, file):
if os.path.exists(dst) and os.path.exists(file):
if cls.exists(dst) and cls.exists(file):
return os.stat(dst).st_size == os.stat(file).st_size
else:
return 0
return None
# Read a file's contents
@classmethod
def read(cls, file):
if os.path.exists(file):
with open(file, "r", encoding="utf-8") as fs: return fs.read()
else:
return []
"""
图像编号 image-1.jpg
存在image.png 返回 image-1.png 反之 image.png
"""
@classmethod
def file_check(cls, file, result="file"):
temp_file_name = file
count = 1
files_size = []
name, suffix = temp_file_name.split(".")
while count:
if os.path.exists(temp_file_name):
files_size.append(os.stat(temp_file_name).st_size)
temp_file_name = name+"-"+str(count)+"."+suffix
def file_check(cls, file, result="file", count=0):
temp_file_name, files_size, files_name = [file, {}, []]
# If the base file name itself is absent, start numbering from 1
if not cls.exists(temp_file_name) and temp_file_name == file: count = 1
while True:  # exits via the returns below
temp_file_name = ComicPath().images_icon(file=file, count=count)
if cls.exists(temp_file_name):
# Remember the name that already exists
files_name.append(temp_file_name)
file_size = os.path.getsize(temp_file_name)
# Record name and size, keyed by size so equal-size files collapse to one entry
files_size[file_size] = {"name": temp_file_name, "size": file_size}
# Format the next candidate file name
# temp_file_name = ComicPath().images_icon(file=file, count=count)
count += 1
else:
# Duplicate detection: collect the names that survived the size-keyed dedup;
# any recorded name outside this set is a same-size duplicate to delete
diff_names = {value["name"] for value in files_size.values()}
# Nothing recorded: return the original name
if len(diff_names) == 0: return file
for file_name in files_name:
if file_name not in diff_names:
logging.info(f"删除文件:{file_name}")
os.remove(file_name)
# If the base file survived alongside numbered variants, move it into a numbered slot
if file in diff_names:
move_file = ComicPath().images_icon(file=file, count=count)
logging.info(f"Moving file {file} to {move_file}")
shutil.move(file, move_file)
cls.file_check(file=file,result=result,count=0)
# If the deduplicated name count still differs from the number of surviving files, duplicates remain; rerun this method
if len(set(diff_names)) != len(set(files_name)): cls.file_check(file, result=result,count=0)
if result == "size":
return files_size
return {value["size"] for value in files_size.values()}
else:
return temp_file_name
# Whether the file content changed
@classmethod
def file_update(cls, old_file, new_file):
is_update = False
if os.path.exists(old_file):
is_update = os.stat(old_file).st_size not in cls.file_check(new_file, result="size")
if os.path.exists(old_file): is_update = os.path.getsize(old_file) not in cls.file_check(new_file, result="size")
return is_update
# Whether the cover image needs updating
@@ -81,7 +173,7 @@ class fileUtils:
logging.info(f"update icon ... {image_path} ===> {cls.file_check(save_path)}")
shutil.copyfile(image_path, cls.file_check(save_path))
# Shared helper class
class CommonUtils:
@classmethod
def parseExec(cls,data,exec):
@@ -92,6 +184,28 @@ class CommonUtils:
data = data.get(dot)
return data
@classmethod
def _validate_xml(cls,xml_file, xsd_file):
# Load the XSD schema
xsd = xmlschema.XMLSchema(xsd_file)
# Validate the XML document
is_valid = xsd.is_valid(xml_file)
if is_valid:
print("XML passed XSD validation")
else:
print("XML failed XSD validation; validation errors follow")
for error in xsd.iter_errors(xml_file):
print(error)
@classmethod
def validate_comicinfo_xml(cls, xml_file):
cls._validate_xml(xml_file, "ComicInfo.xsd")
# Image-processing helpers
class imageUtils:
@classmethod
@@ -307,7 +421,7 @@ class imageUtils:
logging.debug(f"remove {img_path}")
return save_path
# CBZ archive helpers
class CBZUtils:
@classmethod
@@ -419,3 +533,158 @@ class CBZUtils:
os.remove(zip_path)
logging.error(f"validating fail === {zip_path}")
return False
# Error-tracking helpers
class checkUtils:
def read(self, item):
file = os.path.join(OUTPUT_DIR, ComicLoader(item=item).get_project_name(), "error_comics.json")
return fileUtils.read(file)
# Track chapters that fail repeatedly
def export_error(self, item):
if not self.is_error(item):
file = os.path.join(OUTPUT_DIR, ComicLoader(item=item).get_project_name(), "error_comics.json")
try:
error_comic = json.loads(self.read(item))
except:
error_comic = []
error_comic.append({ "name" : ComicPath.new_file_name(item['name']),
"chapter" : ComicPath.new_file_name(item['chapter']),
"date" : ComicPath().getYearMonthDay()})
fileUtils.save_file(file, json.dumps(error_comic))
def is_error(self, item):
try:
for error_c in json.loads(self.read(item)):
(name, chapter, date) = [error_c['name'], error_c['chapter'], error_c['date']]
if ComicPath.new_file_name(item['name']) == ComicPath.new_file_name(name) and ComicPath.new_file_name(item['chapter']) == ComicPath.new_file_name(chapter):
return True
else:
return False
except:
return False
# Comic path helpers
class ComicPath:
PREFIX_SCRAMBLE = "scramble="
@classmethod
def getYearMonthDay(cls):
today = date.today()
# Format as year-month-day (YYYYMMDD)
return today.strftime("%Y%m%d")
@classmethod
def getDirComicChapter(cls, item, categorize=""):
comic = ComicLoader(item=item)
return os.path.join(OUTPUT_DIR, comic.get_project_name(), categorize, comic.get_name(), comic.get_chapter())
@classmethod
def getDirJsonComicChapter(cls, item):
return cls.getDirComicChapter(item=item, categorize="json")
@classmethod
def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix
@classmethod
def getFileScrambleImageSave(cls,file,relative=False, is_prefix=True):
file_name = str(file).split("_")[-1]
if relative:
file_name = os.path.basename(file_name)
if relative == "fullpath":
file_name = os.path.join(os.path.dirname(file), file_name)
if not is_prefix:
return file_name.split(".")[0]
else:
return file_name
# Traditional to Simplified Chinese
@classmethod
def chinese_convert(cls, text,convert='t2s'): return OpenCC(convert).convert(str(text))
# Sanitize into a legal file name
@classmethod
def fix_file_name(cls, filename, replace=None):
if not isinstance(filename, str):
return filename
in_tab = r'[?*/\|.:><]'
str_replace = ""
if replace is not None:
str_replace = replace
filename = re.sub(in_tab, str_replace, filename)
# Trim trailing spaces
filename = filename.rstrip(" ")
return filename
@classmethod
def new_file_name(cls, name): return cls.fix_file_name(cls.chinese_convert(name))
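A hedged worked example of the two steps combined (the title is invented):

print(ComicPath.new_file_name("第1話: 壞X"))  # -> '第1话 坏X' (Simplified conversion, then ':' stripped)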
@classmethod
def get_file_path(cls, item, result_type="image", file=None, convert=False, chapter=None):
PROJECT = ComicLoader(item=item).get_project_name()
if not convert:
name = item['name']
if chapter is None: chapter = item['chapter']
else:
name = cls.fix_file_name(cls.chinese_convert(item['name']))
if chapter is None: chapter = cls.fix_file_name(cls.chinese_convert(item['chapter']))
if result_type == "image":
if os.path.sep not in file:
file = os.path.join(PROJECT, "images", name, chapter, file)
elif result_type == "comic_info":
file = os.path.join(PROJECT, "images", name, chapter)
elif result_type == "cbz_icon":
file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".jpg")
elif result_type == "down_icon":
file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon"))
elif result_type == "down_cache_icon":
file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon_cache"))
elif result_type == "icon":
file = os.path.join(PROJECT, "icons", name, name+".jpg")
elif result_type == "icon_cache":
file = os.path.join(PROJECT, "icons", ".cache", name+".jpg")
elif result_type == "cbz":
file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".CBZ")
elif result_type == "images_dir":
file = os.path.join(settings.IMAGES_STORE, PROJECT, "images", name, chapter)
else:
raise ValueError(f"Unsupported result_type: {result_type}")
return file
@classmethod
def path_cbz(cls, item):
return cls.get_file_path(item, result_type="cbz", convert=True)
@classmethod
def images_icon(cls, file, count):
if count == 0: return file
name, suffix = os.path.splitext(file)
return name+"-"+str(count)+suffix
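Worked examples of the numbering contract images_icon implements:

print(ComicPath.images_icon("icon.jpg", 0))  # -> 'icon.jpg' (count 0 keeps the original name)
print(ComicPath.images_icon("icon.jpg", 2))  # -> 'icon-2.jpg'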
# Notification helpers (ntfy push)
class ntfy:
@classmethod
def sendMsg(cls, msg,alert=False,sleep=None,error=None):
try:
print(f"#ntfy: {msg}")
if alert:
requests.post("https://ntfy.caiwenxiu.cn/PyComic",
data=msg.encode(encoding='utf-8'))
except Exception:
print(f"#ntfy error: {msg}")
if sleep is not None:
logging.info(f'Waiting {sleep} seconds before the next stage')
time.sleep(int(sleep))
if error is not None:
print(f"#ntfy Error: {error}")
return False
else:
return True
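A hedged usage sketch (the messages are invented):

ntfy.sendMsg("packing finished", alert=True)  # prints locally and pushes to the ntfy topic; returns True
ntfy.sendMsg("retrying", sleep=5)             # prints locally, then waits 5 seconds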


@@ -1,40 +0,0 @@
import os,json
from Comics.settings import CBZ_EXPORT_PATH,OUTPUT_DIR,PROJECT_KEY
from Comics.utils.Constant import ComicPath
from Comics.exporters import ComicInfoXmlItemExporter,JsonExport,ItemExporter, ItemImport
from Comics.utils.FileUtils import fileUtils as fu
from Comics.loader import ComicEntity
class checkUtils:
def read(self, item):
file = os.path.join(OUTPUT_DIR, item[PROJECT_KEY][0], "error_comics.json")
return ItemImport().import_obj(file)
# Track chapters that fail repeatedly
def export_error(self, item):
if not self.is_error(item):
file = os.path.join(OUTPUT_DIR, item[PROJECT_KEY][0], "error_comics.json")
try:
error_comic = eval(self.read(item))
except:
error_comic = []
error_comic.append({ "name" : ComicPath.new_file_name(item['name']),
"chapter" : ComicPath.new_file_name(item['chapter']),
"date" : ComicPath().getYearMonthDay()})
fu.save_file(file, json.dumps(error_comic))
def is_error(self, item):
try:
for error_c in eval(self.read(item)):
(name, chatper, date) = [error_c['name'], error_c['chapter'], error_c['date']]
if ComicPath.new_file_name(item['name']) == ComicPath.new_file_name(name) and ComicPath.new_file_name(item['chapter']) == ComicPath.new_file_name(chatper):
return True
else:
return False
except:
return False


@@ -1,114 +0,0 @@
import os.path,logging
import re,requests,time
from datetime import date
from Comics import settings
from opencc import OpenCC
class ComicPath:
PREFIX_SCRAMBLE = "scramble="
@classmethod
def getYearMonthDay(cls):
today = date.today()
# Format as year-month-day (YYYYMMDD)
return today.strftime("%Y%m%d")
@classmethod
def getDirComicChapter(cls):
return None
@classmethod
def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix
@classmethod
def getFileScrambleImageSave(cls,file,relative=False, is_prefix=True):
file_name = str(file).split("_")[-1]
if relative:
file_name = os.path.basename(file_name)
if relative == "fullpath":
file_name = os.path.join(os.path.dirname(file), file_name)
if not is_prefix:
return file_name.split(".")[0]
else:
return file_name
# Traditional to Simplified Chinese
@classmethod
def chinese_convert(cls, text,convert='t2s'): return OpenCC(convert).convert(str(text))
# Sanitize into a legal file name
@classmethod
def fix_file_name(cls, filename, replace=None):
if not isinstance(filename, str):
return filename
in_tab = r'[?*/\|.:><]'
str_replace = ""
if replace is not None:
str_replace = replace
filename = re.sub(in_tab, str_replace, filename)
count = 1
while True:
str_file = filename[0-count]
if str_file == " ":
count += 1
else:
filename = filename[0:len(filename)+1-count]
break
return filename
@classmethod
def new_file_name(cls, name): return cls.fix_file_name(cls.chinese_convert(name))
@classmethod
def get_file_path(cls, item, result_type="image", file=None, convert=False):
PROJECT = item[settings.PROJECT_KEY][0]
if not convert:
name = item['name']
chapter = item['chapter']
else:
name = cls.fix_file_name(cls.chinese_convert(item['name']))
chapter = cls.fix_file_name(cls.chinese_convert(item['chapter']))
if result_type == "image":
if os.path.sep not in file:
file = os.path.join(PROJECT, "images", name, chapter, file)
elif result_type == "comic_info":
file = os.path.join(PROJECT, "images", name, chapter)
elif result_type == "cbz_icon":
file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".jpg")
elif result_type == "down_icon":
file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon"))
elif result_type == "down_cache_icon":
file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon_cache"))
elif result_type == "icon":
file = os.path.join(PROJECT, "icons", name, name+".jpg")
elif result_type == "icon_cache":
file = os.path.join(PROJECT, "icons", ".cache", name+".jpg")
elif result_type == "cbz":
file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".CBZ")
elif result_type == "images_dir":
file = os.path.join(settings.IMAGES_STORE, PROJECT, "images", name, chapter)
return file
@classmethod
def path_cbz(cls, item):
return cls.get_file_path(item, result_type="cbz", convert=True)
class ntfy:
@classmethod
def sendMsg(cls, msg,alert=False,sleep=None,error=None):
try:
print(f"#ntfy: {msg}")
if alert:
requests.post("https://ntfy.caiwenxiu.cn/PyComic",
data=msg.encode(encoding='utf-8'))
except:
print(f"#ntfy error: {msg}")
if sleep != None:
logging.info(f'Waiting {sleep} seconds before the next stage')
time.sleep(int(sleep))
if error != None:
print(f"#ntfy Error: {error}")
return False
else:
return True