This commit is contained in:
caiwx86 2023-04-06 15:34:14 +08:00
parent a8f44ec334
commit 6ca7ce1992
11 changed files with 205 additions and 141 deletions

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
COMICOUT/
.conf/
**/__pycache__/**
.vscode
logconf.log

View File

@ -91,7 +91,13 @@ class baseComic:
cls.Onechapter(chapter_url,scramble)
#进入下个阶段
#章节图片全部下载后,调用下载封面
if ciUtils.isProgressDown: downloadUtils.download_comic_icon()
comic_icon_path = ComicPath.getPathConfComicIcon()
chapter_icon_path = ComicPath.getPathCBZComicChapterIcon()
if ciUtils.isProgressDown and os.path.exists(comic_icon_path) and not os.path.exists(chapter_icon_path):
shutil.copy(comic_icon_path,chapter_icon_path)
ntfy.sendMsg(f"复制成功:{comic_icon_path} -> {chapter_icon_path}")
ciUtils.iconDB()
ciUtils.setProgressCBZ()
#下个阶段
if ciUtils.isProgressCBZ: is_next = CBZUtils.packAutoComicChapterCBZ()
#except Exception as e: is_next = ntfy.sendMsg(f"{ci.getComicName()} 下载出错了",error=e)
@ -126,22 +132,19 @@ class baseComic:
except:
htmlUtils.remove_HtmlCache(url)
cls.comicChapterDownload(url)
(list_img,files_name) = [Comic.getChapterImgs(),Comic.file_chapter_imgs]
#保存信息
ci.writeJson()
#验证数据是已存在且是否完整
is_next = CBZUtils.nextCBZ()
is_old=CBZUtils.updateOldCBZ(files_name)
is_old=CBZUtils.updateOldCBZ()
#不存在ComicInfo.xml则生成
if is_next and fu.notExists(ComicPath.getPathComicInfoXML()): ci.writeComicInfoXML()
if is_next and not is_old:
ComicPath.TIME_SLEEP = random.randint(8,15)
downloadUtils.download_images(list_img,ComicPath.getDirComicChapter(),files_name=files_name,concurrency=5,timeout=10)
# ntfy.sendMsg("等待数据检验中...",sleep=0.5)
is_next = fu.equImages(ComicPath.getDirComicChapter(),list_img)
# if not is_next: ntfy.sendMsg(msg=f"下载数据(不完整,{int(repeat*2)}秒钟后尝试第{repeat}次",sleep=int(repeat*2))
# repeat += 1
else:
if not is_next and is_old:
ComicPath.TIME_SLEEP = 0.5
downloadUtils.queueDownClear()
else:
ComicPath.TIME_SLEEP = random.randint(8,15)
#图标
downloadUtils.putDownImageUrlDirFile(Comic.getIcon(),ComicPath.getDirConfComic(),ComicPath.COMIC_ICON_FILE_NAME)
downloadUtils.start_downloads()
return is_next

View File

@ -5,30 +5,30 @@ from utils.OldUtils import OldUtils
class Comic:
# ComicInfo.xml and ComicChapter.json bengin
# value origin node dep
dict_chapter = [None,None,"Title","章节名"]
dict_comic_name = [None,None,"Series","漫画名"]
dict_number = [None,None,"Number","编号"]
dict_comic_names = [None,None,"SeriesGroup","别名"]
dict_dep = [None,None,"Summary","概述"]
dict_year = [None,None,"Year",""]
dict_month = [None,None,"Month",""]
dict_day = [None,None,"Day",""]
dict_author = [None,None,"Writer","作者"]
dict_cbs = [None,None,"Publisher","出版社"]
dict_genre = [None,None,"Genre","流派"]
dict_tags = [None,None,"Tags","标签"]
dict_homepage = [None,None,"Web","主页"]
dict_page_count = [None,None,"PageCount","总页数"]
dict_language = [None,None,"LanguageISO","语言"]
dict_agerating = [None,None,"AgeRating","年龄分级"]
dict_pages = [None,None,"Pages","页码"]
# value origin node dep required
dict_chapter = [None,None,"Title","章节名",True]
dict_comic_name = [None,None,"Series","漫画名",True]
dict_number = [None,None,"Number","编号",True]
dict_comic_names = [None,None,"SeriesGroup","别名",False]
dict_dep = [None,None,"Summary","概述",True]
dict_year = [None,None,"Year","",False]
dict_month = [None,None,"Month","",False]
dict_day = [None,None,"Day","",False]
dict_author = [None,None,"Writer","作者",True]
dict_cbs = [None,None,"Publisher","出版社",False]
dict_genre = [None,None,"Genre","流派",True]
dict_tags = [None,None,"Tags","标签",True]
dict_homepage = [None,None,"Web","主页",False]
dict_page_count = [None,None,"PageCount","总页数",True]
dict_language = [None,None,"LanguageISO","语言",True]
dict_agerating = [None,None,"AgeRating","年龄分级",False]
dict_pages = [None,None,"Pages","页码",True]
CURRENT_DOWN_LINK = None
# ComicInfo.xml and ComicChapter.json end
dict_icon = [None,None,"Icon","图标"]
dict_chapter_imgs = [None,None,"ChapterImgs","图像"]
dict_icon = [None,None,"Icon","图标",True]
dict_chapter_imgs = [None,None,"ChapterImgs","图像",True]
#主页
dict_list_chapter = [None,None,"ListChapter","全部章节名"]
dict_list_chapter = [None,None,"ListChapter","全部章节名",True]
(update_at,current_chapter_img,file_chapter_imgs) = [None,None,None]

View File

@ -3,6 +3,7 @@ from xml.dom.minidom import Document
from utils.FileUtils import dbUtils as db
from common.Comic import Comic
from common.Constant import ComicPath
from utils.Logger import logger
class ComicInfoEntity:
@classmethod
@ -48,7 +49,8 @@ class ComicInfo:
#页数
@classmethod
def setPages(cls,values):
def setPages(cls,values=None):
if values == None: values = Comic.getChapterFilesName()
if values != None and isinstance(values,list):
suffix = "."+str(values[0]).split(".")[-1]
join_list=",".join(values).replace(suffix,"")
@ -86,10 +88,20 @@ class ComicInfo:
if len(list_value) == 0: return list_value
for value in list_value:
#Comic.chapter
if value[0] == None and value[4]:
#数据为空 value[0] 但不允许为空value[4] = False
msg = f"#数据为空 key={value[3]} value[0]={value[0]} 但不允许为空value[4]={value[4]}"
logger.error(msg)
exit()
if value[0] != None: root.appendChild(cls.setNodeAndValue(value[2],value[0]))
# Build derived ComicInfo fields before the XML document is assembled;
# currently this only fills in the Pages value via setPages() (which
# defaults to Comic.getChapterFilesName() when called with no args).
@classmethod
def initComicInfoXML(cls):
cls.setPages()
@classmethod
def writeComicInfoXML(cls,overlay=False):
cls.initComicInfoXML()
root = cls.root_node("ComicInfo")
new_document = Document()
new_document.appendChild(root)
@ -98,7 +110,7 @@ class ComicInfo:
with open(save_path, "w", encoding="utf-8") as fo:
new_document.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
fo.close()
print(f"已生成文件... {save_path}")
logger.info(f"已生成文件... {save_path}")
@classmethod
def setComicInfo(cls,comicname=None,homepage=None,alias=None,author=None,icon=None,tags=None,
@ -131,7 +143,8 @@ class ComicInfo:
if key != None and isinstance(value,str):
child_dict = { key : value}
dict_data.update(child_dict)
s = json.dumps(dict_data,ensure_ascii=False)
s = json.dumps(dict_data,ensure_ascii=True)
logger.info(f"json={s}")
with open(ComicPath.getPathConfComicChapterJson(mkdir=True),"w") as fs:
fs.write(s)

View File

@ -76,6 +76,8 @@ class pathStr:
class ComicPath:
COMIC_ICON_NAME = "000"
COMIC_ICON_SUFFIX = "jpg"
COMIC_ICON_FILE_NAME = COMIC_ICON_NAME+"."+COMIC_ICON_SUFFIX
DEFAULT_PATH = "default"
PROGRESS_PATH = "progress"
ICONS_PATH = "icons"
@ -115,7 +117,7 @@ class ComicPath:
def getPathConfComicChapterJson(cls,mkdir=True): return cls.setDirConf([Comic.getOriginComicName(),Comic.getOriginChapterName()],suffix="json",mkdir=mkdir)
#漫画Icon路径
@classmethod
def getPathConfComicIcon(cls,mkdir=True,suffix="jpg"): return cls.setDirConf([Comic.getOriginComicName(),cls.COMIC_ICON_NAME],suffix=suffix,mkdir=mkdir)
def getPathConfComicIcon(cls,mkdir=True): return cls.setDirConf([Comic.getOriginComicName(),cls.COMIC_ICON_NAME],suffix=cls.COMIC_ICON_SUFFIX,mkdir=mkdir)
# Icons json file path
# NOTE(review): the comment above says "Icons json file path" but the method
# returns the conf-side DEFAULT_PATH directory — comment/function mismatch,
# confirm against callers.
@classmethod
def getDirConfDefault(cls,path=None,suffix=None,mkdir=True): return cls.setDirConf([cls.DEFAULT_PATH,path],suffix=suffix,mkdir=mkdir)
@ -130,7 +132,7 @@ class ComicPath:
def getDirCBZComicChapter(cls,suffix=None): return cls.setDirCBZ([Comic.getComicName(),Comic.getChapterName()],suffix=suffix)
#漫画章节CBZ同根下封面
@classmethod
def getPathCBZComicChapterIcon(cls,suffix="jpg"): return cls.setDirCBZ([Comic.getComicName(),Comic.getChapterName()],suffix=suffix)
def getPathCBZComicChapterIcon(cls,suffix="jpg",mkdir=True): return cls.setDirCBZ([Comic.getComicName(),Comic.getChapterName()],suffix=suffix,mkdir=mkdir)
# Path of the legacy (old-version) comic-chapter CBZ file, built from the
# origin comic/chapter names with a "CBZ" suffix.
@classmethod
def getPathOldCBZComicChapter(cls): return cls.setDirOld([Comic.getOriginComicName(),Comic.getOriginChapterName()],suffix="CBZ")

View File

@ -6,6 +6,7 @@ from common.Constant import ComicPath
from utils.FileUtils import imageUtils
from utils.NetUtils import htmlUtils
from utils.ComicUtils import ntfy
from utils.NetUtils import downloadUtils as downUtils
class DomainDown:
@classmethod
@ -30,16 +31,19 @@ class DomainDown:
ntfy.sendMsg(f"未获取到章节图像 comic_name={Comic.getComicName()} chapter={Comic.getChapterName()}")
count = 1
list_img,list_file_name = [[],[]]
(files_name,images_url) = [[],[]]
for image in images:
(image_src,scramble) = [image.get("src"),image.get("scramble")]
count_image = "{:0>3d}".format(count)
list_img.append(image_src)
image_src_suffix = "."+str(image_src).split(".")[-1]
if scramble:
de_str = str(image_src).split("/")[-1].replace(image_src_suffix,"==")
blocks_num = imageUtils.encodeImage(de_str)
count_image = "scramble="+str(blocks_num)+"_"+count_image
list_file_name.append(count_image+image_src_suffix)
image_file_name = count_image+image_src_suffix
files_name.append(image_file_name)
images_url.append(image_src)
downUtils.putDownImageUrlDirFile(image_src,ComicPath.getDirComicChapter(),image_file_name)
count+=1
ciUtils.comicChapterDownload(list_img,list_file_name)
Comic.setChapterImgs(images_url)
Comic.setChapterFilesName(files_name)

45
logconf.yml Normal file
View File

@ -0,0 +1,45 @@
version: 1
# Whether to disable loggers that already exist when this config is applied
disable_existing_loggers: True
formatters:
tostrout:
format: "%(asctime)s - %(name)s - %(levelname)-9s - %(filename)-8s : %(lineno)s line - %(message)s"
datefmt: "%Y/%m/%d %H:%M:%S"
tofile:
format: "%(asctime)s - %(name)s - %(levelname)-9s - %(filename)-8s : %(lineno)s line - %(message)s"
handlers:
sh:
class: logging.StreamHandler
level: DEBUG
formatter: tostrout
stream: ext://sys.stdout
fh:
class: logging.handlers.TimedRotatingFileHandler
filename: logconf.log
interval: 1
backupCount: 2
when: D
level: INFO
formatter: tofile
loggers:
logger1:
level: DEBUG
handlers: [sh]
# Whether to propagate records up to the parent logger. If "yes" and root
# also attaches both handlers, the same record would be emitted twice, so
# use "no" and let each logger record only its own output.
propagate: no
logger2:
level: INFO
handlers: [fh]
propagate: no
root:
level: DEBUG
handlers: [sh,fh]
propagate: no

View File

@ -2,13 +2,12 @@ import os,shutil,time,requests
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile
from queue import Queue
from common.ComicInfo import ComicInfoUtils as ciUtils
from common.ComicInfo import ComicInfo as ci
from common.ComicInfo import Comic
from utils.FileUtils import fileUtils as fu
from common.Constant import pathStr
from common.Constant import ComicPath
from utils.OldUtils import OldUtils
class ntfy:
@classmethod
@ -121,7 +120,7 @@ class CBZUtils:
with ZipFile(path, "r") as zip_file:
result = zip_file.namelist()
if filter:
filter_icon = ComicPath.COMIC_ICON_NAME+".jpg"
filter_icon = ComicPath.COMIC_ICON_FILE_NAME
filter_info_xml = ComicPath.COMIC_INFO_XML
if filter_icon in result: result.remove(filter_icon)
if filter_info_xml in result: result.remove(filter_info_xml)
@ -155,7 +154,8 @@ class CBZUtils:
return False
@classmethod
def updateOldCBZ(cls,filesname,result=False):
def updateOldCBZ(cls,filesname=None,result=False):
if filesname == None: filesname = Comic.getChapterFilesName()
old_zipfile_path = ComicPath.getPathOldCBZComicChapter()
#判断是否存在已下载CBZ文件

View File

@ -3,7 +3,7 @@ import math,time,json
from PIL import Image
from tinydb import TinyDB, Query
from common.Constant import ComicPath
from common.Comic import Comic
from utils.Logger import logger
class imageUtils:
@ -226,15 +226,15 @@ class fileUtils:
img = Image.open(file_path)
img.verify()
img.close()
print(f"{file_path} 类型为type:{type} size: {file_size_unit}")
logger.debug(f"{file_path} 类型为type:{type} size: {file_size_unit}")
return True
except:
if os.path.exists(file_path):
os.remove(file_path)
print(f"{file_path}已损坏 type:{type} size: {file_size_unit},删除重试中")
logger.warning(f"{file_path}已损坏 type:{type} size: {file_size_unit},删除重试中")
return False
else:
print(f"文件小于100b删除中,{file_path} size: {file_size_unit}")
logger.warning(f"文件小于100b删除中,{file_path} size: {file_size_unit}")
try:
os.remove(file_path)
except Exception as e:
@ -263,15 +263,15 @@ class fileUtils:
if not os.path.exists(path):
return None
try:
print(f"{path} 删除中...")
logger.debug(f"{path} 删除中...")
if os.path.isfile(path):
os.remove(path)
if os.path.isdir(path):
shutil.rmtree(path)
print(f"已删除:{path}")
logger.debug(f"已删除:{path}")
return True
except:
print(f"删除错误:{path}")
logger.warning(f"删除错误:{path}")
return False
#文件保存
@ -298,7 +298,7 @@ class fileUtils:
result = path + "文件写入成功"
except:
result = path + "文件写入失败"
print(result)
logger.debug(result)
return result
class dbUtils:
@ -317,7 +317,7 @@ class dbUtils:
else: db.update({"progress":progress},comic.name== name)
msg = "失败"
if cls.query(name,progress,db_name): msg = "成功"
print(f"设置{msg}, name={name} value={progress} db={db_name}")
logger.info(f"设置{msg}, name={name} value={progress} db={db_name}")
@classmethod
def query(cls,name,progress=None,db_name=None):

18
utils/Logger.py Normal file
View File

@ -0,0 +1,18 @@
# -*- coding:utf-8 -*-
import logging
import logging.config
import yaml
class logger:
    """Thin facade over the root ``logging`` logger, configured from logconf.yml.

    NOTE(review): the YAML config is loaded and applied as a side effect of
    class-body execution (i.e. at import time); ``logconf.yml`` must exist in
    the current working directory or the import fails.
    """
    # Load the dictConfig-style configuration exactly once.
    with open("logconf.yml", "r", encoding="utf-8") as f:
        dict_conf = yaml.safe_load(f)
    logging.config.dictConfig(dict_conf)
    # Every facade method below delegates to the root logger.
    root = logging.getLogger()

    @classmethod
    def debug(cls, value):
        """Emit *value* at DEBUG level."""
        cls.root.debug(value)

    @classmethod
    def info(cls, value):
        """Emit *value* at INFO level."""
        cls.root.info(value)

    @classmethod
    def warning(cls, value):
        """Emit *value* at WARNING level."""
        cls.root.warning(value)

    @classmethod
    def error(cls, value):
        """Emit *value* at ERROR level."""
        cls.root.error(value)

View File

@ -12,7 +12,7 @@ from common.ComicInfo import ComicInfo as ci
from common.ComicInfo import Comic
from common.Constant import ComicPath
from utils.FileUtils import fileUtils as fu
from utils.Logger import logger
class htmlUtils:
@ -134,9 +134,9 @@ class htmlUtils:
if os.path.exists(html_cache_path):
try:
os.remove(html_cache_path)
print(f"html_cache更新成功 {html_cache_path}")
logger.info(f"html_cache更新成功 {html_cache_path}")
except:
print(f"html_cache更新失败 {html_cache_path}")
logger.info(f"html_cache更新失败 {html_cache_path}")
#获取html实体数据
et = cls.getHTML(url)
if et == None:
@ -156,6 +156,9 @@ class htmlUtils:
return result
class downloadUtils:
QUEUE_DOWN = Queue()
TYPE_IMG = "image"
TYPE_ICON = "icon"
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Proxy-Connection": "keep-alive",
@ -164,56 +167,66 @@ class downloadUtils:
"Accept-Encoding": "gzip, deflate, sdch",
# 'Connection': 'close',
}
down_queue = Queue()
@classmethod
def common_download(cls,file_name,image_url,dst_dir,timeout=10,proxy=None,proxy_type=None):
def queueDownClear(cls): cls.QUEUE_DOWN = Queue()
# Enqueue one pending download as a 4-item list [url, dir, file, type]
# on the shared class-level QUEUE_DOWN queue.
# NOTE(review): params `dir`, `file`, `type` shadow Python builtins —
# rename candidates in a follow-up.
@classmethod
def putDownUrlDirFileType(cls,url,dir,file,type):
cls.QUEUE_DOWN.put([url,dir,file,type])
# Pop and return the next queued [url, dir, file, type] entry, or None when
# the queue is empty; get(False) never blocks.
# NOTE(review): empty() followed by get(False) is not atomic — under
# concurrent consumers a queue.Empty race is possible; confirm single-consumer
# usage or catch queue.Empty.
@classmethod
def getDownUrlDirFileType(cls):
if not cls.QUEUE_DOWN.empty(): return cls.QUEUE_DOWN.get(False)
else: return None
# Convenience wrapper: enqueue an image download (file type = TYPE_IMG).
@classmethod
def putDownImageUrlDirFile(cls,url,dir,file): cls.putDownUrlDirFileType(url,dir,file,cls.TYPE_IMG)
@classmethod
def common_download(cls,repair_max=10,timeout=10,proxy=None,proxy_type=None):
result = cls.getDownUrlDirFileType()
if result == None: return None
(file_url,dir,file,file_type) = [result[0],result[1],result[2],result[3]]
if file_url == None:
logger.error("common_down file_url 为空")
raise NameError("common_down file_url为空")
proxies = None
if proxy_type is not None:
proxies = {
"http": proxy_type + "://" + proxy,
"https": proxy_type + "://" + proxy }
response = None
file_path = os.path.join(dst_dir, file_name)
if os.path.exists(file_path):
print("download_image 文件已存在,已跳过=",file_path)
return None
temp_path = os.path.join(dst_dir, file_name+".downloads")
save_path = os.path.join(dir,file)
logger.debug(f"save_path {save_path}")
if not os.path.exists(dir): os.makedirs(dir)
temp_path = save_path+".downloads"
repair_count = 1
while not os.path.exists(save_path):
try:
response = requests.get(
image_url, headers=cls.headers, timeout=timeout, proxies=proxies)
if response.status_code != 200 and repair_count <= 5:
file_url, headers=cls.headers, timeout=timeout, proxies=proxies)
if response.status_code != 200 and repair_count <= repair_max:
logger.warning("下载异常")
raise NameError("下载异常")
with open(temp_path, 'wb') as f:
f.write(response.content)
time.sleep(0.7)
response.close()
#验证是否是图像
if fu.ver_file(temp_path,type="image"):
shutil.move(temp_path, file_path)
print("## OK: {} {}".format(file_path, image_url))
if fu.ver_file(temp_path,type=file_type):
shutil.move(temp_path, save_path)
logger.info("## OK: {} {}".format(save_path, file_url))
else:
raise NameError("## Fail: {} {}".format(image_url, "图像损坏"))
logger.warning("## Fail: {} {}".format(file_url, "图像损坏"))
raise NameError("## Fail: {} {}".format(file_url, "图像损坏"))
except Exception as e:
print(f"common_down() 出错了 {e}")
print(f'重试:第{repair_count}{image_url}')
cls.down_queue.put([file_name,image_url,dst_dir])
logger.warning(f'重试:第{repair_count}次 异常:{e} {file_url}')
cls.putDownUrlDirFileType(file_url,dir,file,file_type)
repair_count += 1
@classmethod
def download_image(cls,timeout=20, proxy_type=None, proxy=None,type="image"):
repeat = 1
while not cls.down_queue.empty() and repeat <= 10:
data = cls.down_queue.get(False)
(file_name,image_url,dst_dir) = [data[0],data[1],data[2]]
cls.common_download(file_name,image_url,dst_dir,timeout=timeout,
proxy=proxy,proxy_type=proxy_type)
repeat += 1
@classmethod
def download_images(cls,image_urls, dst_dir,concurrency=None,timeout=20,proxy_type=None, proxy=None,files_name=None):
def start_downloads(cls,repair_max=20,concurrency=None,timeout=20,proxy_type=None, proxy=None):
"""
Download image according to given urls and automatically rename them in order.
:param timeout:
@ -226,46 +239,12 @@ class downloadUtils:
:return: none
"""
if concurrency == None:
concurrency = len(image_urls)
concurrency = cls.QUEUE_DOWN.qsize()
logger.debug(f"concurrency= {concurrency}")
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
future_list = list()
count = 0
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
for image_url in image_urls:
file_name = files_name[count]
cls.down_queue.put([file_name,image_url,dst_dir])
while not cls.QUEUE_DOWN.empty():
logger.debug("QUEUE_DOWN 不为空 准备下载中...")
future_list.append(executor.submit(
cls.download_image,timeout, proxy_type, proxy))
count += 1
cls.common_download,timeout, proxy_type, proxy))
concurrent.futures.wait(future_list, timeout)
@classmethod
def download_comic_icon(cls,is_new=ciUtils.IS_NEW_ICON):
icon_url = Comic.getIcon()
if icon_url == None:
print("icon 不存在,已跳过")
return None
icon_suffix = str(icon_url).split(".")[-1]
#判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过
path_comic_icon = ComicPath.getPathConfComicIcon(suffix=icon_suffix)
if not ciUtils.equIcon() and fu.exists(path_comic_icon): os.remove(path_comic_icon)
while fu.notExists(path_comic_icon):
cls.down_queue.put([ComicPath.COMIC_ICON_NAME+"."+icon_suffix,icon_url,ComicPath.getDirConfComic()])
cls.download_image()
save_path = ComicPath.getPathCBZComicChapterIcon(icon_suffix)
if is_new:
#历史版本ICON
if os.path.exists(save_path): os.remove(save_path)
if os.path.exists(path_comic_icon):
base_dir = ComicPath.getDirComicChapter()
if not os.path.exists(base_dir): os.makedirs(base_dir)
shutil.copy(path_comic_icon,os.path.join(base_dir,ComicPath.COMIC_ICON_NAME+icon_suffix))
else:
if fu.notExists(ComicPath.getDirCBZComic()): os.makedirs(ComicPath.getDirCBZComic())
if fu.notExists(save_path):
shutil.copy(path_comic_icon,save_path)
print(f"{path_comic_icon} 已复制至: {save_path}")
#保存icon信息
ciUtils.iconDB()
ciUtils.setProgressCBZ()