This commit is contained in:
caiwx86 2023-03-30 02:01:06 +08:00
parent eaf15969f0
commit 25be464102
8 changed files with 119 additions and 121 deletions

Binary file not shown.

View File

@ -5,33 +5,24 @@ from utils.downloader import download_images
from utils.base.BaseComicEntity import baseComic
class comicEntity:
@classmethod
def baseComicData(cls,url,update=False):
data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update)
data = json.loads(data[0])
data = data.get("props")
x = data.get("pageProps")
return x
@classmethod
def downladsComcis(cls,url):
#漫画名
x = cls.baseComicData(url,True)
books = x.get("books")
len_books = len(books)
base_url = comicInfo.getBaseUrl(url)
for x in range(0, len_books):
book = books[x]
book_id = book.get("id")
book_name = book.get("name")
updated = book.get("updatedAt")
comic_href = base_url+"/books/"+book_id
comicInfo.setUpdateAt(updated)
comicInfo.setComicName(book_name)
cls.oneComic(baseComic.comics(book_name=book_name,comic_href=comic_href,updated=updated),date=updated)
@classmethod
def oneComic(cls,c_url,sleep=None,date=None):
def booksJson(cls,url,update=False):
    """Return the ``books`` entry of the page's embedded ``__NEXT_DATA__`` JSON.

    The first text node matched by the ``__NEXT_DATA__`` script XPath is
    parsed as JSON, then ``props -> pageProps -> books`` is read from it.

    Args:
        url: Page address handed to ``htmlUtils.xpathData``.
        update: Forwarded unchanged to ``htmlUtils.xpathData``.

    Returns:
        The value stored under ``props.pageProps.books`` (``None`` when the
        final key is absent).
    """
    script_texts = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update)
    payload = json.loads(script_texts[0])
    page_props = payload.get("props").get("pageProps")
    return page_props.get("books")
@classmethod
def downladsComcis(cls,url):
    """Register and download every book listed on the page at ``url``.

    For each book returned by ``cls.booksJson`` the comic name, the book page
    address (stored through the ``current_chapter_img`` argument of
    ``comicInfo.setComicInfo``) and the ``updatedAt`` value are recorded, then
    ``cls.oneComic`` is called with the result of ``baseComic.updateComics()``.

    Args:
        url: Listing page address; also used to derive the base URL of each
            book link.
    """
    # `or []` keeps a page without a "books" entry from failing on None.
    books = cls.booksJson(url,True) or []
    for book in books:
        comicInfo.setComicInfo(comicname=book.get("name"),
                               current_chapter_img=comicInfo.getBaseUrl(url)+"/books/"+book.get("id"),
                               update_at=book.get("updatedAt"))
        cls.oneComic(baseComic.updateComics())
@classmethod
def oneComic(cls,c_url,sleep=None,date=comicInfo.getUpdateAt()):
if c_url == None: return None
#漫画名
title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0,update=True)

View File

@ -5,7 +5,7 @@ from pathlib import Path
from zipfile import ZipFile
from utils.comic.ComicInfo import comicInfo
from utils.Ntfy import ntfy
from utils.FileUtils import fileUtils
from utils.FileUtils import fileUtils as fu
from utils.comic.PathStr import pathStr
class CBZUtils:
@ -14,7 +14,7 @@ class CBZUtils:
def getCBZ_Dir(cls): return comicInfo.getNewCBZComicChapter()
@classmethod
def getCBZ_Path(cls): return cls.getCBZ_Dir()+".CBZ"
def getCBZ_Path(cls): return comicInfo.getNewFileCBZComicChapter()
@classmethod
def readDirsOrFiles(cls,dir,type):
@ -62,7 +62,7 @@ class CBZUtils:
return False
cls.zip_compression(comicInfo.getDirComicChapter(), cls.getCBZ_Path())
time.sleep(0.1)
fileUtils.remove(comicInfo.getDirComicChapter())
fu.remove(comicInfo.getDirComicChapter())
return True
@classmethod
@ -82,14 +82,7 @@ class CBZUtils:
print(f"已更新文件时间 {filepath}")
if os.path.exists(unpack_dir):
shutil.rmtree(unpack_dir)
# 删除删除main.ftl文件
#delete_filename = ''
#if os.path.exists(delete_filename):
# os.remove(delete_filename)
# time.sleep(60)
# shutil.copy(文件的路径,另一个目录);拷贝main.ftl到准备压缩的目录下
#cls.zip_compression()
#小于则运行
@classmethod
def compareFileDate(cls,filepath):
if os.path.exists(filepath):
@ -110,57 +103,53 @@ class CBZUtils:
with ZipFile(path, "r") as zip_file:
result = zip_file.namelist()
if filter:
result.remove(comicInfo.COMIC_ICON_NAME+".jpg")
result.remove(comicInfo.COMIC_INFO_XML)
filter_icon = comicInfo.COMIC_ICON_NAME+".jpg"
filter_info_xml = comicInfo.COMIC_INFO_XML
if filter_icon in result: result.remove(filter_icon)
if filter_info_xml in result: result.remove(filter_info_xml)
except Exception as e:
print(e)
return result
#CBZ检验是否完整
@classmethod
def verCBZComic(cls,path=None,list_img=None,min_size=300000):
    """Check whether a chapter's CBZ archive holds the expected images.

    Args:
        path: Archive to check; defaults to ``cls.getCBZ_Path()``.
        list_img: Expected chapter images; defaults to
            ``comicInfo.getChapterImgs()``.
        min_size: An archive smaller than this many bytes is removed.

    Returns:
        True when the archive exists and the number of entries reported by
        ``cls.zip_info`` equals ``len(list_img)``; False otherwise. On a
        failed check the archive is removed if it has too few entries or is
        smaller than ``min_size``, and progress is reset to
        ``comicInfo.PROGRESS_NONE``.
    """
    if path is None: path = cls.getCBZ_Path()
    # Nothing to verify when the archive is missing.
    if fu.notExists(path): return False
    if list_img is None: list_img = comicInfo.getChapterImgs()
    expected_count = len(list_img)
    # Read the archive listing once and reuse it for both comparisons.
    actual_count = len(cls.zip_info(path))
    if actual_count == expected_count:
        print(f"文件校验成功:{path}")
        comicInfo.setProgress(comicInfo.PROGRESS_DONE)
        return True
    try:
        if actual_count < expected_count or os.path.getsize(path) < min_size:
            fu.remove(path)
            comicInfo.setProgress(comicInfo.PROGRESS_NONE)
    except Exception as e:
        # Best effort: a failed size check or removal is reported, not raised.
        print(e)
    return False
@classmethod
def updateOldCBZ(cls,filesname,result=False):
    """Reuse a previously downloaded CBZ for the current chapter when possible.

    Looks for ``<pathStr.old_cbz_path>/<comic name>/<chapter>.CBZ``. When that
    archive exists, the new CBZ does not exist yet, and the archive holds
    exactly one entry more than ``filesname`` (the extra entry being
    ComicInfo.xml), it is extracted into the chapter working directory, every
    ``.jpg`` is verified, and ComicInfo.xml is rewritten.

    Args:
        filesname: Expected image file names of the chapter.
        result: Value returned when no old archive is reused.

    Returns:
        True when the old archive was extracted and all images verified;
        False when a corrupted image was found (the extracted directory is
        removed); otherwise ``result``.
    """
    old_zipfile_path = os.path.join(pathStr.old_cbz_path,comicInfo.str_comic_name,comicInfo.str_chapter+".CBZ")
    # Only reuse an old archive while the new CBZ has not been produced.
    if fu.notExists(old_zipfile_path) or fu.exists(CBZUtils.getCBZ_Path()):
        return result
    print(f"存在CBZ文件{old_zipfile_path},解压中...")
    # `with` closes the archive on every path, including a count mismatch.
    with ZipFile(old_zipfile_path) as zip_file:
        # Entry count excludes ComicInfo.xml.
        if len(filesname) != len(zip_file.namelist())-1:
            return result
        unzip_path = comicInfo.getDirComicChapter()
        zip_file.extractall(unzip_path)
    print(f"解压完成: CBZ文件{old_zipfile_path}")
    print("文件校验中...")
    for file in os.listdir(unzip_path):
        if file.endswith(".jpg") and not fu.ver_file(os.path.join(unzip_path,file),type="image"):
            # A damaged image invalidates the whole extraction: drop it and
            # report failure so the chapter is downloaded again.
            fu.remove(unzip_path)
            return False
    comicInfo.writeComicInfoXML(overlay=True)
    return True

View File

@ -203,6 +203,9 @@ class fileUtils:
@classmethod
def exists(cls,path):
return os.path.exists(path)
@classmethod
def notExists(cls,path):
return not cls.exists(path)
@classmethod
def ver_file(cls,file_path,type):
@ -234,13 +237,16 @@ class fileUtils:
if not os.path.exists(path):
return None
try:
print(f"{path} 删除中...")
if os.path.isfile(path):
os.remove(path)
if os.path.isdir(path):
shutil.rmtree(path)
print(f"已删除:{path}")
return True
except:
print(f"删除错误:{path}")
return False
class dbUtils:
@classmethod

View File

@ -10,19 +10,21 @@ from entity.down.RouMan import comicCommon as RouManComicCommon
from entity.down.JM import comicCommon as JMComicCommon
from entity.down.BaoZi import comicCommon as BaoZiComicCommon
from utils.comic.PathStr import pathStr
from utils.FileUtils import fileUtils
from utils.FileUtils import fileUtils as fu
class baseComic:
count_chapter = 0
#校验该漫画是否为最新
# Y/跳过 N/下载 返回下载链接
@classmethod
def comics(cls,book_name,comic_href,updated=None):
#不判断是否为空ComicInfo方法内会自动判断
ci.setComicInfo(comicname=book_name,update_at=updated)
def updateComics(cls):
(book_name,comic_href,updated) = [ci.getComicName(),ci.getCurrentChapterImg(),ci.getUpdateAt()]
#白名单跳过
if ci.getIsComicNameSkips(book_name): return None
if not os.path.exists(ci.getDirConfComic()): ntfy.sendMsg(f"{random.randint(5,20)}秒后开始下载 漫画:{book_name}")
else: ntfy.sendMsg(f"已存在 漫画:{book_name}")
if not ci.isUpdateComic(): return comic_href
if not ci.isUpdateComic():
ntfy.sendMsg(f"开始下载 漫画:{book_name}")
return comic_href
else:
ntfy.sendMsg(f"{book_name} 已是最新")
chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()',url=comic_href,update=False)
@ -49,13 +51,10 @@ class baseComic:
for href in chapter_href:
ci.setChapterName(chapters[cls.count_chapter])
ci.setChapterIndex(cls.count_chapter+1)
if ci.isProgress(ci.PROGRESS_DONE) and not os.path.exists(ci.getNewCBZComicChapter("file")): ci.isProgress(ci.PROGRESS_DONE,remove=True)
if not ci.isProgress(ci.PROGRESS_DONE): cls.comicChapter(href,scramble=True,sleep=random.randint(5,15))
#存在就校验CBZ包是否完整
#if ci.nextExistsGetPath("done_"):
# verUtils.verCBZ()
#if ci.isProgress(ci.PROGRESS_DONE):
# CBZUtils.verCBZComic()
#存在完成配置文件 但文件不存在 将清空完成配置文件
if ci.isProgress(ci.PROGRESS_DONE) and not fu.exists(ci.getNewCBZComicChapter("file")): ci.isProgress(ci.PROGRESS_DONE,remove=True)
#不存在完成配置文件 则允许下载
if not ci.isProgress(ci.PROGRESS_DONE): cls.comicChapters(href,scramble=True,sleep=random.randint(5,15))
cls.count_chapter += 1
#一本漫画下载后等待
#清空文件夹
@ -67,9 +66,9 @@ class baseComic:
读取某章节下所有图片
'''
@classmethod
def comicChapter(cls,chapter_url,scramble=None,sleep=None,is_next=True):
def comicChapters(cls,chapter_url,scramble=None,sleep=None):
try:
is_next = cls.Onechapter(chapter_url,scramble)
cls.Onechapter(chapter_url,scramble)
#进入下个阶段
#章节图片全部下载后,调用下载封面
if ci.isProgress(ci.PROGRESS_DOWN): download_comic_icon()
@ -80,15 +79,14 @@ class baseComic:
ci.setChapterIndex(cls.count_chapter + 1)
if sleep != None and is_next: ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节",sleep=sleep)
#根据章节地址下载全部图片并将文件名scramble开头的图片进行解密
@classmethod
def Onechapter(cls,chapter_url,scramble=None):
is_next = True
if not str(chapter_url).startswith("http"): chapter_url = ci.getBaseUrl() + chapter_url
try:
is_next = cls.comicChapterDownload(chapter_url)
except:
htmlUtils.remove_HtmlCache(chapter_url)
#下载图片
try: is_next = cls.comicChapterDownload(chapter_url)
except: htmlUtils.remove_HtmlCache(chapter_url)
ci.nextInfoToImgChapter()
#下载完成后, 开始解密图片
chapter_dir = ci.getDirComicChapter()
@ -103,33 +101,32 @@ class baseComic:
@classmethod
def comicChapterDownload(cls,url,is_next=True):
#获取本次工程的HOME目录
comic_main = pathStr.getComicMain()
if comic_main == pathStr.comic_jm: JMComicCommon.comicChapterDownload(url)
if comic_main == pathStr.comic_bz: BaoZiComicCommon.comicChapterDownload(url)
if comic_main == pathStr.comic_rm: RouManComicCommon.comicChapterDownload(url)
if comic_main == None: print("comic_main为空退出中...") & exit()
list_img = ci.getChapterListImg()
files_name = ci.getChapterFilesName()
chapter_name = ci.getChapter()
book_name = ci.getComicName()
(list_img,files_name,chapter_name,book_name) = [ci.getChapterListImg(),ci.getChapterFilesName(),ci.getChapter(),ci.getComicName()]
ci.setChapterImgs(list_img)
#保存信息
ci.nextSaveInfoChapter(chapter_name, list_img)
#验证数据是已存在且是否完整
#cbz_path = ci.getDirCBZComicChapter()+".CBZ"
cbz_path = ci.getNewCBZComicChapter()+".CBZ"
cbz_file = ci.getNewFileCBZComicChapter()
#更新Icon
ci.getNewIconComicChapter()
if os.path.exists(cbz_path): CBZUtils.verCBZComic(cbz_path)
#检验CBZ文件
CBZUtils.verCBZComic(cbz_file)
is_unzip = CBZUtils.existsUnzipCBZ(files_name)
if is_next and not os.path.exists(ci.getPathComicInfoXML()):
if is_next and fu.notExists(ci.getPathComicInfoXML()):
#print("不存在ComicInfo.xml 生成中...")
ci.setPages(files_name)
ci.writeComicInfoXML(chapter_name)
is_next = CBZUtils.nextCBZ(list_img)
repeat = 1
while is_next and repeat <= 2 and not is_unzip:
while is_next and repeat <= 2 and not CBZUtils.updateOldCBZ(files_name):
ntfy.sendMsg(f"{book_name} {chapter_name} 下载中")
download_images(list_img,ci.getDirComicChapter(), files_name=files_name,concurrency=10,timeout=60)
equ_next = len(",".join(os.listdir(ci.getDirComicChapter())).split(".jpg"))-1 == len(list_img)

View File

@ -13,6 +13,8 @@ class comicInfo():
PROGRESS_CBZ = "cbz"
PROGRESS_DONE = "done"
PROGRESS_NONE = "none"
IS_NEW_ICON = False
document = Document()
path_comic_info = None
@ -50,6 +52,7 @@ class comicInfo():
str_files_img = None
str_chapter_index= None
str_value1 = None
str_current_chapter_img = None
list_skip = []
chapter_node = None
@ -88,8 +91,7 @@ class comicInfo():
@classmethod
def setComicValue(cls,value):
    """Normalise a value before it is stored on the class.

    Args:
        value: Candidate value of any type.

    Returns:
        ``value`` unchanged, or ``None`` when ``value`` is ``None`` or the
        empty string.
    """
    # Both checks must hold; joining them with `or` would accept every value.
    if value is None or value == "":
        return None
    return value
@classmethod
@ -97,6 +99,10 @@ class comicInfo():
@classmethod
def setChapterImgs(cls, value): cls.str_chapter_imgs = cls.setComicValue(value)
@classmethod
def setCurrentChapterImg(cls, value): cls.str_current_chapter_img = cls.setComicValue(value)
@classmethod
def getCurrentChapterImg(cls): return cls.str_current_chapter_img
@classmethod
def getChapterImgs(cls): return cls.str_chapter_imgs
@classmethod
def getLenChapters(cls): return len(cls.str_list_chapter)
@ -142,7 +148,7 @@ class comicInfo():
@classmethod
def setPageCount(cls,value):
cls.str_page_count = cls.setComicValue(int(value)+1)
cls.str_page_count = cls.setComicValue(int(value))
cls.page_count_node = cls.setNodeAndValue(cls.page_count,cls.str_page_count)
@classmethod
@ -151,13 +157,14 @@ class comicInfo():
su = "."+str(value[0]).split(".")[-1]
join_list=",".join(value).replace(su,"")
value = join_list.split(",")
cls.setPageCount(str(len(value)))
cls.setPageCount(len(value)+1 if cls.IS_NEW_ICON else len(value))
root_node = cls.document.createElement(cls.pages)
#添加封面
icon_node = cls.document.createElement("Page")
icon_node.setAttribute("Image","000")
icon_node.setAttribute("Type","FrontCover")
root_node.appendChild(icon_node)
if cls.IS_NEW_ICON:
#添加封面
icon_node = cls.document.createElement("Page")
icon_node.setAttribute("Image","000")
icon_node.setAttribute("Type","FrontCover")
root_node.appendChild(icon_node)
for page in value:
c_node = cls.document.createElement("Page")
page = page.split("_")[-1]
@ -275,6 +282,11 @@ class comicInfo():
def getNewCBZComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".CBZ", type)
@classmethod
def getNewIconComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".jpg", type)
@classmethod
def getNewFileCBZComicChapter(cls,type="file"): return cls.getNewToComicChapter(".CBZ", type)
@classmethod
def getNewFileIconComicChapter(cls,type="file"): return cls.getNewToComicChapter(".jpg", type)
@classmethod
def getNewToComicChapter(cls,su,type="dir"):
@ -437,7 +449,7 @@ class comicInfo():
@classmethod
def setComicInfo(cls,comicname=None,homepage=None,alias=None,author=None,icon=None,tags=None,
dep=None,genre=None,lang=None,chapters=None,update_at=None):
dep=None,genre=None,lang=None,chapters=None,update_at=None,current_chapter_img=None):
author = str(author).replace("&",",").replace(" ",",")
cls.setHomePage(homepage)
cls.setComicName(str(comicname))
@ -450,4 +462,5 @@ class comicInfo():
if genre != None: cls.setGenre(genre)
cls.setLang(lang)
cls.setListChapter(chapters)
cls.setUpdateAt(update_at)
cls.setUpdateAt(update_at)
cls.setCurrentChapterImg(current_chapter_img)

View File

@ -9,7 +9,7 @@ class pathStr:
comic_url_main = None
base_comic_out = os.path.join("/mnt", "Comics")
base_unzip_path = os.path.join("/mnt","OldComics")
old_cbz_path = os.path.join("/mnt","OldComics")
@classmethod
def base_cbz(cls): return cls.getBaseComicPath("CBZ")
@classmethod

View File

@ -14,7 +14,7 @@ import time
from utils.Ntfy import ntfy
from utils.comic.ComicInfo import comicInfo
from utils.HtmlUtils import htmlUtils
from utils.FileUtils import fileUtils
from utils.FileUtils import fileUtils as fu
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
@ -53,7 +53,7 @@ def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, p
f.write(response.content)
response.close()
#验证是否是图像
if fileUtils.ver_file(temp_path,type=type):
if fu.ver_file(temp_path,type=type):
shutil.move(temp_path, file_path)
print("## OK: {} {}".format(file_path, image_url))
except Exception as e:
@ -94,7 +94,7 @@ def download_images(image_urls, dst_dir,concurrency=None,timeout=30,proxy_type=N
count += 1
concurrent.futures.wait(future_list, timeout)
def download_comic_icon():
def download_comic_icon(is_new=comicInfo.IS_NEW_ICON):
icon_url = comicInfo.getIcon()
if icon_url == None:
print("icon 不存在,已跳过")
@ -104,23 +104,25 @@ def download_comic_icon():
icon_prefix = icon_prefix.split("?")[0]
#判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过
path_comic_icon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_prefix)
if not comicInfo.equIcon() and os.path.exists(path_comic_icon):
if not comicInfo.equIcon() and fu.exists(path_comic_icon):
os.remove(path_comic_icon)
if not os.path.exists(path_comic_icon):
if fu.notExists(path_comic_icon):
download(icon_url, path_comic_icon)
#if not os.path.exists(path_cbz_comic):
# os.makedirs(path_cbz_comic)
save_path = os.path.join(comicInfo.getDirCBZComic(),comicInfo.getChapter()+icon_prefix)
#历史版本ICON
if os.path.exists(save_path):
os.remove(save_path)
#shutil.copy(path_comic_icon, save_path)
#print(f"{path_comic_icon} 已复制至: {save_path}")
if os.path.exists(path_comic_icon):
base_dir = comicInfo.getDirComicChapter()
if not os.path.exists(base_dir):
os.makedirs(base_dir)
if is_new:
#历史版本ICON
if os.path.exists(save_path):
os.remove(save_path)
if os.path.exists(path_comic_icon):
base_dir = comicInfo.getDirComicChapter()
if not os.path.exists(base_dir): os.makedirs(base_dir)
shutil.copy(path_comic_icon,os.path.join(base_dir,save_name+icon_prefix))
else:
if fu.notExists(comicInfo.getDirCBZComicChapter()): os.makedirs(comicInfo.getDirCBZComicChapter())
shutil.copy(path_comic_icon,save_path)
print(f"{path_comic_icon} 已复制至: {save_path}")
#保存icon信息
comicInfo.iconDB()
comicInfo.nextDownloadToCBZChapter()