PyComicPackRouMan/utils/entity/JMTI.py

import hashlib
import json
import os
import time
import random
import shutil
import re
import math

from utils.HtmlUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.CBZUtils import CBZUtils
from utils.CBZUtils import verUtils
from utils.downloader import download_images
from utils.Ntfy import ntfy
# netUtils is called in comicChapter() below but was never imported;
# the module path here is assumed to follow the project's utils layout.
from utils.NetUtils import netUtils
from PIL import Image
class comicEntity:
    count_chapter = 0
    aid = None
    repeat = 0

    @classmethod
    def baseReUrl(cls, url):
        # Mirror domains to substitute for the original host; an empty
        # list disables the rewrite and the url is returned unchanged.
        newurl_list = []
        if newurl_list:
            if re.findall(r'https://(.*?)/\w+/\d+/', url)[0] not in newurl_list:
                for newurl in newurl_list:
                    url = re.sub(re.findall(r'https://(.*?)/\w+/\d+/', url)[0], newurl, url)
        return url
    @classmethod
    def downladsComcis(cls, url):
        # Collect album links and titles from the listing page
        comic_href_list = htmlUtils.xpathData("//div[@class='thumb-overlay-albums']/a/@href", url, update=True)
        comics_name = htmlUtils.xpathData("//span[@class='video-title title-truncate m-t-5']/text()")
        len_books = len(comic_href_list)
        base_url = comicInfo.getBaseUrl(url)
        for x in range(0, len_books):
            book_name = comics_name[x]
            comicInfo.setComicName(book_name)
            comic_href = base_url + comic_href_list[x]
            random_int = random.randint(5, 20)
            dir_conf_comic = comicInfo.getDirConfComic()
            if not os.path.exists(dir_conf_comic):
                ntfy.sendMsg(f"Downloading comic {book_name} in {random_int}s")
                time.sleep(random_int)
            else:
                ntfy.sendMsg(f"Comic already exists: {book_name}")
            cls.oneComic(comic_href, random.uniform(0, 10))
    @classmethod
    def oneComic(cls, c_url, sleep=None):
        nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a", url=c_url, update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()", num=0)
        book_name = re.sub(r'[\\/|()~?.:*<>-]', '', book_name)
        tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
        author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
        book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
        jmid = book_msg[0]
        dep = str(book_msg[1]).replace("叙述:", "")  # strip the "description:" label from the page text
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src", num=0)
        comicInfo.setIcon(icon)
        comicInfo.setHomePage(c_url)
        comicInfo.setComicName(book_name)
        comicInfo.setAuthor(author)
        comicInfo.setDep(dep)
        comicInfo.setTags(tags)
        comicInfo.setCBS("韩漫")  # publisher tag ("Korean manhwa")
        comicInfo.setLang("zh")
        albumid = re.search(r'/album/(\d+)', c_url).group(1)
        referer = re.search(r'(https://\w+\.\w+)/', c_url).group(1)
        if nums:
            list_chapter_name = []
            list_chapter_href = []
            cls.count_chapter = 0
            for i in nums:
                photo_name_list = i.xpath("li/text()")[0].split()
                photo_date = i.xpath("li/span/text()")[0].split()
                try:
                    # Keep a trailing CJK title fragment when one is present
                    if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]):
                        photo_name = re.sub(r'\s', '', photo_name_list[0]) + ' ' + photo_name_list[2]
                    else:
                        photo_name = re.sub(r'\s', '', photo_name_list[0])
                except Exception:
                    photo_name = re.sub(r'\s', '', photo_name_list[0])
                photo_name = re.sub(r'[\\/|()~?.:*<>-]', '', photo_name)
                photoid = i.attrib['data-album']
                cls.aid = photoid
                comicInfo.setChapterName(photo_name)
                comicInfo.setDate(photo_date[0], split='-')
                comicInfo.setWeb(referer + i.attrib['href'])
                # Albums above this id serve scrambled images
                is_scramble = False
                if int(photoid) > 220980:
                    is_scramble = True
                if not comicInfo.nextExistsGetPath("done_"):
                    comicEntity.comicChapter(referer + i.attrib['href'], scramble=is_scramble, sleep=random.randint(5, 15))
                # Chapter already marked done: verify the packed CBZ instead
                if comicInfo.nextExistsGetPath("done_"):
                    verUtils.verCBZ()
                cls.count_chapter += 1
        # After the whole book is done: clear the working folder, then wait
        path_dir_comic = comicInfo.getDirComic()
        if os.path.exists(path_dir_comic):
            shutil.rmtree(path_dir_comic)
        if sleep is not None:
            time.sleep(sleep)
    @classmethod
    def comicChapter(cls, chapter_url, scramble=None, sleep=None):
        """Download every image of one chapter, then pack it into a CBZ."""
        is_next = True
        # try:
        is_next = cls.Onechapter(chapter_url, scramble)
        # Next stage: chapter images are all downloaded, fetch the cover
        if comicInfo.nextExistsGetPath("down_"):
            netUtils.downloadComicIcon()
        # Next stage: pack the downloaded chapter into a CBZ archive
        if comicInfo.nextExistsGetPath("cbz_"):
            time.sleep(0.1)
            is_next = CBZUtils.packAutoComicChapterCBZ()
            # Packing done, delete the source images
            remove_path = comicInfo.getDirComicChapter()
            shutil.rmtree(remove_path)
            print(f"Removed source folder: {remove_path}")
        # except:
        #     ntfy.sendMsg(f"{comicInfo.getComicName()} download failed")
        #     is_next = False
        # ntfy.sendMsg(f"Estimated chapter count: {cls.count_chapter + 1} / " + str(comicInfo.getLenChapters()))
        if sleep is not None and is_next:
            ntfy.sendMsg(f"Next chapter starts in {sleep}s")
            time.sleep(sleep)
    @classmethod
    def Onechapter(cls, chapter_url, scramble=None):
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
        try:
            is_next = cls.comicChapterDownload(chapter_url, scramble)
        except Exception:
            # A stale page cache can break parsing; purge it and retry once
            htmlUtils.remove_HtmlCache(chapter_url)
            cls.repeat = 0
            is_next = cls.comicChapterDownload(chapter_url, scramble)
        comicInfo.nextInfoToImgChapter()
        # Download finished: descramble the obfuscated images
        if scramble:
            chapter_dir = comicInfo.getDirComicChapter()
            dirs = os.listdir(chapter_dir)
            for img in dirs:
                is_scramble = str(img).startswith("scramble=")
                if is_scramble:
                    c_path = os.path.join(chapter_dir, img)
                    # imageUtils.getScrambleImage(c_path)
                    cls.encode_scramble_image(c_path)
        # Advance to the next stage
        comicInfo.nextImgToDownloadChapter()
        return is_next
    @classmethod
    def comicChapterDownload(cls, chapter_url, c_scramble):
        img_list = htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center')]/img/@data-original", url=chapter_url, update=True)
        pages_imgs = htmlUtils.xpathData("//div[@class='center scramble-page']/@id", url=chapter_url)
        comicInfo.setPages(pages_imgs)
        comicInfo.writeComicInfoXML(comicInfo.str_chapter)
        list_img = []
        list_file_name = []
        for i in img_list:
            img_url = i
            img_name = os.path.basename(img_url).split('.')[0]
            if c_scramble:
                # Prefix the block count so the descrambler can recover it later
                img_name = "scramble=" + str(cls.get_scramble_num(cls.aid, img_name)) + "_" + img_name
            path_img = "%s.jpg" % img_name
            list_img.append(img_url)
            list_file_name.append(path_img)
        comicInfo.setChapterImgs(list_img)
        # Persist the image list for this chapter
        comicInfo.nextSaveInfoChapter(comicInfo.str_chapter, list_img)
        is_next = verUtils.verNextCBZ(list_img)
        list_shunt = ["?shunt=2", "?shunt=1", "?shunt=3", ""]
        while not is_next:
            time.sleep(10)
            download_images(list_img, comicInfo.getDirComicChapter(), filesName=list_file_name, timeout=1500)
            file_imgs = os.listdir(comicInfo.getDirComicChapter())
            count_jpg = ",".join(file_imgs).split(".jpg")
            is_next = len(count_jpg) - 1 == len(list_img)
            cls.repeat += 1
            if cls.repeat > 3:
                # Too many failures: retry through an alternate CDN shunt and
                # return that attempt's result instead of falling back into
                # the loop (the original discarded the recursive result).
                url = list_shunt[cls.repeat % len(list_shunt)]
                print("switching shunt =", url)
                return cls.comicChapterDownload(str(chapter_url).split("?")[0] + url, c_scramble)
        return True
    @classmethod
    def get_md5(cls, num):
        result1 = hashlib.md5(num.encode()).hexdigest()
        print('get_md5-', result1)
        return result1
    @classmethod
    def get_scramble_num(cls, e, t):
        # Port of the site's JS: md5(album id + image name), take the last
        # character, and map its code point modulo 10 to an even block count.
        # Albums below the 268850 threshold always use 10 blocks.
        a = 10
        try:
            num_dict = {}
            for i in range(10):
                num_dict[i] = i * 2 + 2
            if int(e) >= 268850:
                n = str(e) + t
                # JS equivalent: switch(n = (n = md5(n)).substr(-1), n %= 10)
                tmp = ord(cls.get_md5(n)[-1])
                a = num_dict[tmp % 10]
            return a
        except Exception as err:  # renamed so the parameter e is not shadowed
            print(err.__traceback__.tb_lineno, err)
            return False
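
    # Since num_dict[i] = i * 2 + 2, indexing it with (ord(...) % 10) has a
    # closed form. This hypothetical helper is a minimal sketch of the same
    # derivation for albums past the 268850 threshold; it is not called
    # anywhere else in this file.
    @staticmethod
    def scramble_blocks_closed_form(aid, name):
        digest = hashlib.md5((str(aid) + name).encode()).hexdigest()
        return (ord(digest[-1]) % 10) * 2 + 2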
    @classmethod
    def encode_scramble_image(cls, imgpath):
        # Reassemble a scrambled image. The file name encodes the block
        # count as "scramble=<blocks>_<page>.jpg", e.g. "scramble=10_29.jpg".
        image = Image.open(imgpath)
        w, h = image.size
        file_str = str(imgpath).split("=")
        base_name = file_str[-1]          # e.g. "10_29.jpg"
        base_fn = base_name.split("_")
        save_name = base_fn[1]
        save_name_delesu = save_name.split(".")[0]
        blocks = int(base_fn[0])
        img_type = os.path.basename(imgpath).split('.')[-1]
        save_path = os.path.join(os.path.dirname(imgpath), save_name_delesu + "." + img_type)
        if blocks:
            s = blocks   # number of horizontal strips
            l = h % s    # leftover rows that do not divide evenly
            box_list = []
            for i in range(s):
                c = math.floor(h / s)
                g = i * c
                h2 = h - c * (i + 1) - l
                if i == 0:
                    c += l   # the first strip absorbs the leftover rows
                else:
                    g += l
                box_list.append((0, h2, w, h - g))
            # Crop the strips bottom-up and paste them back top-down
            # (box_list.reverse() would restore the original slicing order)
            newh = 0
            image_list = [image.crop(box) for box in box_list]
            newimage = Image.new("RGB", (w, h))
            for block in image_list:
                b_w, b_h = block.size
                newimage.paste(block, (0, newh))
                newh += b_h
            newimage.save(save_path)
            if os.path.exists(imgpath):
                os.remove(imgpath)
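

# A minimal usage sketch. The URL below is a placeholder (the real host is
# one of the site's mirror domains), and comicInfo/htmlUtils must already be
# configured as they are elsewhere in this project.
if __name__ == "__main__":
    # Download a single album by its /album/<id> page, waiting 5s when it
    # finishes; use downladsComcis(listing_url) to walk a whole listing page.
    comicEntity.oneComic("https://example.invalid/album/123456/", sleep=5)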