import hashlib
import json, os, time, random, shutil
import re, math
from utils.HtmlUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.CBZUtils import CBZUtils, verUtils
from utils.downloader import download_images
from utils.Ntfy import ntfy
# comicChapter() below calls netUtils.downloadComicIcon(); this import was
# missing. The module path here is an assumption -- adjust it to wherever
# netUtils actually lives in this project.
from utils.NetUtils import netUtils
from PIL import Image

class comicEntity:
    count_chapter = 0  # chapters completed for the comic currently being processed
    aid = None         # album id of the chapter currently being downloaded
    repeat = 0         # retry counter used by comicChapterDownload

    @classmethod
    def baseReUrl(cls, url):
        # Placeholder list of mirror domains; when populated, the domain part
        # of an album/chapter URL is swapped for each mirror. A list (not a
        # string) so the membership test matches whole domains.
        newurl_list = []
        if newurl_list:
            if re.findall(r'https://(.*?)/\w+/\d+/', url)[0] not in newurl_list:
                for newurl in newurl_list:
                    url = re.sub(re.findall(r'https://(.*?)/\w+/\d+/', url)[0], newurl, url)
        return url
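    # Mirror-rewrite sketch (hypothetical domains): with
    # newurl_list = ["mirror.example.com"], a URL such as
    # "https://old.example.com/photo/123/" would come back as
    # "https://mirror.example.com/photo/123/".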

    @classmethod
    def downloadComics(cls, url):
        # Scrape the album listing page: cover links and comic titles.
        comic_href_list = htmlUtils.xpathData("//div[@class='thumb-overlay-albums']/a/@href", url, update=True)
        comics_name = htmlUtils.xpathData("//span[@class='video-title title-truncate m-t-5']/text()")
        len_books = len(comic_href_list)
        base_url = comicInfo.getBaseUrl(url)
        for x in range(0, len_books):
            book_name = comics_name[x]
            comicInfo.setComicName(book_name)
            comic_href = base_url + comic_href_list[x]
            random_int = random.randint(5, 20)
            dir_conf_comic = comicInfo.getDirConfComic()
            if not os.path.exists(dir_conf_comic):
                ntfy.sendMsg(f"Starting download in {random_int}s, comic: {book_name}")
                time.sleep(random_int)
            else:
                ntfy.sendMsg(f"Already exists, comic: {book_name}")
            cls.oneComic(comic_href, random.uniform(0, 10))
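    # Usage sketch (hypothetical listing URL):
    #   comicEntity.downloadComics("https://example.com/albums?o=mv&page=1")
    # walks every album on the page and hands each one to oneComic().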

    @classmethod
    def oneComic(cls, c_url, sleep=None):
        # Chapter buttons, book title, tags, author and description blocks.
        nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a", url=c_url, update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()", num=0)
        book_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>]', '', book_name)
        tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
        author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
        book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
        jmid = book_msg[0]
        dep = str(book_msg[1]).replace("叙述:", "")  # strip the "description:" label the site prepends
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src", num=0)

        comicInfo.setIcon(icon)
        comicInfo.setHomePage(c_url)
        comicInfo.setComicName(book_name)
        comicInfo.setAuthor(author)
        comicInfo.setDep(dep)
        comicInfo.setTags(tags)
        comicInfo.setCBS("韩漫")  # publisher label used by the site ("Korean manhwa")
        comicInfo.setLang("zh")
        albumid = re.search(r'/album/(\d+)', c_url).group(1)
        referer = re.search(r'(https://\w+\.\w+)/', c_url).group(1)

        if nums:
            list_chapter_name = []
            list_chapter_href = []
            cls.count_chapter = 0

            for i in nums:
                photo_name_list = i.xpath("li/text()")[0].split()
                photo_date = i.xpath("li/span/text()")[0].split()
                # Keep a trailing CJK title fragment if the chapter label has one.
                try:
                    if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]):
                        photo_name = re.sub(r'\s', '', photo_name_list[0]) + ' ' + photo_name_list[2]
                    else:
                        photo_name = re.sub(r'\s', '', photo_name_list[0])
                except Exception:
                    photo_name = re.sub(r'\s', '', photo_name_list[0])
                photo_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>\-]', '', photo_name)
                photoid = i.attrib['data-album']
                cls.aid = photoid
                comicInfo.setChapterName(photo_name)
                comicInfo.setDate(photo_date[0], split='-')
                comicInfo.setWeb(referer + i.attrib['href'])
                # Albums past this id are served with scrambled (strip-sliced) images.
                is_scramble = False
                if int(photoid) > 220980:
                    is_scramble = True
                if not comicInfo.nextExistsGetPath("done_"):
                    cls.comicChapter(referer + i.attrib['href'], scramble=is_scramble, sleep=random.randint(5, 15))
                # If the chapter is already marked done, verify the CBZ archive is intact.
                if comicInfo.nextExistsGetPath("done_"):
                    verUtils.verCBZ()
                cls.count_chapter += 1
        # After the whole comic is downloaded, clear the working folder and wait.
        path_dir_comic = comicInfo.getDirComic()
        if os.path.exists(path_dir_comic):
            shutil.rmtree(path_dir_comic)
        if sleep is not None:
            time.sleep(sleep)
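    # Worked example of the name sanitizer above (pure string handling):
    #   re.sub(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>]', '', '第1话 (修正版)?')
    # -> '第1话 修正版', i.e. characters unsafe in file names are dropped.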

    '''
    Download all images of one chapter, then pack and clean up.
    '''
    @classmethod
    def comicChapter(cls, chapter_url, scramble=None, sleep=None):
        is_next = cls.Onechapter(chapter_url, scramble)
        # Next stage: all chapter images downloaded, fetch the cover.
        if comicInfo.nextExistsGetPath("down_"):
            netUtils.downloadComicIcon()
        # Next stage: pack the chapter into a CBZ archive.
        if comicInfo.nextExistsGetPath("cbz_"):
            time.sleep(0.1)
            is_next = CBZUtils.packAutoComicChapterCBZ()
            # Packing done: delete the raw image folder.
            remove_path = comicInfo.getDirComicChapter()
            shutil.rmtree(remove_path)
            print(f"Removed folder: {remove_path}")
        if sleep is not None and is_next:
            ntfy.sendMsg(f"Next chapter download starts in {sleep}s")
            time.sleep(sleep)
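    # Stage-marker note (inferred from the calls above and in Onechapter):
    # a chapter appears to advance info_ -> img_ -> down_ (images fetched)
    # -> cbz_ (packed) -> done_ (verified by verUtils.verCBZ in oneComic).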

    @classmethod
    def Onechapter(cls, chapter_url, scramble=None):
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
        try:
            is_next = cls.comicChapterDownload(chapter_url, scramble)
        except Exception:
            # Likely a stale cached page: drop the HTML cache and retry once.
            htmlUtils.remove_HtmlCache(chapter_url)
            cls.repeat = 0
            is_next = cls.comicChapterDownload(chapter_url, scramble)
        comicInfo.nextInfoToImgChapter()
        # All images are on disk; unscramble the ones that need it.
        if scramble:
            chapter_dir = comicInfo.getDirComicChapter()
            for img in os.listdir(chapter_dir):
                # Scrambled files were saved as "scramble=<blocks>_<page>.jpg".
                if str(img).startswith("scramble="):
                    c_path = os.path.join(chapter_dir, img)
                    cls.encode_scramble_image(c_path)
        # Advance to the next stage.
        comicInfo.nextImgToDownloadChapter()
        return is_next
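    # Filename convention sketch: for page "00001" with a computed block
    # count of, say, 10, the image is stored as "scramble=10_00001.jpg"
    # until encode_scramble_image() rebuilds it as "00001.jpg".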

    @classmethod
    def comicChapterDownload(cls, chapter_url, c_scramble):
        # Image URLs plus the ids of pages that are scramble-protected.
        img_list = htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center')]/img/@data-original", url=chapter_url, update=True)
        pages_imgs = htmlUtils.xpathData("//div[@class='center scramble-page']/@id", url=chapter_url)
        comicInfo.setPages(pages_imgs)
        comicInfo.writeComicInfoXML(comicInfo.str_chapter)
        list_img = []
        list_file_name = []
        for img_url in img_list:
            img_name = os.path.basename(img_url).split('.')[0]
            if c_scramble:
                # Prefix scrambled pages with their block count for the decode step.
                img_name = "scramble=" + str(cls.get_scramble_num(cls.aid, img_name)) + "_" + img_name
            path_img = "%s.jpg" % img_name
            list_img.append(img_url)
            list_file_name.append(path_img)
        comicInfo.setChapterImgs(list_img)
        # Save the image list for this chapter before downloading.
        comicInfo.nextSaveInfoChapter(comicInfo.str_chapter, list_img)
        is_next = verUtils.verNextCBZ(list_img)
        list_shunt = ["?shunt=2", "?shunt=1", "?shunt=3", ""]
        while not is_next:
            time.sleep(10)
            download_images(list_img, comicInfo.getDirComicChapter(), filesName=list_file_name, timeout=1500)
            # A download pass is complete when every expected .jpg is on disk.
            file_imgs = os.listdir(comicInfo.getDirComicChapter())
            count_jpg = ",".join(file_imgs).split(".jpg")
            is_next = len(count_jpg) - 1 == len(list_img)
            cls.repeat += 1
            if cls.repeat > 3:
                # After three failed passes, rotate through the CDN shunt lines.
                url = list_shunt[cls.repeat % len(list_shunt)]
                print("Switching shunt =", url)
                return cls.comicChapterDownload(str(chapter_url).split("?")[0] + url, c_scramble)
        return True
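    # Shunt rotation sketch: with list_shunt as above, repeat=4 retries
    # "...?shunt=2", repeat=5 -> "?shunt=1", repeat=6 -> "?shunt=3", and
    # repeat=7 falls back to the bare URL, cycling until a pass completes.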

    @classmethod
    def get_md5(cls, num):
        # Hex md5 digest of the input string (used to derive the block count).
        result1 = hashlib.md5(num.encode()).hexdigest()
        print('get_md5-', result1)
        return result1

    @classmethod
    def get_scramble_num(cls, e, t):
        # Port of the site's JS: derive how many horizontal strips an image
        # was cut into from md5(album_id + page_name).
        a = 10  # default block count for older scrambled albums
        try:
            num_dict = {i: i * 2 + 2 for i in range(10)}  # 0..9 -> 2, 4, ..., 20
            if int(e) >= 268850:
                n = str(e) + t
                # JS original: switch(n=(n = (n = md5(n)).substr(-1)), n %= 10) {...}
                tmp = ord(cls.get_md5(n)[-1])
                a = num_dict[tmp % 10]
            return a
        except Exception as err:
            print(err.__traceback__.tb_lineno, err)
            return False
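    # Worked sketch: the digest's last hex character has a code point
    # (ord '0'..'9' = 48..57, 'a'..'f' = 97..102); taken mod 10 it indexes
    # num_dict, so the block count is always an even number between 2 and 20,
    # e.g. a digest ending in 'c' gives ord('c') % 10 = 9 -> 20 strips.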

    @classmethod
    def encode_scramble_image(cls, imgpath):
        # Despite the name, this *restores* a scrambled page: the site serves
        # the image cut into `blocks` horizontal strips stacked bottom-up, and
        # this re-crops and re-pastes them into the original order.
        image = Image.open(imgpath)
        w, h = image.size
        # Filename convention: "scramble=<blocks>_<page>.<ext>", e.g. "scramble=10_29.jpg".
        base_name = str(imgpath).split("=")[-1]
        base_fn = base_name.split("_")
        save_name_delesu = base_fn[1].split(".")[0]
        blocks = int(base_fn[0])
        img_type = os.path.basename(imgpath).split('.')[-1]
        save_path = os.path.join(os.path.dirname(imgpath), save_name_delesu + "." + img_type)
        if blocks:
            s = blocks  # number of strips
            l = h % s   # leftover rows that don't divide evenly across strips
            box_list = []
            for i in range(s):
                c = math.floor(h / s)  # base strip height
                g = i * c
                h2 = h - c * (i + 1) - l
                if i == 0:
                    c += l  # the first strip absorbs the leftover rows
                else:
                    g += l
                box_list.append((0, h2, w, h - g))
            # Crop strips bottom-to-top, then paste them top-down.
            image_list = [image.crop(box) for box in box_list]
            newimage = Image.new("RGB", (w, h))
            newh = 0
            for block in image_list:
                _, b_h = block.size
                newimage.paste(block, (0, newh))
                newh += b_h
            newimage.save(save_path)
        # Close the source handle before deleting (required on Windows).
        image.close()
        if os.path.exists(imgpath):
            os.remove(imgpath)
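
# Minimal usage sketch (hypothetical URLs and paths; the real entry point
# lives elsewhere in this project):
if __name__ == "__main__":
    # Crawl every comic on a listing page...
    comicEntity.downloadComics("https://example.com/albums?o=mv")
    # ...or fetch a single album, pausing 5s after it finishes.
    comicEntity.oneComic("https://example.com/album/123456/", sleep=5)
    # Re-assemble one strip-scrambled page by hand.
    comicEntity.encode_scramble_image("./123456/scramble=10_00001.jpg")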