PyComicPackRouMan/utils/entity/JMTI.py

import hashlib
import json
import os
import time
import random
import shutil
import re
import math

from utils.HtmlUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.CBZUtils import CBZUtils
from utils.CBZUtils import verUtils
from utils.downloader import download_images
from utils.Ntfy import ntfy
# netUtils is called in comicChapter() below but was never imported;
# the module path here is assumed to follow the project's utils layout.
from utils.NetUtils import netUtils
from PIL import Image
class comicEntity:
    count_chapter = 0
    aid = None
    repeat = 0

    @classmethod
    def baseReUrl(cls, url):
        # Mirror domains to substitute for the original host; an empty
        # list disables the rewrite and the url is returned unchanged.
        newurl_list = []
        if newurl_list:
            if re.findall(r'https://(.*?)/\w+/\d+/', url)[0] not in newurl_list:
                for newurl in newurl_list:
                    url = re.sub(re.findall(r'https://(.*?)/\w+/\d+/', url)[0], newurl, url)
        return url
    @classmethod
    def downladsComcis(cls, url):
        # Collect album links and titles from the listing page
        comic_href_list = htmlUtils.xpathData("//div[@class='thumb-overlay-albums']/a/@href", url, update=True)
        comics_name = htmlUtils.xpathData("//span[@class='video-title title-truncate m-t-5']/text()")
        len_books = len(comic_href_list)
        base_url = comicInfo.getBaseUrl(url)
        for x in range(0, len_books):
            book_name = comics_name[x]
            comicInfo.setComicName(book_name)
            comic_href = base_url + comic_href_list[x]
            random_int = random.randint(5, 20)
            dir_conf_comic = comicInfo.getDirConfComic()
            if not os.path.exists(dir_conf_comic):
                ntfy.sendMsg(f"Downloading comic {book_name} in {random_int}s")
                time.sleep(random_int)
            else:
                ntfy.sendMsg(f"Comic already exists: {book_name}")
            cls.oneComic(comic_href, random.uniform(0, 10))
    @classmethod
    def oneComic(cls, c_url, sleep=None):
        nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a", url=c_url, update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()", num=0)
        book_name = re.sub(r'[\\/|()~?.:*<>-]', '', book_name)
        tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
        author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
        book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
        jmid = book_msg[0]
        dep = str(book_msg[1]).replace("叙述:", "")  # strip the "description:" label from the page text
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src", num=0)
        comicInfo.setIcon(icon)
        comicInfo.setHomePage(c_url)
        comicInfo.setComicName(book_name)
        comicInfo.setAuthor(author)
        comicInfo.setDep(dep)
        comicInfo.setTags(tags)
        comicInfo.setCBS("韩漫")  # publisher tag ("Korean manhwa")
        comicInfo.setLang("zh")
        albumid = re.search(r'/album/(\d+)', c_url).group(1)
        referer = re.search(r'(https://\w+\.\w+)/', c_url).group(1)
        if nums:
            list_chapter_name = []
            list_chapter_href = []
            cls.count_chapter = 0
            for i in nums:
                photo_name_list = i.xpath("li/text()")[0].split()
                photo_date = i.xpath("li/span/text()")[0].split()
                try:
                    # Keep a trailing CJK title fragment when one is present
                    if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]):
                        photo_name = re.sub(r'\s', '', photo_name_list[0]) + ' ' + photo_name_list[2]
                    else:
                        photo_name = re.sub(r'\s', '', photo_name_list[0])
                except Exception:
                    photo_name = re.sub(r'\s', '', photo_name_list[0])
                photo_name = re.sub(r'[\\/|()~?.:*<>-]', '', photo_name)
                photoid = i.attrib['data-album']
                cls.aid = photoid
                comicInfo.setChapterName(photo_name)
                comicInfo.setDate(photo_date[0], split='-')
                comicInfo.setWeb(referer + i.attrib['href'])
                # Albums above this id serve scrambled images
                is_scramble = False
                if int(photoid) > 220980:
                    is_scramble = True
                if not comicInfo.nextExistsGetPath("done_"):
                    comicEntity.comicChapter(referer + i.attrib['href'], scramble=is_scramble, sleep=random.randint(5, 15))
                # Chapter already marked done: verify the packed CBZ instead
                if comicInfo.nextExistsGetPath("done_"):
                    verUtils.verCBZ()
                cls.count_chapter += 1
        # After the whole book is done: clear the working folder, then wait
        path_dir_comic = comicInfo.getDirComic()
        if os.path.exists(path_dir_comic):
            shutil.rmtree(path_dir_comic)
        if sleep is not None:
            time.sleep(sleep)
    @classmethod
    def comicChapter(cls, chapter_url, scramble=None, sleep=None):
        """Download every image of one chapter, then pack it into a CBZ."""
        is_next = True
        # try:
        is_next = cls.Onechapter(chapter_url, scramble)
        # Next stage: chapter images are all downloaded, fetch the cover
        if comicInfo.nextExistsGetPath("down_"):
            netUtils.downloadComicIcon()
        # Next stage: pack the downloaded chapter into a CBZ archive
        if comicInfo.nextExistsGetPath("cbz_"):
            time.sleep(0.1)
            is_next = CBZUtils.packAutoComicChapterCBZ()
            # Packing done, delete the source images
            remove_path = comicInfo.getDirComicChapter()
            shutil.rmtree(remove_path)
            print(f"Removed source folder: {remove_path}")
        # except:
        #     ntfy.sendMsg(f"{comicInfo.getComicName()} download failed")
        #     is_next = False
        # ntfy.sendMsg(f"Estimated chapter count: {cls.count_chapter + 1} / " + str(comicInfo.getLenChapters()))
        if sleep is not None and is_next:
            ntfy.sendMsg(f"Next chapter starts in {sleep}s")
            time.sleep(sleep)
    @classmethod
    def Onechapter(cls, chapter_url, scramble=None):
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
        try:
            is_next = cls.comicChapterDownload(chapter_url, scramble)
        except Exception:
            # A stale page cache can break parsing; purge it and retry once
            htmlUtils.remove_HtmlCache(chapter_url)
            cls.repeat = 0
            is_next = cls.comicChapterDownload(chapter_url, scramble)
        comicInfo.nextInfoToImgChapter()
        # Download finished: descramble the obfuscated images
        if scramble:
            chapter_dir = comicInfo.getDirComicChapter()
            dirs = os.listdir(chapter_dir)
            for img in dirs:
                is_scramble = str(img).startswith("scramble=")
                if is_scramble:
                    c_path = os.path.join(chapter_dir, img)
                    # imageUtils.getScrambleImage(c_path)
                    cls.encode_scramble_image(c_path)
        # Advance to the next stage
        comicInfo.nextImgToDownloadChapter()
        return is_next
    @classmethod
    def comicChapterDownload(cls, chapter_url, c_scramble):
        img_list = htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center')]/img/@data-original", url=chapter_url, update=True)
        pages_imgs = htmlUtils.xpathData("//div[@class='center scramble-page']/@id", url=chapter_url)
        comicInfo.setPages(pages_imgs)
        comicInfo.writeComicInfoXML(comicInfo.str_chapter)
        list_img = []
        list_file_name = []
        for i in img_list:
            img_url = i
            img_name = os.path.basename(img_url).split('.')[0]
            if c_scramble:
                # Prefix the block count so the descrambler can recover it later
                img_name = "scramble=" + str(cls.get_scramble_num(cls.aid, img_name)) + "_" + img_name
            path_img = "%s.jpg" % img_name
            list_img.append(img_url)
            list_file_name.append(path_img)
        comicInfo.setChapterImgs(list_img)
        # Persist the image list for this chapter
        comicInfo.nextSaveInfoChapter(comicInfo.str_chapter, list_img)
        is_next = verUtils.verNextCBZ(list_img)
        list_shunt = ["?shunt=2", "?shunt=1", "?shunt=3", ""]
        while not is_next:
            time.sleep(10)
            download_images(list_img, comicInfo.getDirComicChapter(), filesName=list_file_name, timeout=1500)
            file_imgs = os.listdir(comicInfo.getDirComicChapter())
            count_jpg = ",".join(file_imgs).split(".jpg")
            is_next = len(count_jpg) - 1 == len(list_img)
            cls.repeat += 1
            if cls.repeat > 3:
                # Too many failures: retry through an alternate CDN shunt and
                # return that attempt's result instead of falling back into
                # the loop (the original discarded the recursive result).
                url = list_shunt[cls.repeat % len(list_shunt)]
                print("switching shunt =", url)
                return cls.comicChapterDownload(str(chapter_url).split("?")[0] + url, c_scramble)
        return True
    @classmethod
    def get_md5(cls, num):
        result1 = hashlib.md5(num.encode()).hexdigest()
        print('get_md5-', result1)
        return result1
    @classmethod
    def get_scramble_num(cls, e, t):
        # Port of the site's JS: md5(album id + image name), take the last
        # character, and map its code point modulo 10 to an even block count.
        # Albums below the 268850 threshold always use 10 blocks.
        a = 10
        try:
            num_dict = {}
            for i in range(10):
                num_dict[i] = i * 2 + 2
            if int(e) >= 268850:
                n = str(e) + t
                # JS equivalent: switch(n = (n = md5(n)).substr(-1), n %= 10)
                tmp = ord(cls.get_md5(n)[-1])
                a = num_dict[tmp % 10]
            return a
        except Exception as err:  # renamed so the parameter e is not shadowed
            print(err.__traceback__.tb_lineno, err)
            return False
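
    # Since num_dict[i] = i * 2 + 2, indexing it with (ord(...) % 10) has a
    # closed form. This hypothetical helper is a minimal sketch of the same
    # derivation for albums past the 268850 threshold; it is not called
    # anywhere else in this file.
    @staticmethod
    def scramble_blocks_closed_form(aid, name):
        digest = hashlib.md5((str(aid) + name).encode()).hexdigest()
        return (ord(digest[-1]) % 10) * 2 + 2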
    @classmethod
    def encode_scramble_image(cls, imgpath):
        # Reassemble a scrambled image. The file name encodes the block
        # count as "scramble=<blocks>_<page>.jpg", e.g. "scramble=10_29.jpg".
        image = Image.open(imgpath)
        w, h = image.size
        file_str = str(imgpath).split("=")
        base_name = file_str[-1]          # e.g. "10_29.jpg"
        base_fn = base_name.split("_")
        save_name = base_fn[1]
        save_name_delesu = save_name.split(".")[0]
        blocks = int(base_fn[0])
        img_type = os.path.basename(imgpath).split('.')[-1]
        save_path = os.path.join(os.path.dirname(imgpath), save_name_delesu + "." + img_type)
        if blocks:
            s = blocks   # number of horizontal strips
            l = h % s    # leftover rows that do not divide evenly
            box_list = []
            for i in range(s):
                c = math.floor(h / s)
                g = i * c
                h2 = h - c * (i + 1) - l
                if i == 0:
                    c += l   # the first strip absorbs the leftover rows
                else:
                    g += l
                box_list.append((0, h2, w, h - g))
            # Crop the strips bottom-up and paste them back top-down
            # (box_list.reverse() would restore the original slicing order)
            newh = 0
            image_list = [image.crop(box) for box in box_list]
            newimage = Image.new("RGB", (w, h))
            for block in image_list:
                b_w, b_h = block.size
                newimage.paste(block, (0, newh))
                newh += b_h
            newimage.save(save_path)
            if os.path.exists(imgpath):
                os.remove(imgpath)
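

# A minimal usage sketch. The URL below is a placeholder (the real host is
# one of the site's mirror domains), and comicInfo/htmlUtils must already be
# configured as they are elsewhere in this project.
if __name__ == "__main__":
    # Download a single album by its /album/<id> page, waiting 5s when it
    # finishes; use downladsComcis(listing_url) to walk a whole listing page.
    comicEntity.oneComic("https://example.invalid/album/123456/", sleep=5)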