# PyComicPackRouMan/entity/JM.py

import json
import re
from urllib.parse import urlsplit

from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.base.BaseComicEntity import baseComic

class comicEntity:
    """Scraping entry points that collect comic metadata and pass it to ``baseComic``.

    All public methods are classmethods. ``oneComic`` also writes the class
    attributes ``count_chapter`` and ``aid`` as a side effect.
    """

    # Characters stripped from comic and chapter titles
    # (presumably because they are unsafe in file names -- confirm with baseComic).
    _UNSAFE_NAME_CHARS = re.compile(r'[\\\/\|\(\)\~\?\.\:\：\-\*\<\>]')
    # Matches any character in the range U+4E00..U+9FA5 (CJK ideographs).
    _CJK_CHAR = re.compile(r'[\u4E00-\u9FA5]')

    @staticmethod
    def _clean_name(text):
        """Return *text* with every character in ``_UNSAFE_NAME_CHARS`` removed."""
        return comicEntity._UNSAFE_NAME_CHARS.sub('', text)

    @staticmethod
    def _chapter_name(tokens):
        """Build a cleaned chapter name from whitespace-split label tokens.

        The first token is always used. The third token is appended (separated
        by one space) only when it exists and contains a CJK character.

        Raises:
            IndexError: if *tokens* is empty.
        """
        # Tokens come from str.split(), so they never contain whitespace and
        # need no further whitespace stripping.
        name = tokens[0]
        if len(tokens) > 2 and comicEntity._CJK_CHAR.search(tokens[2]):
            name = name + ' ' + tokens[2]
        return comicEntity._clean_name(name)

    @staticmethod
    def _site_root(url):
        """Return ``scheme://host`` of *url*, without a trailing slash.

        Raises:
            ValueError: if *url* has no scheme or no host part.
        """
        parts = urlsplit(url)
        if not parts.scheme or not parts.netloc:
            raise ValueError("cannot derive site root from URL: %r" % (url,))
        return parts.scheme + "://" + parts.netloc

    @classmethod
    def baseComicData(cls, url, update=False):
        """Return the ``props.pageProps`` object embedded in the page at *url*.

        The page is expected to carry a ``<script id="__NEXT_DATA__">`` element
        whose text is JSON; *update* is forwarded to ``htmlUtils.xpathData``.

        Raises:
            IndexError: if the script element is not found.
            AttributeError: if the JSON has no ``props`` object.
        """
        raw = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()', url=url, update=update)
        payload = json.loads(raw[0])
        return payload.get("props").get("pageProps")

    @classmethod
    def downladsComcis(cls, url):
        """Process every book listed in the page data found at *url*.

        For each entry of ``pageProps["books"]`` this builds the book URL,
        hands it to ``baseComic.downladsComcis``, runs ``oneComic`` on the
        value that call returns, and then calls ``comicInfo.updateComicDate``.
        """
        page_props = cls.baseComicData(url)
        books = page_props.get("books")
        base_url = comicInfo.getBaseUrl(url)
        for book in books:
            comic_href = base_url + "/books/" + book.get("id")
            href = baseComic.downladsComcis(
                book_name=book.get("name"),
                comic_href=comic_href,
                updated=book.get("updatedAt"),
            )
            cls.oneComic(href)
            comicInfo.updateComicDate()

    @classmethod
    def oneComic(cls, c_url, sleep=None):
        """Scrape one comic page and forward its metadata to ``baseComic.oneComic``.

        Args:
            c_url: URL of the comic's detail page.
            sleep: Accepted for interface compatibility; not used here.

        Nothing is forwarded when the page has no chapter links. While the
        chapters are walked, ``cls.aid`` is set to each link's ``data-album``
        attribute and the same value is passed to ``comicInfo.setValue1``.

        Raises:
            ValueError: if *c_url* is not an absolute URL.
        """
        # The first query fetches the page (url=..., update=True). The later
        # queries pass no URL and presumably reuse that page -- verify against
        # htmlUtils -- so this call has to come first.
        chapter_links = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a",url=c_url,update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()",num=0)
        title = cls._clean_name(book_name)
        tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
        author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
        book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
        # The second text node holds the description behind a fixed label.
        dep = str(book_msg[1]).replace("叙述：","")
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src",num=0)

        # Chapter hrefs are site-relative, so prefix them with scheme://host.
        referer = cls._site_root(c_url)
        if not chapter_links:
            return

        chapter_names = []
        chapter_hrefs = []
        cls.count_chapter = 0

        for link in chapter_links:
            label_tokens = link.xpath("li/text()")[0].split()
            cls.aid = link.attrib['data-album']
            comicInfo.setValue1(cls.aid)
            chapter_names.append(cls._chapter_name(label_tokens))
            chapter_hrefs.append(referer + link.attrib['href'])

        baseComic.oneComic(url=c_url,title=title,author=author,
            icon=icon,tags=tags,dep=dep,chapters=chapter_names,chapter_href=chapter_hrefs,
            alias=None)