PyComicPackRouMan/entity/JM.py
2023-01-23 09:40:53 +08:00

74 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import re
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.base.BaseComicEntity import baseComic
class comicEntity:
    """Scraping entry points for the JM comic site.

    All methods are classmethods. They read pages through ``htmlUtils``,
    and hand the extracted metadata to ``baseComic`` / ``comicInfo``.
    """

    # Punctuation stripped from comic and chapter names:
    #   \ / | ( ) ~ ? . : * < > -
    # '-' is placed last so it is a literal and never parsed as a range
    # operator inside the character class.
    _UNSAFE_NAME_CHARS = re.compile(r'[\\/|()~?.:*<>-]')
    # Matches one character from the CJK Unified Ideographs range.
    _CJK_CHAR = re.compile(r'[\u4E00-\u9FA5]')

    @classmethod
    def _sanitize_name(cls, name):
        """Return *name* with every character in ``_UNSAFE_NAME_CHARS`` removed."""
        return cls._UNSAFE_NAME_CHARS.sub('', name)

    @classmethod
    def _chapter_name(cls, raw_text):
        """Build a chapter name from the raw text of a chapter list entry.

        The text is split on whitespace. The first token is always used;
        the third token is appended (separated by one space) only when it
        exists and contains at least one CJK character. The result is then
        passed through ``_sanitize_name``.

        Raises:
            IndexError: if *raw_text* contains no tokens at all.
        """
        tokens = raw_text.split()
        name = tokens[0]
        if len(tokens) > 2 and cls._CJK_CHAR.search(tokens[2]):
            name = name + ' ' + tokens[2]
        return cls._sanitize_name(name)

    @staticmethod
    def _site_root(url):
        """Return ``scheme://host[:port]`` of *url*, without a trailing slash.

        Raises:
            ValueError: if *url* has no scheme or no network location.
        """
        # Imported locally so this class does not depend on a new
        # module-level import.
        from urllib.parse import urlsplit

        parts = urlsplit(url)
        if not parts.scheme or not parts.netloc:
            raise ValueError("cannot determine site root of URL: %r" % (url,))
        return parts.scheme + "://" + parts.netloc

    @classmethod
    def baseComicData(cls, url, update=False):
        """Return ``props.pageProps`` from the page's ``__NEXT_DATA__`` JSON.

        Args:
            url: page address, forwarded to ``htmlUtils.xpathData``.
            update: forwarded to ``htmlUtils.xpathData`` unchanged
                (presumably forces a re-fetch — confirm against HtmlUtils).

        Returns:
            The value stored under ``"props" -> "pageProps"`` in the JSON
            text of the ``<script id="__NEXT_DATA__">`` element.
        """
        scripts = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()', url=url, update=update)
        payload = json.loads(scripts[0])
        return payload.get("props").get("pageProps")

    @classmethod
    def downladsComcis(cls, url):
        """Process every book listed in the ``books`` entry of the page data.

        For each book, ``baseComic.downladsComcis`` is called with the
        book's name, its ``<base_url>/books/<id>`` address and its
        ``updatedAt`` value; the returned href is then passed to
        ``oneComic``.

        Args:
            url: address of the listing page.
        """
        page_props = cls.baseComicData(url)
        books = page_props.get("books")
        base_url = comicInfo.getBaseUrl(url)
        for book in books:
            book_id = book.get("id")
            book_name = book.get("name")  # comic name
            updated = book.get("updatedAt")
            comic_href = base_url + "/books/" + book_id
            href = baseComic.downladsComcis(book_name=book_name, comic_href=comic_href, updated=updated)
            cls.oneComic(href)
            # NOTE(review): assumed to run once per book (inside the loop);
            # confirm against the intended behaviour of updateComicDate.
            comicInfo.updateComicDate()

    @classmethod
    def oneComic(cls, c_url, sleep=None):
        """Scrape one comic page and hand its metadata to ``baseComic.oneComic``.

        Reads title, tags, author, description, cover and the chapter list
        from the page at *c_url*. Nothing is forwarded when the page has no
        chapter links.

        Side effects: sets ``cls.count_chapter`` to 0, sets ``cls.aid`` to
        each chapter's ``data-album`` attribute in turn and passes it to
        ``comicInfo.setValue1``.

        Args:
            c_url: address of the comic page.
            sleep: accepted for interface compatibility; not used here.

        Raises:
            ValueError: if *c_url* has no scheme or host.
            IndexError: if the info block has fewer than two text nodes, or
                a chapter entry lacks its name or date text.
        """
        # Only this first query passes url=; the later ones do not
        # (presumably they reuse the page loaded here — confirm in HtmlUtils).
        chapter_links = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a", url=c_url, update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()", num=0)
        title = cls._sanitize_name(book_name)
        tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
        author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
        book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
        # The second text node is the description; its label prefix is dropped.
        dep = str(book_msg[1]).replace("叙述:", "")
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src", num=0)
        # Chapter hrefs are site-relative, so they are prefixed with the site root.
        referer = cls._site_root(c_url)

        if not chapter_links:
            return

        list_chapter_name = []
        list_chapter_href = []
        # NOTE(review): collected but not forwarded to baseComic.oneComic below.
        list_chapter_update = []
        cls.count_chapter = 0
        for link in chapter_links:
            raw_name = link.xpath("li/text()")[0]
            photo_date = link.xpath("li/span/text()")[0].split()
            photo_name = cls._chapter_name(raw_name)
            cls.aid = link.attrib['data-album']
            comicInfo.setValue1(cls.aid)
            list_chapter_name.append(photo_name)
            list_chapter_href.append(referer + link.attrib['href'])
            list_chapter_update.append(photo_date[0])
        # NOTE(review): assumed to run once, after all chapters are collected.
        baseComic.oneComic(url=c_url, title=title, author=author,
                           icon=icon, tags=tags, dep=dep,
                           chapters=list_chapter_name, chapter_href=list_chapter_href,
                           alias=None)