# File-viewer header captured with this source: 74 lines, 3.6 KiB, Python
import json
import re
from urllib.parse import urlsplit

from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.base.BaseComicEntity import baseComic

class comicEntity:
    """Scraping entry points: listing page -> books -> chapters of one book.

    All methods are classmethods; the class is used as a namespace and is
    never instantiated in this file. ``oneComic`` stores ``count_chapter`` and
    ``aid`` as class attributes while it runs.
    """

    # Characters removed from comic titles and chapter names.
    _UNSAFE_CHARS = re.compile(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>]')
    # Any character in the U+4E00..U+9FA5 range.
    _CJK = re.compile(r'[\u4E00-\u9FA5]')

    @classmethod
    def baseComicData(cls, url, update=False):
        """Return ``props.pageProps`` from the page's ``__NEXT_DATA__`` script.

        Args:
            url: Page whose embedded JSON should be read.
            update: Forwarded unchanged to ``htmlUtils.xpathData``.

        Returns:
            The value stored under ``props -> pageProps``, or ``None`` when
            the script tag, ``props`` or ``pageProps`` is missing.
        """
        scripts = htmlUtils.xpathData(
            '//script[@id="__NEXT_DATA__"]/text()', url=url, update=update)
        if not scripts:
            return None
        payload = json.loads(scripts[0])
        return (payload.get("props") or {}).get("pageProps")

    @classmethod
    def downladsComcis(cls, url):
        """Process every book listed in the page data found at ``url``.

        For each entry of ``pageProps["books"]`` the book URL is built from
        the site base URL and the book id, registered through
        ``baseComic.downladsComcis``, and the href returned by that call is
        handed to ``oneComic``. A page without a ``books`` list is a no-op.

        Args:
            url: Listing page URL.
        """
        # Comic names come from the "books" list embedded in the page data.
        page_props = cls.baseComicData(url) or {}
        books = page_props.get("books") or []
        base_url = comicInfo.getBaseUrl(url)
        for book in books:
            href = baseComic.downladsComcis(
                book_name=book.get("name"),
                comic_href=cls._book_href(base_url, book.get("id")),
                updated=book.get("updatedAt"))
            cls.oneComic(href)
            # NOTE(review): the source's indentation was lost; this call is
            # assumed to run once per book rather than once after the loop.
            comicInfo.updateComicDate()

    @classmethod
    def oneComic(cls, c_url, sleep=None):
        """Collect one book's metadata and chapter list, then hand them on.

        Title, tags, author, description and cover are read from the book
        page, and one name/href pair is built per chapter link. If the page
        has no chapter links nothing is passed to ``baseComic.oneComic``.

        Args:
            c_url: Absolute URL of the book page.
            sleep: Unused; kept so existing callers keep working.

        Raises:
            ValueError: If ``c_url`` has no scheme or host.
        """
        row = "//div[@class='row']/div[@class='col-lg-7']/div[1]"
        tag_link = "/a[@class='btn btn-sm btn-primary']/text()"

        # Only this first query passes the URL; the later ones pass none and
        # presumably reuse the page fetched here (confirm in htmlUtils).
        nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a", url=c_url, update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()", num=0)
        title = cls._clean_name(book_name)
        tags = htmlUtils.xpathData(row + "/div[@class='tag-block']/span[@data-type='tags']" + tag_link)
        author = htmlUtils.xpathData(row + "/div[@class='tag-block']/span[@data-type='author']" + tag_link)
        book_msg = htmlUtils.xpathData(row + "/div[@class='p-t-5 p-b-5']/text()")
        dep = cls._description(book_msg)
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src", num=0)

        referer = cls._site_root(c_url)

        if not nums:
            return

        cls.count_chapter = 0
        chapter_names = []
        chapter_hrefs = []
        for link in nums:
            # The <li> text is split on whitespace; see _chapter_name for how
            # the tokens are combined. The <li>/<span> date is not read
            # because nothing consumes it.
            tokens = link.xpath("li/text()")[0].split()
            photo_name = cls._chapter_name(tokens)

            cls.aid = link.attrib['data-album']
            comicInfo.setValue1(cls.aid)
            chapter_names.append(photo_name)
            # Chapter hrefs are appended directly to the site root.
            chapter_hrefs.append(referer + link.attrib['href'])

        # NOTE(review): assumed to run once, after the chapter loop and only
        # when chapter links exist (indentation was lost in the source).
        baseComic.oneComic(url=c_url, title=title, author=author,
                           icon=icon, tags=tags, dep=dep,
                           chapters=chapter_names, chapter_href=chapter_hrefs,
                           alias=None)

    @staticmethod
    def _book_href(base_url, book_id):
        """Build ``<base_url>/books/<book_id>``; the id may be a str or an int."""
        return f"{base_url}/books/{book_id}"

    @classmethod
    def _clean_name(cls, name):
        """Return ``name`` with every ``_UNSAFE_CHARS`` character removed."""
        return cls._UNSAFE_CHARS.sub('', name)

    @classmethod
    def _chapter_name(cls, tokens):
        """Build a chapter name from the whitespace-split chapter label.

        The first token is always used. The third token is appended after a
        single space only when it exists and contains a character matched by
        ``_CJK``. The result is passed through ``_clean_name``.
        """
        name = tokens[0]
        if len(tokens) > 2 and cls._CJK.search(tokens[2]):
            name = name + ' ' + tokens[2]
        return cls._clean_name(name)

    @staticmethod
    def _description(book_msg):
        """Return the second text node without its ``叙述:`` label, or ``""``."""
        if not book_msg or len(book_msg) < 2:
            return ""
        return str(book_msg[1]).replace("叙述:", "")

    @staticmethod
    def _site_root(url):
        """Return ``scheme://host[:port]`` of ``url``.

        Raises:
            ValueError: If ``url`` has no scheme or no network location.
        """
        parts = urlsplit(url)
        if not parts.scheme or not parts.netloc:
            raise ValueError(f"not an absolute URL: {url!r}")
        return f"{parts.scheme}://{parts.netloc}"