caiwx86 2022-12-06 10:16:29 +08:00
parent 499cb29fa3
commit a8148ac383
11 changed files with 606 additions and 178 deletions

.gitignore (vendored, 1 changed line)

@@ -1,2 +1,3 @@
COMICOUT/
.conf/
**/__pycache__/**

main.py (76 changed lines)

@@ -1,72 +1,8 @@
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicStr import comicStr
from utils.FileUtils import fileUtils
import json,os
from utils.NetUtils import netUtils
from utils.ImageUtils import imageUtils
from utils.ComicUtils import comicUtils
comic_chapter_path = "COMICOUT"
def oneComic(c_url):
global comic_chapter_path
# # comic title
data = htmlUtils.xpathData(comicStr.result,
'//script[@id="__NEXT_DATA__"]/text()',url=c_url)
# #
data = json.loads(data[0])
data = data.get("props")
data = data.get("pageProps")
print(data)
fileUtils.saveConfComicChapterInfo("1話 親子餐廳的媽媽們",data,"親子餐廳的媽媽們")
x = fileUtils.getInfoConfComicChapter("1話 親子餐廳的媽媽們","親子餐廳的媽媽們")
bookName = x.get("bookName")
alias = x.get("alias")
chapterName = x.get("chapterName")
description = x.get("description")
images = x.get("images")
chapterAPIPath = x.get("chapterAPIPath")
print(chapterAPIPath)
if not chapterAPIPath == None:
base_url = comicUtils.getBaseUrl(c_url)
chapterAPIUrl = base_url+chapterAPIPath
data = htmlUtils.getJSON(chapterAPIUrl)
data = data.get("chapter")
chapterName = data.get("name")
images = data.get("images")
print("test")
if images == None:
print("fsf")
totalChapter = x.get("totalChapter")
tags = x.get("tags")
x = tags
print(x)
count_image = 1
list_img = []
list_scramble = []
for image in images:
image_src = image.get("src")
scramble = image.get("scramble")
print("count=",count_image)
list_img.append(image_src)
list_scramble.append(scramble)
print(image_src)
print(scramble)
count_image+=1
print(count_image)
print(list_img)
print(totalChapter)
netUtils.downloadComicChapterImages(bookName,chapterName,list_img,scrambles=list_scramble)
comic_chapter_path = os.path.join("COMICOUT",bookName,chapterName)
from utils.entity.RouMan import comicEntity
if __name__ == '__main__':
oneComic("https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/23")
# oneComic()
# path = "COMICOUT\好友的私生活\第1話 好友的私生活"
path = comic_chapter_path
dirs = os.listdir(path)
for dir in dirs:
isScramble = str(dir).startswith("scramble=")
if isScramble:
c_path = os.path.join(path, dir)
imageUtils.getScrambleImage(c_path)
# os.environ["http_proxy"] = "http://127.0.0.1:7890"
# os.environ["https_proxy"] = "http://127.0.0.1:7890"
# url = "https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/1"
# comicEntity.comicChapter(url,scramble=True)
comicEntity.oneComic("https://rm01.xyz/books/47376792-2816-4ccf-a146-957b8d6a2ac6")

utils/CBZUtils.py (new file, 55 added lines)

@@ -0,0 +1,55 @@
import os
from pathlib import Path
from zipfile import ZipFile
import shutil
from utils.comic.ComicInfo import comicInfo
class CBZUtils:
@classmethod
def readDirsOrFiles(cls,dir,type):
data = []
files = os.listdir(dir)
for file in files:
path = os.path.join(dir,file)
if type == "files" and os.path.isfile(path):
data.append(path)
if type == "dirs" and os.path.isdir(path):
data.append(path)
return data
@classmethod
def zip_compression(cls,source_dir, target_file,type=None):
msg = {}
target_dir = os.path.dirname(target_file)
if not os.path.exists(target_dir):
os.makedirs(target_dir)
if not os.path.exists(target_file):
with ZipFile(target_file, mode='w') as zf:
for path, dir_names, filenames in os.walk(source_dir):
path = Path(path)
arc_dir = path.relative_to(source_dir)
y = 0
for filename in filenames:
y = y + 1
print("打包中:" + str(y) + "/" + str(len(filenames)), os.path.join(source_dir, filename))
zf.write(path.joinpath(filename), arc_dir.joinpath(filename))
zf.close()
print("打包完成:", target_file)
#md5_file = md5(target_file)
#print("md5:", md5_file)
#msg[target_file] = md5_file
return msg
else:
print("文件已存在:", target_file)
if type == "delete":
shutil.rmtree(source_dir)
print(f"已删除 source_dir: {source_dir}")
@classmethod
def packAutoComicChapterCBZ(cls):
chapter_path = comicInfo.getDirComicChapter()
packCBZ_path = comicInfo.getDirCBZComicChapter()
cls.zip_compression(chapter_path,packCBZ_path+".CBZ",type="delete")
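
For orientation, a minimal usage sketch of the new helper with hypothetical paths (in practice packAutoComicChapterCBZ derives both from comicInfo):

# Sketch: pack one downloaded chapter directory into a CBZ archive.
from utils.CBZUtils import CBZUtils

CBZUtils.zip_compression(
    "COMICOUT/outputComic/Demo/ch01",  # source_dir holding the chapter images
    "COMICOUT/CBZ/Demo/ch01.CBZ",      # target_file; parent dirs are created
    type="delete")                     # remove source_dir once the archive exists

Note the quirk in zip_compression above: type="delete" only removes source_dir on the branch where target_file already exists, so packing and cleanup happen on separate runs.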

utils/FileUtils.py

@@ -1,10 +1,8 @@
import json
import requests,os
from utils.comic.PathStr import pathStr
class fileUtils:
base_path = "COMIC_OUT"
conf_path = os.path.join(base_path,".conf")
comic_path = os.path.join(base_path,".conf","comic")
comic_name = ""
# save file
@@ -47,8 +45,8 @@ class fileUtils:
def getPathConfComicChapterInfo(cls,chapter,comic_name=None):
if comic_name == None:
comic_name = cls.comic_name
return os.path.join(cls.comic_path,comic_name,"info_"+chapter)
return os.path.join(pathStr.base_conf_path,comic_name,"info_"+chapter)
@classmethod
def getInfoConfComicChapter(cls,chapter,comic_name=None):
data = None
@@ -73,5 +71,5 @@ class fileUtils:
'''
@classmethod
def get_utl_save_comic(cls,name):
file = os.path.join(cls.comic_path,name)
file = os.path.join(pathStr.base_comic_img,name)
return file

utils/HtmlUtils.py

@@ -2,11 +2,12 @@ from fake_useragent import UserAgent
import requests
from lxml import html
from utils.comic.ComicStr import comicStr
import os
class htmlUtils:
headers = {'User-Agent': UserAgent().random}
url_data = {}
@classmethod
def getHTML(cls, curl):
rstr = r"[\/\\\:\*\?\"\<\>\|\.]" #  '/ \ : * ? " < > |'
@@ -21,7 +22,11 @@ class htmlUtils:
data = { curl : url_text}
cls.url_data.update(data)
return url_text
@classmethod
def getBytes(cls, url):
return requests.get(url, headers=cls.headers,stream=True)
@classmethod
def getJSON(cls,curl):
res = requests.get(curl, headers=cls.headers)
@@ -29,7 +34,7 @@ class htmlUtils:
return data_json
@classmethod
def xpathData(cls,c_title, c_xpath,url=None,num=None,type=None,not_eq=None):
def xpathData(cls,c_xpath,url=None,num=None,not_eq=None):
if url == None:
url = cls.temp_url
else:
@@ -44,11 +49,6 @@ class htmlUtils:
if not x == not_eq:
result.append(x)
count +=1
data = {c_title : result}
if not num == None:
data = {c_title : result[num]}
if not type == None:
data = { type : result }
if c_title == comicStr.result:
data = result
return data
result = result[num]
return result

utils/ImageUtils.py

@@ -22,6 +22,9 @@ class imageUtils:
@classmethod
def scrambleImage(cls,file_path):
if str(file_path).endswith(".downloads"):
os.remove(file_path)
return None
file_str = str(file_path).split("=")
#10_29.jpg
baseDir = file_str[0].replace("scramble","")
@@ -45,7 +48,9 @@ class imageUtils:
blockWidth = int(width / blocks)
print("blockHeight=",blockHeight)
su = str(file_path).split(".")[-1]
split_path = os.path(baseDir,save_name_delesu+"split")
split_path = os.path.join(baseDir,save_name_delesu+"split")
if image_su == "downloads":
return None
cls.splitimage(file_path,blocks,1,split_path)
cls.image_compose(split_path+"/",blocks,1,save_file_path,blockHeight,width)
# clean up afterwards
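
To make the naming convention above concrete, a small sketch of how a scrambled file path is parsed; the path is hypothetical, and reading the block count out of the filename is an assumption based on the "10_29.jpg" comment above:

# "scramble=<blocks>_<name>.<ext>" marks an image that must be reassembled.
file_path = "COMICOUT/outputComic/Demo/ch01/scramble=10_029.jpg"
file_str = file_path.split("=")                # [".../ch01/scramble", "10_029.jpg"]
baseDir = file_str[0].replace("scramble", "")  # directory that holds the image
blocks = int(file_str[1].split("_")[0])        # 10 strips to stitch back together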

utils/NetUtils.py

@@ -1,10 +1,12 @@
import os.path,shutil
import requests
from concurrent.futures import ThreadPoolExecutor,wait,ALL_COMPLETED
import time
import time,random
import imghdr
from utils.comic.PathStr import pathStr
from utils.ImageUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.HtmlUtils import htmlUtils
from utils.downloader import download_image
class netUtils:
@@ -23,9 +25,9 @@ class netUtils:
os.remove(tmp_file)
print("存在缓存文件,已删除:",tmp_file)
repair_count = 1
res = requests.get(url, stream=True)
res = htmlUtils.getBytes(url)
while res.status_code != 200 and repair_count <= 5:
res = requests.get(url, stream=True)
res = htmlUtils.getBytes(url)
print(f'Retry #{repair_count}: {url}')
repair_count += 1
# check whether the response is an image
@@ -35,23 +37,26 @@ class netUtils:
basedir= os.path.dirname(path)
if not os.path.exists(basedir):
os.makedirs(basedir)
#expected_length = res.headers.get('Content-Length')
#actual_length = res.raw.tell()
with open(tmp_file, 'wb') as f:
for ch in res:
f.write(ch)
f.close()
shutil.move(tmp_file, path)
print(f"url={url} 保存至:{path}")
return path
for ch in res:
f.write(ch)
f.close()
shutil.move(tmp_file, path)
print(f"url={url} 保存至:{path}")
return path
@classmethod
def threadDownload(cls,url,path,fileType=None):
executor = ThreadPoolExecutor(max_workers=3)
tasks = executor.submit(cls.download, url,path,fileType)
time.sleep(random.uniform(0,1))
#wait(tasks, return_when=ALL_COMPLETED)
@classmethod
def downloadComicChapterImages(cls,comic_name, chapter_name, imgs,scrambles=None):
file_path = os.path.join(pathStr.base_comic_out, comic_name, chapter_name)
def downloadComicChapterImages(cls, imgs,scrambles=None):
file_path = comicInfo.getDirComicChapter()
print("files=",file_path)
count_img = 1
@@ -65,22 +70,24 @@ class netUtils:
blockInt = imageUtils.encodeImage(de_str)
save_file_path = os.path.join(file_path,"scramble="+str(blockInt)+"_"+file_name)
cls.threadDownload(img, save_file_path, fileType="image")
time.sleep(0.1)
count_img += 1
return os.path.dirname(save_file_path)
@classmethod
def downloadComicIcon(cls,comic_name,chapter,img):
file_su = os.path.splitext(img)[-1]
icon_name = "cover"+ file_su
save_file_path = os.path.join(cls.save_comic_img_basePath,comic_name,icon_name)
if os.path.exists(save_file_path):
"已存在,跳过下载"
def downloadComicIcon(cls):
icon_url = comicInfo.getIcon()
if icon_url == None:
print("icon 不存在,已跳过")
return None
else:
cls.download(img,save_file_path,fileType="image")
target_dir = os.path.join(cls.CBZ_path,comic_name)
target_file = os.path.join(target_dir,chapter+file_su)
if not os.path.exists(target_dir):
os.makedirs(target_dir)
shutil.copy(save_file_path,target_file)
save_name = "cover"
icon_su = "."+str(icon_url).split(".")[-1]
# skip download if comicname/cover.jpg already exists
pathComicIcon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_su)
if not os.path.exists(pathComicIcon):
cls.download(icon_url, pathComicIcon)
pathCBZComic = comicInfo.getDirCBZComic()
if not os.path.exists(pathCBZComic):
os.makedirs(pathCBZComic)
save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su)
shutil.copy(pathComicIcon, save_path)
print(f"{pathComicIcon} 已复制至: {save_path}")

utils/comic/ComicInfo.py

@@ -1,37 +1,290 @@
from xml.dom.minidom import Document
import os,re
from utils.comic.PathStr import pathStr
import json,shutil
document = Document()
class comicInfoXmlNode():
class comicInfo():
document = Document()
pathComicInfo = None
root = "ComicInfo"
chapter = "Title"
comic_name = "Series"
dep = "Summary"
author = "Writer"
tags = "Genre"
tag = "Genre"
cbs = "Publisher"
lang = "LanguageISO"
comic_names = "SeriesGroup"
tags = "Tag"
str_comicName = None
str_chapter = None
str_icon = None
str_homePage = None
chapter_node = None
comicName_node = None
dep_node = None
author_node = None
tag_node = None
cbs_node = None
lang_node = None
comicNames_node = None
tags_node = None
@classmethod
def setNodeAndValue(cls,node,value):
node = document.createElement(node)
node_text = document.createTextNode(value)
node.appendChild(node_text)
if not value == None:
c_node = cls.document.createElement(node)
node_text = cls.document.createTextNode(value)
c_node.appendChild(node_text)
return c_node
return None
@classmethod
def Root(cls):
return cls.document.createElement(cls.root)
@classmethod
def setChapter(cls,value):
cls.setNodeAndValue(cls.chapter,value)
def setChapterName(cls,value):
cls.str_chapter = cls.fixFileName(value)
cls.chapter_node = cls.setNodeAndValue(cls.chapter,value)
@classmethod
def setComicName(cls,value):
cls.setNodeAndValue(cls.comic_name, value)
cls.str_comicName = cls.fixFileName(value)
cls.comicName_node = cls.setNodeAndValue(cls.comic_name, value)
@classmethod
def getComicInfoXML(cls):
cls.setChapter()
class comicInfo():
def setComicNames(cls,value):
cls.comicNames_node = cls.setNodeAndValue(cls.comic_names,value)
# write the xml
@classmethod
def writeComicInfoXML(cls,c_title,chapter,dep,author,tags="韩漫",c_publisher="韩漫",language="zh"):
# expected file: CBZ/"comic_name"/chapter
print()
@classmethod
def setDep(cls,value):
cls.dep_node = cls.setNodeAndValue(cls.dep, value)
@classmethod
def setAuthor(cls,value):
cls.author_node = cls.setNodeAndValue(cls.author,value)
@classmethod
def setLang(cls,value):
cls.lang_node = cls.setNodeAndValue(cls.lang, value)
@classmethod
def setTag(cls,value):
cls.tag_node = cls.setNodeAndValue(cls.tag, value)
@classmethod
def setTags(cls,value):
cls.tags_node = cls.setNodeAndValue(cls.tags,value)
@classmethod
def setCBS(cls,value):
cls.cbs_node = cls.setNodeAndValue(cls.cbs,value)
@classmethod
def setIcon(cls,value):
cls.str_icon = value
return cls.str_icon
@classmethod
def setHomePage(cls, value):
cls.str_homePage = value
@classmethod
def getHomePage(cls):
return cls.str_homePage
'''
Get the site homepage
'''
@classmethod
def getBaseUrl(cls,url=None):
if url == None:
url = cls.str_homePage
num = 3
index = 0
for x in range(0, num):
index = str(url).find("/",index)+1
return url[0:index-1]
@classmethod
def getIcon(cls):
return cls.str_icon
@classmethod
def getComicName(cls):
return cls.str_comicName
@classmethod
def getChapter(cls):
return cls.str_chapter
@classmethod
def fixFileName(cls,filename,replace=None):
intab = r'[?*/\|.:><]'
str_replace = ""
if not replace == None:
str_replace = replace
filename = re.sub(intab, str_replace, filename)
return filename
@classmethod
def getDirConfComic(cls):
if not cls.str_comicName == None:
return os.path.join(pathStr.base_conf_path, cls.str_comicName)
else:
print("comicName不存在退出中")
exit()
@classmethod
def getDirCBZComic(cls):
if not cls.str_comicName == None:
return os.path.join(pathStr.base_CBZ, cls.str_comicName)
else:
print("comicName不存在退出中")
exit()
@classmethod
def getDirCBZComicChapter(cls):
if not cls.str_comicName == None and not cls.str_chapter == None:
return os.path.join(pathStr.base_CBZ,cls.str_comicName,cls.str_chapter)
else:
print("comicName不存在退出中")
exit()
@classmethod
def getDirComic(cls):
if not cls.str_comicName == None:
return os.path.join(pathStr.base_comic_img, cls.str_comicName)
else:
print("comicName不存在退出中")
exit()
@classmethod
def getDirComicChapter(cls):
if not cls.str_comicName == None and not cls.str_chapter == None:
return os.path.join(pathStr.base_comic_img,cls.str_comicName,cls.str_chapter)
else:
print("comicName与chapter 不存在,退出中")
exit()
@classmethod
def getPathComicInfoXML(cls):
try:
cls.pathComicInfo = os.path.join(pathStr.base_comic_img,cls.str_comicName,cls.str_chapter,"ComicInfo.xml")
except:
return None
return cls.pathComicInfo
@classmethod
def writeComicInfoXML(cls,chapter):
if not cls.str_chapter == cls.fixFileName(chapter):
wait = input(f"cls.str_chapter {cls.str_chapter} and chapter {chapter} differ, continue? y/n ")
if wait == "y":
print("Continuing...")
else:
print("Cancelled by user...")
exit()
root = cls.Root()
newDocument = Document()
newDocument.appendChild(root)
if not cls.chapter_node == None:
root.appendChild(cls.chapter_node)
if not cls.comicName_node == None:
root.appendChild(cls.comicName_node)
if not cls.dep_node == None:
root.appendChild(cls.dep_node)
if not cls.author_node == None:
root.appendChild(cls.author_node)
if not cls.tag_node == None:
root.appendChild(cls.tag_node)
if not cls.cbs_node == None:
root.appendChild(cls.cbs_node)
if not cls.lang_node == None:
root.appendChild(cls.lang_node)
if not cls.comicNames_node == None:
root.appendChild(cls.comicNames_node)
if not cls.tags_node == None:
root.appendChild(cls.tags_node)
cls.getPathComicInfoXML()
base_dir = os.path.dirname(cls.pathComicInfo)
if not os.path.exists(base_dir):
os.makedirs(base_dir)
with open(cls.pathComicInfo , "w", encoding="utf-8") as fo:
newDocument.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
fo.close()
print("ComicInfo.xml 已生成 pathd=", cls.pathComicInfo)
# save file
@classmethod
def file_save(cls,path,data,mode=None):
result = {}
f = {}
dir_name = os.path.dirname(path)
if not os.path.exists(dir_name):
os.makedirs(dir_name)
save_path = os.path.join(path)
data = json.dumps(data)
if mode == None:
mode = "w+"
try:
f = open(save_path, mode, encoding="utf-8")
f.write(data)
f.close()
print("data=",data)
result = path + "文件写入成功"
except:
result = path + "文件写入失败"
return result
@classmethod
def nextSavePath(cls,next,data=None):
save_path = cls.getDirConfComic()+"/"+next+cls.str_chapter
if not data == None:
cls.file_save(save_path, data)
return save_path
@classmethod
def nextSaveInfoChapter(cls,chapter,data):
if cls.str_chapter == chapter:
cls.nextSavePath("info_",data)
else:
wait = input(f"chapter {cls.str_chapter}{chapter} 不一致,是否继续 y/n")
if wait == "y":
print("继续中")
else:
exit()
@classmethod
def nextInfoToImgChapter(cls):
info = cls.nextSavePath("info_")
img = cls.nextSavePath("img_")
shutil.move(info,img)
@classmethod
def nextImgToDownloadChapter(cls):
img = cls.nextSavePath("img_")
download = cls.nextSavePath("down_")
shutil.move(img, download)
@classmethod
def nextDownloadToCBZChapter(cls):
download = cls.nextSavePath("down_")
cbz = cls.nextSavePath("cbz_")
shutil.move(download, cbz)
@classmethod
def nextCBZToDoneChapter(cls):
cbz = cls.nextSavePath("cbz_")
done = cls.nextSavePath("done_")
shutil.move(cbz,done)
@classmethod
def nextExistsGetPath(cls,msg):
path = cls.nextSavePath(msg)
return os.path.exists(path)
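
The next* helpers above drive a per-chapter state machine through marker files in the comic's .conf directory. A minimal sketch of the intended progression, with hypothetical names:

# Marker prefix advances: info_ -> img_ -> down_ -> cbz_ -> done_
from utils.comic.ComicInfo import comicInfo

comicInfo.setComicName("Demo")
comicInfo.setChapterName("ch01")
comicInfo.nextSaveInfoChapter("ch01", {"images": []})  # writes .conf/Demo/info_ch01
comicInfo.nextInfoToImgChapter()       # metadata saved, image list known
comicInfo.nextImgToDownloadChapter()   # images downloaded
comicInfo.nextDownloadToCBZChapter()   # cover fetched, ready to pack
comicInfo.nextCBZToDoneChapter()       # archive built, chapter finished
print(comicInfo.nextExistsGetPath("done_"))  # True once complete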

utils/comic/PathStr.py

@@ -3,4 +3,5 @@ import os
class pathStr:
base_comic_out = "COMICOUT"
base_CBZ = os.path.join(base_comic_out,"CBZ")
base_comic_img = os.path.join(base_comic_out,"outputComic")
base_comic_img = os.path.join(base_comic_out,"outputComic")
base_conf_path = ".conf"
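
For reference, the directory layout these constants produce, as used by comicInfo and fileUtils elsewhere in this commit (both roots are ignored by the .gitignore change at the top):

COMICOUT/
    CBZ/<comic>/<chapter>.CBZ        # packed archives plus the copied cover
    outputComic/<comic>/<chapter>/   # downloaded chapter images
.conf/<comic>/                       # stage markers: info_/img_/down_/cbz_/done_<chapter>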

utils/downloader.py (new file, 87 added lines)

@@ -0,0 +1,87 @@
""" Download image according to given urls and automatically rename them in order. """
# -*- coding: utf-8 -*-
# author: Yabin Zheng
# Email: sczhengyabin@hotmail.com
from __future__ import print_function
import shutil
import imghdr
import os,time
import concurrent.futures
import requests
from utils.ImageUtils import imageUtils
from utils.HtmlUtils import htmlUtils
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Proxy-Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
"Accept-Encoding": "gzip, deflate, sdch",
# 'Connection': 'close',
}
def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, proxy=None):
proxies = None
if proxy_type is not None:
proxies = {
"http": proxy_type + "://" + proxy,
"https": proxy_type + "://" + proxy
}
response = None
file_path = os.path.join(dst_dir, file_name)
temp_path = file_path+".downloads"
try_times = 0
while True:
try:
try_times += 1
response = requests.get(
image_url, headers=htmlUtils.headers, timeout=timeout, proxies=proxies)
with open(temp_path, 'wb') as f:
f.write(response.content)
response.close()
shutil.move(temp_path,file_path)
break
except Exception as e:
if try_times < 10:
print(f"{try_times} try {file_path}")
continue
if response:
response.close()
print("## Fail: {} {}".format(image_url, e.args))
break
def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
"""
Download image according to given urls and automatically rename them in order.
:param timeout:
:param proxy:
:param proxy_type:
:param image_urls: list of image urls
:param dst_dir: output the downloaded images to dst_dir
:param file_prefix: if set to "img", files will be in format "img_xxx.jpg"
:param concurrency: number of requests process simultaneously
:return: none
"""
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
future_list = list()
count = 1
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
for image_url in image_urls:
img_prefix = "."+str(image_url).split(".")[-1]
file_name = ("{:0>3d}".format(count))+img_prefix
if scrambles and scrambles[count -1]:
su = "."+str(image_url).split(".")[-1]
de_str = str(image_url).split("/")[-1].replace(su,"==")
blocks = imageUtils.encodeImage(de_str)
file_name = "scramble="+str(blocks)+"_"+file_name
future_list.append(executor.submit(
download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
time.sleep(0.1)
count += 1
concurrent.futures.wait(future_list, timeout=180)
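
A minimal usage sketch of the new downloader, with hypothetical URLs; scrambles marks which images need the "scramble=" prefix:

from utils.downloader import download_images

urls = ["https://example.com/img/001.jpg",
        "https://example.com/img/abcDEF.jpg"]
download_images(urls, "COMICOUT/outputComic/Demo/ch01",
                scrambles=[False, True], concurrency=3)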

utils/entity/RouMan.py

@@ -1,63 +1,148 @@
import json,os
import json,os,time
from utils.comic.ComicStr import comicStr
from utils.ComicUtils import comicUtils
from utils.FileUtils import fileUtils
class comicEntityRM:
from utils.comic.PathStr import pathStr
from utils.NetUtils import netUtils
from utils.HtmlUtils import htmlUtils
from utils.ImageUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.CBZUtils import CBZUtils
from utils.downloader import download_images
class comicEntity:
@classmethod
def oneComic(cls,c_url):
# comic title
title = comicUtils.xpathData(comicStr.title,
'//div[@class="col"]/h5/text()',url=c_url,num=0)
title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0)
# alias
alias = comicUtils.xpathData(comicStr.alias,
'//span[contains(@class,"bookid_alias")]/text()',num=1)
icon = comicUtils.xpathData(comicStr.icon,
'//img[@class="img-thumbnail"]/@src')
alias = htmlUtils.xpathData('//span[contains(@class,"bookid_alias")]/text()',num=1)
icon = htmlUtils.xpathData('//img[@class="img-thumbnail"]/@src',num=0)
author = comicUtils.xpathData(comicStr.author,
'//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1)
tags = comicUtils.xpathData(comicStr.tags,
'//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()')
action = comicUtils.xpathData(comicStr.action,
'//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1)
dep = comicUtils.xpathData(comicStr.dep,
'//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1)
update_date = comicUtils.xpathData(comicStr.last_update,
'//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1)
chapters = comicUtils.xpathData(comicStr.chapters,
'//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
chapter_href = comicUtils.xpathData(comicStr.chapter_href,
'//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href')
comicUtils.setComic(title,alias,icon,author,tags,action,dep,update_date,chapters,chapter_href)
author = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1)
tags = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()',num=0)
action = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1)
dep = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1)
update_date = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1)
chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
chapter_href = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href')
#{'title': ['社區重建協會']}
# homepage
#homepage = {comicStr.homepage : [c_url] }
# images
#comicUtils.setComic(titles,homepage,icons,authors,deps,chapters, chapter_hrefs,last_update)
comicData = comicUtils.getComic()
print(comicData)
wait = input("数据暂停查看y/n")
if not wait == "y":
exit()
return comicData
author = str(author).replace("&",",").replace(" ",",")
comicInfo.setHomePage(c_url)
comicInfo.setComicName(str(title))
comicInfo.setComicNames(title+","+alias)
comicInfo.setAuthor(author)
comicInfo.setIcon(icon)
comicInfo.setTag(tags)
comicInfo.setTags(tags)
comicInfo.setDep(dep)
comicInfo.setCBS("韩漫")
comicInfo.setLang("zh")
comicInfo.setComicNames(title+","+alias)
#comicUtils.setComic(title,alias,icon,author,tags,action,dep,update_date,chapters,chapter_href)
count_chapter = 0
for href in chapter_href:
chapter = chapters[count_chapter]
comicInfo.setChapterName(chapter)
if not comicInfo.nextExistsGetPath("done_"):
comicEntity.comicChapter(href,scramble=True,sleep=8)
count_chapter += 1
'''
'''
Read all images under a chapter
'''
@classmethod
def comicChapter(cls,c_url,chapter):
xpath_str = '//img[contains(@class,"id_comicImage")]/@src'
not_eq = "/loading.jpg"
# all image links in this chapter
list_img = comicUtils.xpathData(comicStr.list_img,
xpath_str,url=c_url,type=chapter,not_eq=not_eq)
def comicChapter(cls,chapter_url,scramble=None,sleep=None):
cls.Onechapter(chapter_url,scramble,sleep)
# move to the next stage
comicInfo.nextImgToDownloadChapter()
if comicInfo.nextExistsGetPath("down_"):
# once all chapter images are downloaded, fetch the cover
netUtils.downloadComicIcon()
# next stage
comicInfo.nextDownloadToCBZChapter()
if comicInfo.nextExistsGetPath("cbz_"):
# pack automatically after download
CBZUtils.packAutoComicChapterCBZ()
comicInfo.nextCBZToDoneChapter()
@classmethod
def Onechapter(cls,chapter_url,scramble=None,sleep=None):
if not str(chapter_url).startswith("http"):
chapter_url = comicInfo.getBaseUrl() + chapter_url
chapter_dir = cls.comicChapterDownload(chapter_url)
if sleep == None:
print("not sleep")
#time.sleep(3)
else:
time.sleep(int(sleep))
# after downloading, start unscrambling the images
if scramble:
dirs = os.listdir(chapter_dir)
for img in dirs:
isScramble = str(img).startswith("scramble=")
if isScramble:
c_path = os.path.join(chapter_dir, img)
imageUtils.getScrambleImage(c_path)
# move to the next stage
comicInfo.nextInfoToImgChapter()
@classmethod
def comicChapterDownload(cls,chapter_url):
# comic title
data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=chapter_url)
# #
data = json.loads(data[0])
data = data.get("props")
x = data.get("pageProps")
bookName = x.get("bookName")
chapterName = x.get("chapterName")
#fileUtils.saveConfComicChapterInfo(chapterName,x,bookName)
#if comicInfo.nextExistsGetPath("info_"):
# print(f"{bookName} {chapterName} info文件已存在跳过")
comicInfo.nextSaveInfoChapter(chapterName,x)
alias = x.get("alias")
description = x.get("description")
images = x.get("images")
chapterAPIPath = x.get("chapterAPIPath")
comicInfo.setComicName(bookName)
comicInfo.setChapterName(chapterName)
comicInfo.setDep(description)
pathComicInfo = comicInfo.getPathComicInfoXML()
if not os.path.exists(pathComicInfo):
print("不存在ComicInfo.xml 生成中...")
comicInfo.writeComicInfoXML(chapterName)
if not chapterAPIPath == None:
base_url = comicUtils.getBaseUrl(chapter_url)
chapterAPIUrl = base_url+chapterAPIPath
data = htmlUtils.getJSON(chapterAPIUrl)
data = data.get("chapter")
chapterName = data.get("name")
images = data.get("images")
if images == None:
print(f"未获取到章节图像 comic_name={bookName} chapter={chapterName}")
totalChapter = x.get("totalChapter")
tags = x.get("tags")
x = tags
print(x)
count_image = 1
list_img = []
list_scramble = []
for image in images:
image_src = image.get("src")
scramble = image.get("scramble")
print("count=",count_image)
list_img.append(image_src)
list_scramble.append(scramble)
print(image_src)
print(scramble)
count_image+=1
print(count_image)
print(list_img)
wait = input("暂停查看数据y/n")
if not wait == "y":
exit()
return list_img
print(totalChapter)
#netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
download_images(list_img,comicInfo.getDirComicChapter(),scrambles=list_scramble)