caiwx86 2022-12-06 13:45:23 +08:00
parent a8148ac383
commit f948382084
6 changed files with 86 additions and 64 deletions

View File

@@ -5,4 +5,5 @@ if __name__ == '__main__':
     # os.environ["https_proxy"] = "http://127.0.0.1:7890"
     # url = "https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/1"
    # comicEntity.comicChapter(url,scramble=True)
-    comicEntity.oneComic("https://rm01.xyz/books/47376792-2816-4ccf-a146-957b8d6a2ac6")
+    # comicEntity.oneComic("https://rm01.xyz/books/3104bc6f-bcf8-47a3-8383-2110f7fe6981")
+    comicEntity.downladsComcis("https://rm01.xyz/books?&page=0")

View File

@@ -53,3 +53,4 @@ class CBZUtils:
        chapter_path = comicInfo.getDirComicChapter()
        packCBZ_path = comicInfo.getDirCBZComicChapter()
        cls.zip_compression(chapter_path,packCBZ_path+".CBZ",type="delete")
+        comicInfo.nextCBZToDoneChapter()
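For context, a .CBZ is just a ZIP archive of page images. A minimal sketch of what zip_compression(..., type="delete") plausibly does, assuming it packs the chapter directory flat and removes the source afterwards (pack_dir_as_cbz is a hypothetical name, not part of this commit):

import os
import shutil
import zipfile

def pack_dir_as_cbz(src_dir, dst_path, delete_src=False):
    # CBZ readers show pages in name order, so sort before writing.
    with zipfile.ZipFile(dst_path, "w", zipfile.ZIP_STORED) as zf:
        for name in sorted(os.listdir(src_dir)):
            zf.write(os.path.join(src_dir, name), arcname=name)
    if delete_src:
        shutil.rmtree(src_dir)  # mirrors the type="delete" behaviour above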

View File

@@ -91,3 +91,4 @@ class netUtils:
        save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su)
        shutil.copy(pathComicIcon, save_path)
        print(f"{pathComicIcon} copied to: {save_path}")
+        comicInfo.nextDownloadToCBZChapter()

View File

@@ -4,4 +4,4 @@ class pathStr:
    base_comic_out = "COMICOUT"
    base_CBZ = os.path.join(base_comic_out,"CBZ")
    base_comic_img = os.path.join(base_comic_out,"outputComic")
-    base_conf_path = ".conf"
+    base_conf_path = os.path.join(base_comic_out,".conf")
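Effect of this change, shown with plain os.path.join (POSIX separators assumed): the config directory moves from the working directory into COMICOUT, alongside the other outputs.

import os

base_comic_out = "COMICOUT"
print(os.path.join(base_comic_out, "CBZ"))          # COMICOUT/CBZ
print(os.path.join(base_comic_out, "outputComic"))  # COMICOUT/outputComic
print(os.path.join(base_comic_out, ".conf"))        # COMICOUT/.conf (previously ./.conf)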

View File

@@ -7,11 +7,9 @@ from __future__ import print_function
 import shutil
 import imghdr
-import os,time
+import os
 import concurrent.futures
 import requests
-from utils.ImageUtils import imageUtils
-from utils.HtmlUtils import htmlUtils
+headers = {
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
@@ -33,28 +31,27 @@ def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, proxy=None):
    response = None
    file_path = os.path.join(dst_dir, file_name)
-    temp_path = file_path+".downloads"
-    try_times = 0
-    while True:
+    temp_path = os.path.join(dst_dir, file_name+".downloads")
+    repair_count = 1
    try:
-        try_times += 1
        response = requests.get(
-            image_url, headers=htmlUtils.headers, timeout=timeout, proxies=proxies)
+            image_url, headers=headers, timeout=timeout, proxies=proxies)
+        while response.status_code != 200 and repair_count <= 5:
+            download_image(image_url,dst_dir,file_name)
+            print(f'Retry {repair_count}: {image_url}')
+            repair_count += 1
        with open(temp_path, 'wb') as f:
            f.write(response.content)
        response.close()
        shutil.move(temp_path, file_path)
        print("## OK: {} {}".format(file_path, image_url))
    except Exception as e:
-        if try_times < 10:
-            print(f"Attempt {try_times}: {file_path}")
-            continue
        if response:
            response.close()
        print("## Fail: {} {}".format(image_url, e.args))
-        break
-def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
+def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, timeout=20, proxy_type=None, proxy=None,filesName=None):
    """
    Download image according to given urls and automatically rename them in order.
    :param timeout:
@@ -66,22 +63,15 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
    :param concurrency: number of requests process simultaneously
    :return: none
    """
    concurrency = len(image_urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        future_list = list()
-        count = 1
+        count = 0
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        for image_url in image_urls:
-            img_prefix = "."+str(image_url).split(".")[-1]
-            file_name = ("{:0>3d}".format(count))+img_prefix
-            if scrambles[count -1]:
-                su = "."+str(image_url).split(".")[-1]
-                de_str = str(image_url).split("/")[-1].replace(su,"==")
-                blocks = imageUtils.encodeImage(de_str)
-                file_name = "scramble="+str(blocks)+"_"+file_name
+            file_name = filesName[count]
            future_list.append(executor.submit(
                download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
-            time.sleep(0.1)
            count += 1
        concurrent.futures.wait(future_list, timeout=180)
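One caveat in download_image above: the non-200 branch recurses into itself and discards the result, then writes response.content regardless of status. A minimal sketch of the same write-to-temp-then-rename pattern with the retry done in a loop instead (names here are hypothetical, not part of this commit):

import shutil
import requests

def fetch_with_retry(url, dst_path, timeout=20, max_tries=5, headers=None):
    for attempt in range(1, max_tries + 1):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            if resp.status_code == 200:
                tmp = dst_path + ".downloads"  # partial file never shadows the real one
                with open(tmp, "wb") as f:
                    f.write(resp.content)
                shutil.move(tmp, dst_path)
                return True
            print(f"Retry {attempt}: HTTP {resp.status_code} {url}")
        except requests.RequestException as e:
            print(f"Retry {attempt}: {e} {url}")
    return False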

View File

@@ -1,4 +1,4 @@
-import json,os,time
+import json,os,time,random
 from utils.comic.ComicStr import comicStr
 from utils.ComicUtils import comicUtils
 from utils.FileUtils import fileUtils
@@ -12,7 +12,33 @@ from utils.downloader import download_images
 class comicEntity:
     @classmethod
-    def oneComic(cls,c_url):
+    def downladsComcis(cls,url):
+        # comic titles
+        data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url)
+        data = json.loads(data[0])
+        data = data.get("props")
+        x = data.get("pageProps")
+        books = x.get("books")
+        len_books = len(books)
+        baseUrl = comicInfo.getBaseUrl(url)
+        for x in range(0, len_books):
+            book = books[x]
+            # https://rm01.xyz/books/052426c3-ec7d-4035-b614-3c9290ee625b
+            book_id = book.get("id")
+            book_name = book.get("name")
+            comicHref = baseUrl+"/books/"+book_id
+            random_int = random.uniform(8,30)
+            print(f"Downloading comic {book_name} in {random_int} seconds")
+            time.sleep(random_int)
+            cls.oneComic(comicHref, random.uniform(0,3))
+        print(books)
+        # for comicHref in comicsHref:
+        #     cls.oneComic(comicHref,random.uniform(10,20))
+    @classmethod
+    def oneComic(cls,c_url,sleep=None):
         # comic title
         title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0)
         # alias
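The new downladsComcis reads the book list out of the Next.js state blob the site embeds in every page. A standalone sketch of that extraction, assuming only requests and lxml (get_books is hypothetical; htmlUtils.xpathData wraps the same XPath):

import json
import requests
from lxml import html

def get_books(url):
    page = html.fromstring(requests.get(url, timeout=20).content)
    raw = page.xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
    data = json.loads(raw)
    # mirrors data["props"]["pageProps"]["books"] in the method above
    return [(b["id"], b["name"]) for b in data["props"]["pageProps"]["books"]]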
@@ -46,8 +72,11 @@ class comicEntity:
            chapter = chapters[count_chapter]
            comicInfo.setChapterName(chapter)
            if not comicInfo.nextExistsGetPath("done_"):
-                comicEntity.comicChapter(href,scramble=True,sleep=8)
+                comicEntity.comicChapter(href,scramble=True,sleep=random.uniform(8,20))
            count_chapter += 1
+        # wait after a comic finishes downloading
+        if not sleep == None:
+            time.sleep(sleep)
'''
@@ -55,32 +84,28 @@
    '''
    @classmethod
    def comicChapter(cls,chapter_url,scramble=None,sleep=None):
-        cls.Onechapter(chapter_url,scramble,sleep)
+        cls.Onechapter(chapter_url,scramble)
-        # move to the next stage
-        comicInfo.nextImgToDownloadChapter()
        if comicInfo.nextExistsGetPath("down_"):
            # once all chapter images are downloaded, fetch the cover
            netUtils.downloadComicIcon()
-            # next stage
-            comicInfo.nextDownloadToCBZChapter()
        if comicInfo.nextExistsGetPath("cbz_"):
            # pack automatically after downloading
            CBZUtils.packAutoComicChapterCBZ()
-            comicInfo.nextCBZToDoneChapter()
+        if not sleep == None:
+            print(f"Starting the next chapter in {sleep} seconds")
+            time.sleep(sleep)
    @classmethod
-    def Onechapter(cls,chapter_url,scramble=None,sleep=None):
+    def Onechapter(cls,chapter_url,scramble=None):
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
-        chapter_dir = cls.comicChapterDownload(chapter_url)
-        if sleep == None:
-            print("not sleep")
-            #time.sleep(3)
-        else:
-            time.sleep(int(sleep))
+        cls.comicChapterDownload(chapter_url)
+        comicInfo.nextInfoToImgChapter()
        # after the download finishes, start descrambling images
        if scramble:
+            chapter_dir = comicInfo.getDirComicChapter()
            dirs = os.listdir(chapter_dir)
            for img in dirs:
                isScramble = str(img).startswith("scramble=")
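This commit also moves the stage transitions (nextDownloadToCBZChapter, nextCBZToDoneChapter) into the helpers that complete each stage. Reading the next*Chapter calls and the nextExistsGetPath prefixes together, the per-chapter pipeline appears to be a simple marker chain; a sketch under that assumption (the info_/img_ prefixes are guesses, only down_, cbz_, and done_ appear in this diff):

# Hypothetical reconstruction of the chapter state machine.
STAGES = ["info_", "img_", "down_", "cbz_", "done_"]

def next_stage(current):
    # info_ -> img_ -> down_ -> cbz_ -> done_
    return STAGES[STAGES.index(current) + 1]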
@@ -88,7 +113,8 @@ class comicEntity:
                c_path = os.path.join(chapter_dir, img)
                imageUtils.getScrambleImage(c_path)
        # move to the next stage
-        comicInfo.nextInfoToImgChapter()
+        comicInfo.nextImgToDownloadChapter()
    @classmethod
    def comicChapterDownload(cls,chapter_url):
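The descramble pass above keys off the file name alone. A hypothetical helper showing how the scramble=<blocks>_<index>.<ext> convention (built in comicChapterDownload below) can be parsed back:

def parse_scramble_name(file_name):
    # "scramble=10_004.jpg" -> (10, "004.jpg"); plain names -> None
    if not file_name.startswith("scramble="):
        return None
    head, _, rest = file_name.partition("_")
    return int(head[len("scramble="):]), rest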
@@ -129,20 +155,23 @@ class comicEntity:
        tags = x.get("tags")
        x = tags
        print(x)
-        count_image = 1
+        count = 1
        list_img = []
        list_scramble = []
+        list_fileName = []
        for image in images:
            image_src = image.get("src")
            scramble = image.get("scramble")
-            print("count=",count_image)
+            count_image = "{:0>3d}".format(count)
            list_img.append(image_src)
-            list_scramble.append(scramble)
-            print(image_src)
-            print(scramble)
-            count_image+=1
-            print(count_image)
-            print(list_img)
-            print(totalChapter)
+            image_src_prefix = "."+str(image_src).split(".")[-1]
+            if scramble:
+                su = "."+str(image_src).split(".")[-1]
+                de_str = str(image_src).split("/")[-1].replace(su,"==")
+                blocks = imageUtils.encodeImage(de_str)
+                count_image = "scramble="+str(blocks)+"_"+count_image
+            list_fileName.append(count_image+image_src_prefix)
+            count+=1
+        print("count_all_img=", count)
        # netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
-        download_images(list_img,comicInfo.getDirComicChapter(),scrambles=list_scramble)
+        return download_images(list_img,comicInfo.getDirComicChapter(), filesName=list_fileName)
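A worked example of the naming the loop above produces, with imageUtils.encodeImage stubbed out (the real block count comes from it) and a hypothetical image URL:

image_src = "https://example.invalid/media/abc123.jpg"  # hypothetical URL
count = 4
count_image = "{:0>3d}".format(count)                # "004"
image_src_prefix = "." + image_src.split(".")[-1]    # ".jpg"
de_str = image_src.split("/")[-1].replace(image_src_prefix, "==")  # "abc123=="
blocks = 10                                          # stand-in for imageUtils.encodeImage(de_str)
print("scramble=" + str(blocks) + "_" + count_image + image_src_prefix)
# -> scramble=10_004.jpg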