caiwx86 2022-12-06 13:45:23 +08:00
parent a8148ac383
commit f948382084
6 changed files with 86 additions and 64 deletions

View File

@@ -5,4 +5,5 @@ if __name__ == '__main__':
     # os.environ["https_proxy"] = "http://127.0.0.1:7890"
     # url = "https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/1"
     # comicEntity.comicChapter(url,scramble=True)
-    comicEntity.oneComic("https://rm01.xyz/books/47376792-2816-4ccf-a146-957b8d6a2ac6")
+    # comicEntity.oneComic("https://rm01.xyz/books/3104bc6f-bcf8-47a3-8383-2110f7fe6981")
+    comicEntity.downladsComcis("https://rm01.xyz/books?&page=0")

View File

@@ -52,4 +52,5 @@ class CBZUtils:
     def packAutoComicChapterCBZ(cls):
         chapter_path = comicInfo.getDirComicChapter()
         packCBZ_path = comicInfo.getDirCBZComicChapter()
         cls.zip_compression(chapter_path,packCBZ_path+".CBZ",type="delete")
+        comicInfo.nextCBZToDoneChapter()
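`zip_compression` itself is not shown in this diff, but a CBZ is just a ZIP archive of page images. A minimal sketch of what such a helper could look like, assuming `type="delete"` means the source folder is removed after packing (the function name and parameters mirror the call above; the body is an assumption):

```python
import os
import shutil
import zipfile

def zip_compression(src_dir, dst_cbz, type=None):
    """Pack every file in src_dir into dst_cbz (a plain ZIP with a .CBZ name)."""
    with zipfile.ZipFile(dst_cbz, "w", zipfile.ZIP_DEFLATED) as zf:
        for name in sorted(os.listdir(src_dir)):
            zf.write(os.path.join(src_dir, name), arcname=name)
    if type == "delete":
        shutil.rmtree(src_dir)  # drop the unpacked images once archived
```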

View File

@@ -90,4 +90,5 @@ class netUtils:
             os.makedirs(pathCBZComic)
         save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su)
         shutil.copy(pathComicIcon, save_path)
         print(f"{pathComicIcon} copied to: {save_path}")
+        comicInfo.nextDownloadToCBZChapter()
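A small side note on the context lines above: the code guards `os.makedirs` behind an existence check. Since Python 3.2 the same can be done race-free in one call; a one-line equivalent using the `pathCBZComic` variable from the hunk:

```python
import os

os.makedirs(pathCBZComic, exist_ok=True)  # no prior os.path.exists() check needed
```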

View File

@@ -4,4 +4,4 @@ class pathStr:
     base_comic_out = "COMICOUT"
     base_CBZ = os.path.join(base_comic_out,"CBZ")
     base_comic_img = os.path.join(base_comic_out,"outputComic")
-    base_conf_path = ".conf"
+    base_conf_path = os.path.join(base_comic_out,".conf")

View File

@@ -7,11 +7,9 @@ from __future__ import print_function
 import shutil
 import imghdr
-import os,time
+import os
 import concurrent.futures
 import requests
-from utils.ImageUtils import imageUtils
-from utils.HtmlUtils import htmlUtils

 headers = {
     "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
@@ -33,28 +31,27 @@ def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, proxy=None):
     response = None
     file_path = os.path.join(dst_dir, file_name)
-    temp_path = file_path+".downloads"
-    try_times = 0
-    while True:
-        try:
-            try_times += 1
-            response = requests.get(
-                image_url, headers=htmlUtils.headers, timeout=timeout, proxies=proxies)
-            with open(temp_path, 'wb') as f:
-                f.write(response.content)
-            response.close()
-            shutil.move(temp_path,file_path)
-        except Exception as e:
-            if try_times < 10:
-                print(f"{try_times} try {file_path}")
-                continue
-            if response:
-                response.close()
-            print("## Fail: {} {}".format(image_url, e.args))
-        break
+    temp_path = os.path.join(dst_dir, file_name+".downloads")
+    repair_count = 1
+    try:
+        response = requests.get(
+            image_url, headers=headers, timeout=timeout, proxies=proxies)
+        while response.status_code != 200 and repair_count <= 5:
+            download_image(image_url,dst_dir,file_name)
+            print(f'Retry {repair_count}: {image_url}')
+            repair_count += 1
+        with open(temp_path, 'wb') as f:
+            f.write(response.content)
+        response.close()
+        shutil.move(temp_path, file_path)
+        print("## OK: {} {}".format(file_path, image_url))
+    except Exception as e:
+        if response:
+            response.close()
+        print("## Fail: {} {}".format(image_url, e.args))

-def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
+def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, timeout=20, proxy_type=None, proxy=None,filesName=None):
     """
     Download image according to given urls and automatically rename them in order.
     :param timeout:
@ -66,22 +63,15 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeo
:param concurrency: number of requests process simultaneously :param concurrency: number of requests process simultaneously
:return: none :return: none
""" """
concurrency = len(image_urls)
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
future_list = list() future_list = list()
count = 1 count = 0
if not os.path.exists(dst_dir): if not os.path.exists(dst_dir):
os.makedirs(dst_dir) os.makedirs(dst_dir)
for image_url in image_urls: for image_url in image_urls:
img_prefix = "."+str(image_url).split(".")[-1] file_name = filesName[count]
file_name = ("{:0>3d}".format(count))+img_prefix
if scrambles[count -1]:
su = "."+str(image_url).split(".")[-1]
de_str = str(image_url).split("/")[-1].replace(su,"==")
blocks = imageUtils.encodeImage(de_str)
file_name = "scramble="+str(blocks)+"_"+file_name
future_list.append(executor.submit( future_list.append(executor.submit(
download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy)) download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
time.sleep(0.1)
count += 1 count += 1
concurrent.futures.wait(future_list, timeout=180) concurrent.futures.wait(future_list, timeout=180)
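The rewritten `download_image` retries in two overlapping ways: it calls itself recursively while also looping on `status_code`, so one bad response can fan out into multiple extra requests, and the recursive result is never re-checked. A bounded loop that simply re-issues the request is flatter and easier to reason about. A sketch under the same assumptions as the diff (headers and proxies passed in rather than read from module state; not the committed implementation):

```python
import os
import shutil
import requests

def download_image_retry(image_url, dst_dir, file_name,
                         timeout=20, proxies=None, headers=None, retries=5):
    """Fetch image_url into dst_dir/file_name; retry non-200 responses up to `retries` times."""
    file_path = os.path.join(dst_dir, file_name)
    temp_path = file_path + ".downloads"
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(image_url, headers=headers,
                                    timeout=timeout, proxies=proxies)
            if response.status_code != 200:
                print(f"Retry {attempt}: {image_url} -> HTTP {response.status_code}")
                continue
            # write to a temp name first so a crash never leaves a half-written final file
            with open(temp_path, "wb") as f:
                f.write(response.content)
            shutil.move(temp_path, file_path)
            print("## OK: {} {}".format(file_path, image_url))
            return True
        except requests.RequestException as e:
            print("## Fail: {} {}".format(image_url, e.args))
    return False
```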

View File

@ -1,4 +1,4 @@
import json,os,time import json,os,time,random
from utils.comic.ComicStr import comicStr from utils.comic.ComicStr import comicStr
from utils.ComicUtils import comicUtils from utils.ComicUtils import comicUtils
from utils.FileUtils import fileUtils from utils.FileUtils import fileUtils
@@ -12,7 +12,33 @@ from utils.downloader import download_images
 class comicEntity:
     @classmethod
-    def oneComic(cls,c_url):
+    def downladsComcis(cls,url):
+        # page state embedded by Next.js as __NEXT_DATA__ JSON
+        data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url)
+        data = json.loads(data[0])
+        data = data.get("props")
+        x = data.get("pageProps")
+        books = x.get("books")
+        len_books = len(books)
+        baseUrl = comicInfo.getBaseUrl(url)
+        for x in range(0, len_books):
+            book = books[x]
+            # e.g. https://rm01.xyz/books/052426c3-ec7d-4035-b614-3c9290ee625b
+            book_id = book.get("id")
+            book_name = book.get("name")
+            comicHref = baseUrl+"/books/"+book_id
+            random_int = random.uniform(8,30)
+            print(f"Downloading comic {book_name} in {random_int} seconds")
+            time.sleep(random_int)
+            cls.oneComic(comicHref, random.uniform(0,3))
+        print(books)
+        #for comicHref in comicsHref:
+        #    cls.oneComic(comicHref,random.uniform(10,20))
+    @classmethod
+    def oneComic(cls,c_url,sleep=None):
         # comic name
         title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0)
         # alias
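The new `downladsComcis` relies on the site being a Next.js app: the full page state ships as JSON inside a `<script id="__NEXT_DATA__">` tag, so no HTML scraping of the book grid is needed. A standalone sketch of the same extraction with requests and lxml (`htmlUtils.xpathData` presumably wraps something similar; the `props`/`pageProps`/`books` keys are taken from the hunk above, everything else is illustrative):

```python
import json
import requests
from lxml import etree

def list_books(url):
    """Return the (id, name) pairs embedded in a Next.js listing page."""
    html = etree.HTML(requests.get(url, timeout=20).text)
    raw = html.xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
    books = json.loads(raw)["props"]["pageProps"]["books"]
    return [(b["id"], b["name"]) for b in books]
```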
@@ -46,41 +72,40 @@ class comicEntity:
             chapter = chapters[count_chapter]
             comicInfo.setChapterName(chapter)
             if not comicInfo.nextExistsGetPath("done_"):
-                comicEntity.comicChapter(href,scramble=True,sleep=8)
+                comicEntity.comicChapter(href,scramble=True,sleep=random.uniform(8,20))
             count_chapter += 1
+        # wait after one comic has finished downloading
+        if sleep is not None:
+            time.sleep(sleep)
     '''
     Read all images of a chapter
     '''
     @classmethod
     def comicChapter(cls,chapter_url,scramble=None,sleep=None):
-        cls.Onechapter(chapter_url,scramble,sleep)
+        cls.Onechapter(chapter_url,scramble)
         # move on to the next stage
-        comicInfo.nextImgToDownloadChapter()
         if comicInfo.nextExistsGetPath("down_"):
             # once all chapter images are downloaded, fetch the cover
             netUtils.downloadComicIcon()
         # next stage
-        comicInfo.nextDownloadToCBZChapter()
         if comicInfo.nextExistsGetPath("cbz_"):
             # pack automatically after downloading
             CBZUtils.packAutoComicChapterCBZ()
-            comicInfo.nextCBZToDoneChapter()
+        if sleep is not None:
+            print(f"Starting the next chapter in {sleep} seconds")
+            time.sleep(sleep)
     @classmethod
-    def Onechapter(cls,chapter_url,scramble=None,sleep=None):
+    def Onechapter(cls,chapter_url,scramble=None):
         if not str(chapter_url).startswith("http"):
             chapter_url = comicInfo.getBaseUrl() + chapter_url
-        chapter_dir = cls.comicChapterDownload(chapter_url)
-        if sleep == None:
-            print("not sleep")
-            #time.sleep(3)
-        else:
-            time.sleep(int(sleep))
+        cls.comicChapterDownload(chapter_url)
+        comicInfo.nextInfoToImgChapter()
         # once downloaded, start descrambling the images
         if scramble:
+            chapter_dir = comicInfo.getDirComicChapter()
             dirs = os.listdir(chapter_dir)
             for img in dirs:
                 isScramble = str(img).startswith("scramble=")
@@ -88,7 +113,8 @@ class comicEntity:
                     c_path = os.path.join(chapter_dir, img)
                     imageUtils.getScrambleImage(c_path)
         # move on to the next stage
-        comicInfo.nextInfoToImgChapter()
+        comicInfo.nextImgToDownloadChapter()
     @classmethod
     def comicChapterDownload(cls,chapter_url):
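The `nextInfoToImgChapter` / `nextImgToDownloadChapter` / `nextDownloadToCBZChapter` / `nextCBZToDoneChapter` calls this commit shuffles around appear to advance a per-chapter state machine, with `nextExistsGetPath("down_")` / `("cbz_")` / `("done_")` probing the current stage so interrupted runs can resume. Their implementation is not in the diff; one plausible way to realize such a pipeline is with prefixed marker files, sketched here (stage prefixes taken from the calls above, everything else assumed):

```python
import os

STAGES = ["info_", "img_", "down_", "cbz_", "done_"]

def advance_stage(conf_dir, chapter, old, new):
    """Rename the chapter's marker file so its prefix records the new stage."""
    src = os.path.join(conf_dir, old + chapter)
    if os.path.exists(src):
        os.rename(src, os.path.join(conf_dir, new + chapter))

def stage_exists(conf_dir, chapter, prefix):
    """True if the chapter is currently in the stage named by prefix."""
    return os.path.exists(os.path.join(conf_dir, prefix + chapter))
```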
@@ -129,20 +155,23 @@ class comicEntity:
         tags = x.get("tags")
         x = tags
         print(x)
-        count_image = 1
+        count = 1
         list_img = []
         list_scramble = []
+        list_fileName = []
         for image in images:
             image_src = image.get("src")
             scramble = image.get("scramble")
-            print("count=",count_image)
+            count_image = "{:0>3d}".format(count)
             list_img.append(image_src)
-            list_scramble.append(scramble)
-            print(image_src)
-            print(scramble)
-            count_image+=1
-        print(count_image)
-        print(list_img)
-        print(totalChapter)
+            image_src_prefix = "."+str(image_src).split(".")[-1]
+            if scramble:
+                su = "."+str(image_src).split(".")[-1]
+                de_str = str(image_src).split("/")[-1].replace(su,"==")
+                blocks = imageUtils.encodeImage(de_str)
+                count_image = "scramble="+str(blocks)+"_"+count_image
+            list_fileName.append(count_image+image_src_prefix)
+            count+=1
+        print("count_all_img=", count - 1)
         #netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
-        download_images(list_img,comicInfo.getDirComicChapter(),scrambles=list_scramble)
+        return download_images(list_img,comicInfo.getDirComicChapter(), filesName=list_fileName)
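This hunk moves the naming logic out of the downloader and into `comicChapterDownload`: every file name is computed up front, and scrambled pages get a `scramble=<blocks>_` prefix that `Onechapter` later detects via `startswith("scramble=")` before descrambling. A condensed sketch of the scheme, with `imageUtils.encodeImage` (the project's helper for deriving the block count from the id in the URL) treated as a black box and passed in as a callable:

```python
def build_file_name(index, image_src, scramble, encode_image):
    """Zero-padded page number, plus a scramble=<blocks>_ prefix for scrambled pages."""
    name = "{:0>3d}".format(index)
    ext = "." + str(image_src).split(".")[-1]
    if scramble:
        de_str = str(image_src).split("/")[-1].replace(ext, "==")
        name = "scramble=" + str(encode_image(de_str)) + "_" + name
    return name + ext
```

For example, `build_file_name(1, ".../abc123.webp", True, imageUtils.encodeImage)` yields something like `scramble=<n>_001.webp`, while an unscrambled page simply becomes `001.webp`.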