update
This commit is contained in:
parent a8148ac383
commit f948382084
main.py (3 changes)
@@ -5,4 +5,5 @@ if __name__ == '__main__':
    # os.environ["https_proxy"] = "http://127.0.0.1:7890"
    # url = "https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/1"
    # comicEntity.comicChapter(url,scramble=True)
    comicEntity.oneComic("https://rm01.xyz/books/47376792-2816-4ccf-a146-957b8d6a2ac6")
    # comicEntity.oneComic("https://rm01.xyz/books/3104bc6f-bcf8-47a3-8383-2110f7fe6981")
+   comicEntity.downladsComcis("https://rm01.xyz/books?&page=0")
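
Note: downladsComcis is only pointed at the first listing page here. A minimal driver for walking several pages, assuming the page query parameter simply increments (only page=0 appears in this commit), might look like:

for page in range(0, 5):  # first five listing pages, range is illustrative
    comicEntity.downladsComcis(f"https://rm01.xyz/books?&page={page}")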
@@ -53,3 +53,4 @@ class CBZUtils:
        chapter_path = comicInfo.getDirComicChapter()
        packCBZ_path = comicInfo.getDirCBZComicChapter()
        cls.zip_compression(chapter_path,packCBZ_path+".CBZ",type="delete")
+       comicInfo.nextCBZToDoneChapter()
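
Note: zip_compression itself is not shown in this diff. A minimal sketch of what a helper with this call signature might do (a .CBZ is just a ZIP archive of page images; the type="delete" behaviour is assumed from the call site):

import os, shutil, zipfile

def zip_compression(src_dir, dst_file, type=None):
    # Pack every file in src_dir into dst_file, stored uncompressed.
    with zipfile.ZipFile(dst_file, "w", zipfile.ZIP_STORED) as zf:
        for name in sorted(os.listdir(src_dir)):
            zf.write(os.path.join(src_dir, name), arcname=name)
    # Mirror the assumed type="delete" flag: drop the source directory after packing.
    if type == "delete":
        shutil.rmtree(src_dir)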
@@ -91,3 +91,4 @@ class netUtils:
        save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su)
        shutil.copy(pathComicIcon, save_path)
        print(f"{pathComicIcon} copied to: {save_path}")
+       comicInfo.nextDownloadToCBZChapter()
@@ -4,4 +4,4 @@ class pathStr:
    base_comic_out = "COMICOUT"
    base_CBZ = os.path.join(base_comic_out,"CBZ")
    base_comic_img = os.path.join(base_comic_out,"outputComic")
-   base_conf_path = ".conf"
+   base_conf_path = os.path.join(base_comic_out,".conf")
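
Note: with this change the .conf state directory moves from the working directory into COMICOUT, so every artifact shares one root; the resolved paths are:

import os
base_comic_out = "COMICOUT"
print(os.path.join(base_comic_out, "CBZ"))          # COMICOUT/CBZ
print(os.path.join(base_comic_out, "outputComic"))  # COMICOUT/outputComic
print(os.path.join(base_comic_out, ".conf"))        # COMICOUT/.conf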
@@ -7,11 +7,9 @@ from __future__ import print_function

import shutil
import imghdr
-import os,time
+import os
import concurrent.futures
import requests
from utils.ImageUtils import imageUtils
from utils.HtmlUtils import htmlUtils

headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
@@ -33,28 +31,27 @@ def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, p

    response = None
    file_path = os.path.join(dst_dir, file_name)
-   temp_path = file_path+".downloads"
    try_times = 0
    while True:
+       temp_path = os.path.join(dst_dir, file_name+".downloads")
        repair_count = 1
        try:
            try_times += 1
            response = requests.get(
-               image_url, headers=htmlUtils.headers, timeout=timeout, proxies=proxies)
+               image_url, headers=headers, timeout=timeout, proxies=proxies)
            while response.status_code != 200 and repair_count <= 5:
                download_image(image_url,dst_dir,file_name)
                print(f'retry #{repair_count}: {image_url}')
                repair_count += 1
            with open(temp_path, 'wb') as f:
                f.write(response.content)
            response.close()
            shutil.move(temp_path, file_path)
            print("## OK: {} {}".format(file_path, image_url))
        except Exception as e:
            if try_times < 10:
                print(f"attempt {try_times}: {file_path}")
                continue
            if response:
                response.close()
            print("## Fail: {} {}".format(image_url, e.args))
            break

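Note: the retry logic above combines an outer while/try loop with a recursive download_image call on non-200 responses. For comparison, a bounded-retry sketch using the same temp-file-then-rename pattern (names here are illustrative, not the project's API):

import os, shutil
import requests

def download_once(url, dst_dir, file_name, timeout=20, max_tries=10):
    # Write to a temporary .downloads file first, then rename on success,
    # so an interrupted transfer never leaves a half-written image behind.
    file_path = os.path.join(dst_dir, file_name)
    temp_path = file_path + ".downloads"
    for attempt in range(1, max_tries + 1):
        try:
            resp = requests.get(url, timeout=timeout)
            resp.raise_for_status()  # treat non-200 as a retryable error
            with open(temp_path, "wb") as f:
                f.write(resp.content)
            shutil.move(temp_path, file_path)
            return True
        except Exception as e:
            print(f"attempt {attempt}/{max_tries} failed for {url}: {e}")
    return False
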
-def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
+def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, timeout=20, proxy_type=None, proxy=None,filesName=None):
    """
    Download images according to the given urls and automatically rename them in order.
    :param timeout:
@@ -66,22 +63,15 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeo
    :param concurrency: number of requests processed simultaneously
    :return: none
    """

-   concurrency = len(image_urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        future_list = list()
-       count = 1
+       count = 0
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        for image_url in image_urls:
-           img_prefix = "."+str(image_url).split(".")[-1]
-           file_name = ("{:0>3d}".format(count))+img_prefix
-           if scrambles[count -1]:
-               su = "."+str(image_url).split(".")[-1]
-               de_str = str(image_url).split("/")[-1].replace(su,"==")
-               blocks = imageUtils.encodeImage(de_str)
-               file_name = "scramble="+str(blocks)+"_"+file_name
+           file_name = filesName[count]
            future_list.append(executor.submit(
                download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
-           time.sleep(0.1)
            count += 1
        concurrent.futures.wait(future_list, timeout=180)
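
Note: with filesName the caller now supplies every target file name up front, and count indexes that list from 0, so image_urls and filesName must be in matching order. A minimal usage sketch (URLs and names are placeholders):

urls = [
    "https://example.invalid/chapter/aaa.webp",
    "https://example.invalid/chapter/bbb.webp",
]
names = ["001.webp", "scramble=10_002.webp"]  # as built by comicChapterDownload
download_images(urls, "COMICOUT/outputComic/demo", filesName=names)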
@@ -1,4 +1,4 @@
-import json,os,time
+import json,os,time,random
from utils.comic.ComicStr import comicStr
from utils.ComicUtils import comicUtils
from utils.FileUtils import fileUtils
@@ -12,7 +12,33 @@ from utils.downloader import download_images
class comicEntity:

    @classmethod
-   def oneComic(cls,c_url):
+   def downladsComcis(cls,url):
+       # comic name
+       data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url)
+       # #
+       data = json.loads(data[0])
+       data = data.get("props")
+       x = data.get("pageProps")
+       books = x.get("books")
+       len_books = len(books)
+       baseUrl = comicInfo.getBaseUrl(url)
+       for x in range(0, len_books):
+           book = books[x]
+           # https://rm01.xyz/books/052426c3-ec7d-4035-b614-3c9290ee625b
+           book_id = book.get("id")
+           book_name = book.get("name")
+           comicHref = baseUrl+"/books/"+book_id
+           random_int = random.uniform(8,30)
+           print(f"starting download of comic {book_name} in {random_int} seconds")
+           time.sleep(random_int)
+           cls.oneComic(comicHref, random.uniform(0,3))
+
+       print(books)
+       #for comicHref in comicsHref:
+       #    cls.oneComic(comicHref,random.uniform(10,20))
+
+   @classmethod
+   def oneComic(cls,c_url,sleep=None):
        # comic name
        title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0)
        # alias
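
Note: downladsComcis reads the book list out of the Next.js __NEXT_DATA__ script tag. A self-contained sketch of the same extraction (htmlUtils.xpathData is assumed to do the equivalent with requests + lxml):

import json
import requests
from lxml import etree

def fetch_books(url):
    # Pull the embedded Next.js page state and return pageProps.books.
    html = requests.get(url, timeout=20).text
    script = etree.HTML(html).xpath('//script[@id="__NEXT_DATA__"]/text()')
    state = json.loads(script[0])
    return state["props"]["pageProps"]["books"]

for book in fetch_books("https://rm01.xyz/books?&page=0"):
    print(book.get("id"), book.get("name"))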
@@ -46,8 +72,11 @@ class comicEntity:
            chapter = chapters[count_chapter]
            comicInfo.setChapterName(chapter)
            if not comicInfo.nextExistsGetPath("done_"):
-               comicEntity.comicChapter(href,scramble=True,sleep=8)
+               comicEntity.comicChapter(href,scramble=True,sleep=random.uniform(8,20))
            count_chapter += 1
+       # wait after a whole comic has been downloaded
+       if not sleep == None:
+           time.sleep(sleep)

    '''
@@ -55,32 +84,28 @@ class comicEntity:
    '''
    @classmethod
    def comicChapter(cls,chapter_url,scramble=None,sleep=None):
-       cls.Onechapter(chapter_url,scramble,sleep)
+       cls.Onechapter(chapter_url,scramble)
        # move to the next stage
        comicInfo.nextImgToDownloadChapter()

        if comicInfo.nextExistsGetPath("down_"):
            # once every chapter image is downloaded, fetch the cover
            netUtils.downloadComicIcon()
            # next stage
            comicInfo.nextDownloadToCBZChapter()
        if comicInfo.nextExistsGetPath("cbz_"):
            # pack automatically after downloading
            CBZUtils.packAutoComicChapterCBZ()
            comicInfo.nextCBZToDoneChapter()
+       if not sleep == None:
+           print(f"starting the next chapter in {sleep} seconds")
+           time.sleep(sleep)

    @classmethod
-   def Onechapter(cls,chapter_url,scramble=None,sleep=None):
+   def Onechapter(cls,chapter_url,scramble=None):
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
-       chapter_dir = cls.comicChapterDownload(chapter_url)
-       if sleep == None:
-           print("not sleep")
-           #time.sleep(3)
-       else:
-           time.sleep(int(sleep))
+       cls.comicChapterDownload(chapter_url)
        comicInfo.nextInfoToImgChapter()
        # after the download finishes, start descrambling the images
        if scramble:
            chapter_dir = comicInfo.getDirComicChapter()
            dirs = os.listdir(chapter_dir)
            for img in dirs:
                isScramble = str(img).startswith("scramble=")
@@ -88,7 +113,8 @@ class comicEntity:
                c_path = os.path.join(chapter_dir, img)
                imageUtils.getScrambleImage(c_path)
        # move to the next stage
-       comicInfo.nextInfoToImgChapter()
+       comicInfo.nextImgToDownloadChapter()


    @classmethod
    def comicChapterDownload(cls,chapter_url):
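
Note: the next*To*Chapter calls step each chapter through a fixed pipeline of stage markers; "down_", "cbz_" and "done_" appear in this diff, while the earlier stage names are assumptions. A compact sketch of that progression:

# Assumed stage order; only down_, cbz_ and done_ are visible in the diff.
STAGES = ["info_", "img_", "down_", "cbz_", "done_"]

def next_stage(current):
    # Advance a chapter marker one step along the pipeline.
    i = STAGES.index(current)
    return STAGES[min(i + 1, len(STAGES) - 1)]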
@@ -129,20 +155,23 @@ class comicEntity:
        tags = x.get("tags")
        x = tags
        print(x)
-       count_image = 1
+       count = 1
        list_img = []
        list_scramble = []
+       list_fileName = []
        for image in images:
            image_src = image.get("src")
            scramble = image.get("scramble")
-           print("count=",count_image)
+           count_image = "{:0>3d}".format(count)
            list_img.append(image_src)
            list_scramble.append(scramble)
-           print(image_src)
-           print(scramble)
-           count_image+=1
-           print(count_image)
-       print(list_img)
-       print(totalChapter)
+           image_src_prefix = "."+str(image_src).split(".")[-1]
+           if scramble:
+               su = "."+str(image_src).split(".")[-1]
+               de_str = str(image_src).split("/")[-1].replace(su,"==")
+               blocks = imageUtils.encodeImage(de_str)
+               count_image = "scramble="+str(blocks)+"_"+count_image
+           list_fileName.append(count_image+image_src_prefix)
+           count+=1
        print("count_all_img=", count)
        #netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
-       download_images(list_img,comicInfo.getDirComicChapter(),scrambles=list_scramble)
+       return download_images(list_img,comicInfo.getDirComicChapter(), filesName=list_fileName)
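
Note: the file names built above encode the scramble block count directly in the name, e.g. scramble=10_001.webp, which Onechapter later detects via startswith("scramble="). A round-trip sketch of that convention (imageUtils.encodeImage is the project's helper; an integer block count is assumed):

def make_name(blocks, index, suffix):
    # "scramble=<blocks>_<index><suffix>", index zero-padded to three digits
    return "scramble=" + str(blocks) + "_" + "{:0>3d}".format(index) + suffix

def parse_name(file_name):
    # Recover the block count and the plain file name.
    assert file_name.startswith("scramble=")
    blocks, rest = file_name[len("scramble="):].split("_", 1)
    return int(blocks), rest

print(make_name(10, 1, ".webp"))           # scramble=10_001.webp
print(parse_name("scramble=10_001.webp"))  # (10, '001.webp')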