caiwx86 2022-12-06 13:45:23 +08:00
parent a8148ac383
commit f948382084
6 changed files with 86 additions and 64 deletions

View File

@@ -5,4 +5,5 @@ if __name__ == '__main__':
     # os.environ["https_proxy"] = "http://127.0.0.1:7890"
     # url = "https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/1"
    # comicEntity.comicChapter(url,scramble=True)
-    comicEntity.oneComic("https://rm01.xyz/books/47376792-2816-4ccf-a146-957b8d6a2ac6")
+    # comicEntity.oneComic("https://rm01.xyz/books/3104bc6f-bcf8-47a3-8383-2110f7fe6981")
+    comicEntity.downladsComcis("https://rm01.xyz/books?&page=0")

View File

@@ -53,3 +53,4 @@ class CBZUtils:
        chapter_path = comicInfo.getDirComicChapter()
        packCBZ_path = comicInfo.getDirCBZComicChapter()
        cls.zip_compression(chapter_path,packCBZ_path+".CBZ",type="delete")
+        comicInfo.nextCBZToDoneChapter()
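For context, a .CBZ is just a ZIP archive of page images. A minimal sketch of what zip_compression(..., type="delete") plausibly does, assuming it packs the chapter directory flat and removes the source afterwards (pack_dir_as_cbz is a hypothetical name, not part of this commit):

import os
import shutil
import zipfile

def pack_dir_as_cbz(src_dir, dst_path, delete_src=False):
    # CBZ readers show pages in name order, so sort before writing.
    with zipfile.ZipFile(dst_path, "w", zipfile.ZIP_STORED) as zf:
        for name in sorted(os.listdir(src_dir)):
            zf.write(os.path.join(src_dir, name), arcname=name)
    if delete_src:
        shutil.rmtree(src_dir)  # mirrors the type="delete" behaviour above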

View File

@@ -91,3 +91,4 @@ class netUtils:
        save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su)
        shutil.copy(pathComicIcon, save_path)
        print(f"{pathComicIcon} copied to: {save_path}")
+        comicInfo.nextDownloadToCBZChapter()

View File

@@ -4,4 +4,4 @@ class pathStr:
    base_comic_out = "COMICOUT"
    base_CBZ = os.path.join(base_comic_out,"CBZ")
    base_comic_img = os.path.join(base_comic_out,"outputComic")
-    base_conf_path = ".conf"
+    base_conf_path = os.path.join(base_comic_out,".conf")
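Effect of this change, shown with plain os.path.join (POSIX separators assumed): the config directory moves from the working directory into COMICOUT, alongside the other outputs.

import os

base_comic_out = "COMICOUT"
print(os.path.join(base_comic_out, "CBZ"))          # COMICOUT/CBZ
print(os.path.join(base_comic_out, "outputComic"))  # COMICOUT/outputComic
print(os.path.join(base_comic_out, ".conf"))        # COMICOUT/.conf (previously ./.conf)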

View File

@@ -7,11 +7,9 @@ from __future__ import print_function
 import shutil
 import imghdr
-import os,time
+import os
 import concurrent.futures
 import requests
-from utils.ImageUtils import imageUtils
-from utils.HtmlUtils import htmlUtils
+headers = {
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
@@ -33,28 +31,27 @@ def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, proxy=None):
    response = None
    file_path = os.path.join(dst_dir, file_name)
-    temp_path = file_path+".downloads"
-    try_times = 0
-    while True:
+    temp_path = os.path.join(dst_dir, file_name+".downloads")
+    repair_count = 1
    try:
-        try_times += 1
        response = requests.get(
-            image_url, headers=htmlUtils.headers, timeout=timeout, proxies=proxies)
+            image_url, headers=headers, timeout=timeout, proxies=proxies)
+        while response.status_code != 200 and repair_count <= 5:
+            download_image(image_url,dst_dir,file_name)
+            print(f'Retry {repair_count}: {image_url}')
+            repair_count += 1
        with open(temp_path, 'wb') as f:
            f.write(response.content)
        response.close()
        shutil.move(temp_path, file_path)
        print("## OK: {} {}".format(file_path, image_url))
    except Exception as e:
-        if try_times < 10:
-            print(f"Attempt {try_times}: {file_path}")
-            continue
        if response:
            response.close()
        print("## Fail: {} {}".format(image_url, e.args))
-        break
-def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
+def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, timeout=20, proxy_type=None, proxy=None,filesName=None):
    """
    Download image according to given urls and automatically rename them in order.
    :param timeout:
@@ -66,22 +63,15 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
    :param concurrency: number of requests process simultaneously
    :return: none
    """
    concurrency = len(image_urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        future_list = list()
-        count = 1
+        count = 0
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        for image_url in image_urls:
-            img_prefix = "."+str(image_url).split(".")[-1]
-            file_name = ("{:0>3d}".format(count))+img_prefix
-            if scrambles[count -1]:
-                su = "."+str(image_url).split(".")[-1]
-                de_str = str(image_url).split("/")[-1].replace(su,"==")
-                blocks = imageUtils.encodeImage(de_str)
-                file_name = "scramble="+str(blocks)+"_"+file_name
+            file_name = filesName[count]
            future_list.append(executor.submit(
                download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
-            time.sleep(0.1)
            count += 1
        concurrent.futures.wait(future_list, timeout=180)
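One caveat in download_image above: the non-200 branch recurses into itself and discards the result, then writes response.content regardless of status. A minimal sketch of the same write-to-temp-then-rename pattern with the retry done in a loop instead (names here are hypothetical, not part of this commit):

import shutil
import requests

def fetch_with_retry(url, dst_path, timeout=20, max_tries=5, headers=None):
    for attempt in range(1, max_tries + 1):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            if resp.status_code == 200:
                tmp = dst_path + ".downloads"  # partial file never shadows the real one
                with open(tmp, "wb") as f:
                    f.write(resp.content)
                shutil.move(tmp, dst_path)
                return True
            print(f"Retry {attempt}: HTTP {resp.status_code} {url}")
        except requests.RequestException as e:
            print(f"Retry {attempt}: {e} {url}")
    return False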

View File

@@ -1,4 +1,4 @@
-import json,os,time
+import json,os,time,random
 from utils.comic.ComicStr import comicStr
 from utils.ComicUtils import comicUtils
 from utils.FileUtils import fileUtils
@@ -12,7 +12,33 @@ from utils.downloader import download_images
 class comicEntity:
     @classmethod
-    def oneComic(cls,c_url):
+    def downladsComcis(cls,url):
+        # comic titles
+        data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url)
+        data = json.loads(data[0])
+        data = data.get("props")
+        x = data.get("pageProps")
+        books = x.get("books")
+        len_books = len(books)
+        baseUrl = comicInfo.getBaseUrl(url)
+        for x in range(0, len_books):
+            book = books[x]
+            # https://rm01.xyz/books/052426c3-ec7d-4035-b614-3c9290ee625b
+            book_id = book.get("id")
+            book_name = book.get("name")
+            comicHref = baseUrl+"/books/"+book_id
+            random_int = random.uniform(8,30)
+            print(f"Downloading comic {book_name} in {random_int} seconds")
+            time.sleep(random_int)
+            cls.oneComic(comicHref, random.uniform(0,3))
+        print(books)
+        # for comicHref in comicsHref:
+        #     cls.oneComic(comicHref,random.uniform(10,20))
+    @classmethod
+    def oneComic(cls,c_url,sleep=None):
         # comic title
         title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0)
         # alias
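The new downladsComcis reads the book list out of the Next.js state blob the site embeds in every page. A standalone sketch of that extraction, assuming only requests and lxml (get_books is hypothetical; htmlUtils.xpathData wraps the same XPath):

import json
import requests
from lxml import html

def get_books(url):
    page = html.fromstring(requests.get(url, timeout=20).content)
    raw = page.xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
    data = json.loads(raw)
    # mirrors data["props"]["pageProps"]["books"] in the method above
    return [(b["id"], b["name"]) for b in data["props"]["pageProps"]["books"]]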
@@ -46,8 +72,11 @@ class comicEntity:
            chapter = chapters[count_chapter]
            comicInfo.setChapterName(chapter)
            if not comicInfo.nextExistsGetPath("done_"):
-                comicEntity.comicChapter(href,scramble=True,sleep=8)
+                comicEntity.comicChapter(href,scramble=True,sleep=random.uniform(8,20))
            count_chapter += 1
+        # wait after a comic finishes downloading
+        if not sleep == None:
+            time.sleep(sleep)
'''
@@ -55,32 +84,28 @@
    '''
    @classmethod
    def comicChapter(cls,chapter_url,scramble=None,sleep=None):
-        cls.Onechapter(chapter_url,scramble,sleep)
+        cls.Onechapter(chapter_url,scramble)
-        # move to the next stage
-        comicInfo.nextImgToDownloadChapter()
        if comicInfo.nextExistsGetPath("down_"):
            # once all chapter images are downloaded, fetch the cover
            netUtils.downloadComicIcon()
-            # next stage
-            comicInfo.nextDownloadToCBZChapter()
        if comicInfo.nextExistsGetPath("cbz_"):
            # pack automatically after downloading
            CBZUtils.packAutoComicChapterCBZ()
-            comicInfo.nextCBZToDoneChapter()
+        if not sleep == None:
+            print(f"Starting the next chapter in {sleep} seconds")
+            time.sleep(sleep)
    @classmethod
-    def Onechapter(cls,chapter_url,scramble=None,sleep=None):
+    def Onechapter(cls,chapter_url,scramble=None):
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
-        chapter_dir = cls.comicChapterDownload(chapter_url)
-        if sleep == None:
-            print("not sleep")
-            #time.sleep(3)
-        else:
-            time.sleep(int(sleep))
+        cls.comicChapterDownload(chapter_url)
+        comicInfo.nextInfoToImgChapter()
        # after the download finishes, start descrambling images
        if scramble:
+            chapter_dir = comicInfo.getDirComicChapter()
            dirs = os.listdir(chapter_dir)
            for img in dirs:
                isScramble = str(img).startswith("scramble=")
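This commit also moves the stage transitions (nextDownloadToCBZChapter, nextCBZToDoneChapter) into the helpers that complete each stage. Reading the next*Chapter calls and the nextExistsGetPath prefixes together, the per-chapter pipeline appears to be a simple marker chain; a sketch under that assumption (the info_/img_ prefixes are guesses, only down_, cbz_, and done_ appear in this diff):

# Hypothetical reconstruction of the chapter state machine.
STAGES = ["info_", "img_", "down_", "cbz_", "done_"]

def next_stage(current):
    # info_ -> img_ -> down_ -> cbz_ -> done_
    return STAGES[STAGES.index(current) + 1]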
@@ -88,7 +113,8 @@ class comicEntity:
                c_path = os.path.join(chapter_dir, img)
                imageUtils.getScrambleImage(c_path)
        # move to the next stage
-        comicInfo.nextInfoToImgChapter()
+        comicInfo.nextImgToDownloadChapter()
    @classmethod
    def comicChapterDownload(cls,chapter_url):
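The descramble pass above keys off the file name alone. A hypothetical helper showing how the scramble=<blocks>_<index>.<ext> convention (built in comicChapterDownload below) can be parsed back:

def parse_scramble_name(file_name):
    # "scramble=10_004.jpg" -> (10, "004.jpg"); plain names -> None
    if not file_name.startswith("scramble="):
        return None
    head, _, rest = file_name.partition("_")
    return int(head[len("scramble="):]), rest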
@@ -129,20 +155,23 @@ class comicEntity:
        tags = x.get("tags")
        x = tags
        print(x)
-        count_image = 1
+        count = 1
        list_img = []
        list_scramble = []
+        list_fileName = []
        for image in images:
            image_src = image.get("src")
            scramble = image.get("scramble")
-            print("count=",count_image)
+            count_image = "{:0>3d}".format(count)
            list_img.append(image_src)
-            list_scramble.append(scramble)
-            print(image_src)
-            print(scramble)
-            count_image+=1
-            print(count_image)
-            print(list_img)
-            print(totalChapter)
+            image_src_prefix = "."+str(image_src).split(".")[-1]
+            if scramble:
+                su = "."+str(image_src).split(".")[-1]
+                de_str = str(image_src).split("/")[-1].replace(su,"==")
+                blocks = imageUtils.encodeImage(de_str)
+                count_image = "scramble="+str(blocks)+"_"+count_image
+            list_fileName.append(count_image+image_src_prefix)
+            count+=1
+        print("count_all_img=", count)
        # netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
-        download_images(list_img,comicInfo.getDirComicChapter(),scrambles=list_scramble)
+        return download_images(list_img,comicInfo.getDirComicChapter(), filesName=list_fileName)
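A worked example of the naming the loop above produces, with imageUtils.encodeImage stubbed out (the real block count comes from it) and a hypothetical image URL:

image_src = "https://example.invalid/media/abc123.jpg"  # hypothetical URL
count = 4
count_image = "{:0>3d}".format(count)                # "004"
image_src_prefix = "." + image_src.split(".")[-1]    # ".jpg"
de_str = image_src.split("/")[-1].replace(image_src_prefix, "==")  # "abc123=="
blocks = 10                                          # stand-in for imageUtils.encodeImage(de_str)
print("scramble=" + str(blocks) + "_" + count_image + image_src_prefix)
# -> scramble=10_004.jpg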