caiwx86 2024-11-14 18:52:33 +08:00
parent 2ed9aba6dd
commit be1e963cb7
3 changed files with 116 additions and 18 deletions

View File

@@ -0,0 +1,76 @@
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from queue import Queue
# Download a single image, retrying up to max_retries times before
# handing the task over to the shared retry queue
def download_image(url, save_path, retry_queue, max_retries=3):
    retries = 0
    while retries < max_retries:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # raise on HTTP error status
            # make sure the target directory exists (skip for bare filenames)
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            with open(save_path, "wb") as image_file:
                image_file.write(response.content)
            print(f"Downloaded: {save_path}")
            return True
        except Exception as e:
            retries += 1
            print(f"Download failed: {url} error: {e} attempt: {retries}")
    # max retries exhausted: defer the task to the retry queue
    retry_queue.put((url, save_path))
    return False
# Multi-threaded download with retry on failure
def download_images(urls_with_paths, max_workers=20, max_retries=3):
    retry_queue = Queue()
    # download concurrently with a ThreadPoolExecutor
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # submit every task
        future_to_url = {
            executor.submit(download_image, url, save_path, retry_queue, max_retries): (url, save_path)
            for url, save_path in urls_with_paths
        }
        # track progress as downloads complete
        for future in as_completed(future_to_url):
            url, save_path = future_to_url[future]
            try:
                future.result()
            except Exception as e:
                print(f"Download failed: {url} error: {e}")
    # Retry failed tasks; bound the number of rounds so a permanently
    # failing URL cannot keep this loop spinning forever
    retry_rounds = 0
    while not retry_queue.empty() and retry_rounds < max_retries:
        retry_rounds += 1
        retry_tasks = []
        while not retry_queue.empty():
            retry_tasks.append(retry_queue.get())
        # retry the failed downloads
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_url = {
                executor.submit(download_image, url, save_path, retry_queue, max_retries): (url, save_path)
                for url, save_path in retry_tasks
            }
            # track progress as retries complete
            for future in as_completed(future_to_url):
                url, save_path = future_to_url[future]
                try:
                    future.result()
                except Exception as e:
                    print(f"Download failed: {url} error: {e} (retry phase)")
    if not retry_queue.empty():
        print(f"Giving up on {retry_queue.qsize()} download(s) after {max_retries} retry round(s)")
if __name__ == "__main__":
    # example URLs and their target save paths
    urls_with_custom_paths = [
        ("https://example.com/image1.jpg", "./images/custom_name1.jpg"),
        ("https://example.com/image2.jpg", "./images/folder1/custom_name2.jpg"),
        ("https://example.com/image3.jpg", "./images/folder2/custom_name3.jpg"),
    ]
    # start the multi-threaded download with failure retry
    download_images(urls_with_custom_paths, max_workers=20, max_retries=3)
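
A quick way to exercise the module end to end, as a minimal sketch: serve a temporary directory with Python's built-in http.server and point download_images at it. The module name downloader and every path below are assumptions for illustration, not part of the commit.

import http.server
import os
import socketserver
import tempfile
import threading
from functools import partial

from downloader import download_images  # assumed module name for the file above

with tempfile.TemporaryDirectory() as src, tempfile.TemporaryDirectory() as dst:
    # create a fake "image" to serve
    with open(os.path.join(src, "a.jpg"), "wb") as f:
        f.write(b"fake image bytes")
    handler = partial(http.server.SimpleHTTPRequestHandler, directory=src)
    with socketserver.TCPServer(("127.0.0.1", 0), handler) as httpd:
        threading.Thread(target=httpd.serve_forever, daemon=True).start()
        port = httpd.server_address[1]
        download_images([(f"http://127.0.0.1:{port}/a.jpg", os.path.join(dst, "a.jpg"))], max_workers=2)
        httpd.shutdown()
    assert os.path.getsize(os.path.join(dst, "a.jpg")) > 0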

View File

@@ -466,7 +466,6 @@ class imageUtils:
        newh += b_h
        newimage.save(save_path)
        time.sleep(0.1)
-       logging.info(f"decrypted successfully {img_path} {save_path}")
        if os.path.exists(img_path):
            os.remove(img_path)

View File

@@ -16,6 +16,7 @@ from Comics._utils.utils import oldUtils
from Comics._utils.exporters import JsonExport, ItemExporter
from scrapy.pipelines.images import ImagesPipeline
from Comics._utils.ComicInfo import ComicInfoXml
from Comics._utils.downloader import download_images
class ComicsPipeline():
'''
@@ -71,7 +72,8 @@ class ImgDownloadPipeline(BaseImagesPipeline):
        donwloaded_images = []
        for image_item in images_item:
            image_url, image_path = [image_item["image_url"], image_item["image_path"]]
-           if image_item["image_type"] == "Icon": image_path = super().get_file_path(item, result_type="icon_cache")
+           if image_item["image_type"] == "Icon":
+               image_path = super().get_file_path(item, result_type="icon_cache")
            is_next = not super().image_scramble_exits(item, image_path)
            # the image (or its scrambled variant) already exists
            if not is_next:
@@ -123,7 +125,7 @@ class ImgDownloadPipeline(BaseImagesPipeline):
        images_file = oldUtils().old_images(folder=chapter_dir)
        images_urls = ComicLoader(item=item).get_image_urls()
        # validate the data is consistent
-       # if len(images_file) != len(images_urls) or len(images_urls) == 0: return
+       if len(images_file) != len(images_urls) or len(images_urls) == 0: return
        super().update_icon(item)
        # has the CBZ file already been created?
        if fu.exists(cbz_path):
@@ -150,6 +152,27 @@ class ImgDownloadPipeline(BaseImagesPipeline):
            item (_type_): Comic item data
            info (_type_): info
        """
        comic = ComicLoader(item=item)
        # collect the images that need to be parsed and downloaded
        images_item = comic.parse_images()
        downloaded_images = []
        down_queue = []
        for image_item in images_item:
            image_url, image_path = [image_item["image_url"], image_item["image_path"]]
            if image_item["image_type"] == "Image":
                is_next = not super().image_scramble_exits(item, image_path)
                # the image (or its scrambled variant) already exists
                if not is_next:
                    logging.info(f"file exists: IMAGE_STORE {image_path}")
                    downloaded_images.append(image_path)
                logging.info(f"images count= {len(images_item)} downloaded_images_count= {len(downloaded_images)}")
                # if every image is already downloaded, skip straight to packing the CBZ
                # if len(downloaded_images) == len(images_item):
                #     logging.info(f"len(downloaded_images) == len(images_item)")
                #     self.download_done(item)
                if is_next:
                    # logging.info(f"downloading {image_url} --> IMAGE_STORE {image_path}")
                    down_queue.append((image_url, os.path.join(IMAGES_STORE, image_path)))
        download_images(down_queue)
        # retry while some images remain undownloaded
        if self.download_validate(item):
            self.download_done(item)
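
Taken together, the new pipeline block is a queue-then-batch pattern: walk the parsed image list, skip what already exists on disk, collect (url, absolute_path) tuples, and hand the whole batch to download_images in one call instead of issuing per-image requests. A standalone sketch of that flow, with a hypothetical store root and image list, and the pipeline's existence check (image_scramble_exits) replaced by a plain os.path.exists stub:

import os

from Comics._utils.downloader import download_images  # the module added in this commit

IMAGES_STORE = "./images_store"  # hypothetical store root

def already_downloaded(rel_path):
    # stand-in for the pipeline's image_scramble_exits check
    return os.path.exists(os.path.join(IMAGES_STORE, rel_path))

# hypothetical parse result, mirroring images_item in the pipeline
images_item = [
    {"image_type": "Image", "image_url": "https://example.com/p1.jpg", "image_path": "comic/ch1/p1.jpg"},
    {"image_type": "Image", "image_url": "https://example.com/p2.jpg", "image_path": "comic/ch1/p2.jpg"},
]

down_queue = [
    (it["image_url"], os.path.join(IMAGES_STORE, it["image_path"]))
    for it in images_item
    if it["image_type"] == "Image" and not already_downloaded(it["image_path"])
]
download_images(down_queue)  # one batched call for everything still missing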