""" Download image according to given urls and automatically rename them in order. """
|
|
# -*- coding: utf-8 -*-
|
|
# author: Yabin Zheng
|
|
# Email: sczhengyabin@hotmail.com
|
|
|
|
from __future__ import print_function
|
|
|
|
from queue import Queue
|
|
import shutil
|
|
import imghdr
|
|
import os
|
|
import concurrent.futures
|
|
import requests
|
|
import time
|
|
from utils.Ntfy import ntfy
|
|
from utils.comic.ComicInfo import comicInfo
|
|
from utils.HtmlUtils import htmlUtils
|
|
from utils.FileUtils import fileUtils as fu
|
|
|
|
# Browser-like HTTP headers sent with every image request so the remote
# server treats us as a normal desktop Chrome client.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Proxy-Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
    "Accept-Encoding": "gzip, deflate, sdch",
    # 'Connection': 'close',
}

# Shared work queue of [file_name, image_url, dst_dir] tasks: producers
# (download_images) enqueue items, worker threads (download_image) drain
# them, and failed downloads are pushed back for retry.
down_queue = Queue()
|
|
|
|
def common_download(file_name, image_url, dst_dir, timeout=10, proxy=None, proxy_type=None):
    """Download a single image to ``dst_dir/file_name``.

    The payload is first written to a ``.downloads`` temp file, verified as a
    real image via ``fu.ver_file``, and only then moved into place, so a
    corrupt or partial download never occupies the final path.  A corrupt
    download is pushed back onto ``down_queue`` for another attempt.

    :param file_name: target file name (an already-existing file is skipped)
    :param image_url: URL of the image to fetch
    :param dst_dir: destination directory (assumed to exist)
    :param timeout: per-request timeout in seconds
    :param proxy: proxy address "host:port", or None
    :param proxy_type: proxy scheme (e.g. "http", "socks5"); None disables proxying
    :return: None
    """
    proxies = None
    if proxy_type is not None:
        proxies = {
            "http": proxy_type + "://" + proxy,
            "https": proxy_type + "://" + proxy}
    file_path = os.path.join(dst_dir, file_name)
    if os.path.exists(file_path):
        # Already downloaded earlier -- skip.
        print("download_image 文件已存在,已跳过=", file_path)
        return None
    temp_path = os.path.join(dst_dir, file_name + ".downloads")
    repair_count = 1
    response = requests.get(
        image_url, headers=headers, timeout=timeout, proxies=proxies)
    # BUGFIX: the retry loop previously called download_image(image_url,
    # dst_dir, file_name) -- a different function with an incompatible
    # signature -- and never refreshed `response`, so the status code could
    # never change.  Re-issue the HTTP request instead.
    while response.status_code != 200 and repair_count <= 5:
        time.sleep(0.7)
        response.close()
        response = requests.get(
            image_url, headers=headers, timeout=timeout, proxies=proxies)
        ntfy.sendMsg(f'重试:第{repair_count}次 {image_url}')
        repair_count += 1
    with open(temp_path, 'wb') as f:
        f.write(response.content)
    response.close()
    # Verify the downloaded bytes really form an image before committing.
    if fu.ver_file(temp_path, type="image"):
        shutil.move(temp_path, file_path)
        print("## OK: {} {}".format(file_path, image_url))
    else:
        print("## Fail: {} {}".format(image_url, "图像损坏"))
        # Corrupt payload: leave the task on the queue for a later retry.
        down_queue.put([file_name, image_url, dst_dir])
|
|
|
|
def download_image(timeout=20, proxy_type=None, proxy=None, type="image"):
    """Worker: drain ``down_queue``, downloading each queued image task.

    Runs until the queue looks empty or this worker has processed 10 items.
    A task that raises during download is re-queued so another pass (or
    another worker thread) can retry it.

    :param timeout: per-request timeout, forwarded to common_download
    :param proxy_type: proxy scheme, forwarded to common_download
    :param proxy: proxy address, forwarded to common_download
    :param type: unused; kept only for backward compatibility with callers
    """
    repeat = 0
    while not down_queue.empty() and repeat <= 10:
        repeat += 1
        try:
            # Non-blocking get: another worker may drain the queue between
            # the empty() check and here (BUGFIX: that race used to raise
            # queue.Empty out of the worker thread).
            data = down_queue.get(False)
        except Exception:
            break
        (file_name, image_url, dst_dir) = (data[0], data[1], data[2])
        if repeat > 1:
            ntfy.sendMsg(f"第{repeat}次下载数据中... file_name={file_name}")
        try:
            # BUGFIX: forward the timeout/proxy settings instead of silently
            # dropping them and always using common_download's defaults.
            common_download(file_name, image_url, dst_dir,
                            timeout=timeout, proxy=proxy, proxy_type=proxy_type)
        except Exception:
            # Download failed: notify and push the task back for retry.
            ntfy.sendMsg(f"下载重试中 {file_name}={image_url}")
            down_queue.put([file_name, image_url, dst_dir])
|
|
|
|
|
|
|
|
def download_images(image_urls, dst_dir, concurrency=None, timeout=20,
                    proxy_type=None, proxy=None, files_name=None):
    """Download ``image_urls`` concurrently into ``dst_dir``.

    Each URL is queued together with its target file name on ``down_queue``,
    and a pool of ``download_image`` workers drains the queue.

    :param image_urls: list of image URLs
    :param dst_dir: output directory (created if missing)
    :param concurrency: number of worker threads; defaults to len(image_urls)
    :param timeout: passed to the workers, and used as the overall wait timeout
    :param proxy_type: proxy scheme (e.g. "http", "socks5"), or None
    :param proxy: proxy address "host:port", or None
    :param files_name: list of target file names parallel to image_urls;
        when None, each name falls back to the URL's last path segment
    :return: None
    """
    if not image_urls:
        # Nothing to do; also avoids ThreadPoolExecutor(max_workers=0),
        # which raises ValueError.
        return
    if concurrency is None:
        concurrency = len(image_urls)
    os.makedirs(dst_dir, exist_ok=True)
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        future_list = []
        for count, image_url in enumerate(image_urls):
            # BUGFIX: files_name=None used to raise TypeError on indexing;
            # fall back to the URL basename with any query string stripped.
            if files_name is not None:
                file_name = files_name[count]
            else:
                file_name = image_url.split("/")[-1].split("?")[0]
            down_queue.put([file_name, image_url, dst_dir])
            future_list.append(executor.submit(
                download_image, timeout, proxy_type, proxy))
        concurrent.futures.wait(future_list, timeout)
|
|
|
|
def download_comic_icon(is_new=comicInfo.IS_NEW_ICON):
    """Download the comic's cover icon and copy it alongside the CBZ output.

    NOTE(review): the ``is_new`` default is captured once at import time from
    ``comicInfo.IS_NEW_ICON``; pass it explicitly if that flag can change at
    runtime.

    :param is_new: when True, refresh the icon copy inside the chapter
        directory; otherwise copy it into the CBZ directory for the chapter
    :return: None when there is no icon URL to download
    """
    icon_url = comicInfo.getIcon()
    if icon_url is None:
        print("icon 不存在,已跳过")
        return None
    save_name = comicInfo.COMIC_ICON_NAME
    # File extension taken from the URL, e.g. ".jpg", query string stripped.
    icon_prefix = "." + str(icon_url).split(".")[-1]
    icon_prefix = icon_prefix.split("?")[0]
    # Skip the download when comicname/cover.jpg already exists and still
    # matches the recorded icon; a stale icon is removed and re-fetched.
    path_comic_icon = os.path.join(comicInfo.getDirConfComic(), save_name + icon_prefix)
    if not comicInfo.equIcon() and fu.exists(path_comic_icon):
        os.remove(path_comic_icon)
    if fu.notExists(path_comic_icon):
        download(icon_url, path_comic_icon)
    save_path = os.path.join(comicInfo.getDirCBZComic(), comicInfo.getChapter() + icon_prefix)
    if is_new:
        # Historical-version icon: drop the stale CBZ copy and refresh the
        # copy inside the chapter directory instead.
        if os.path.exists(save_path):
            os.remove(save_path)
        if os.path.exists(path_comic_icon):
            base_dir = comicInfo.getDirComicChapter()
            if not os.path.exists(base_dir):
                os.makedirs(base_dir)
            shutil.copy(path_comic_icon, os.path.join(base_dir, save_name + icon_prefix))
    else:
        if fu.notExists(comicInfo.getDirCBZComic()):
            os.makedirs(comicInfo.getDirCBZComic())
        shutil.copy(path_comic_icon, save_path)
        print(f"{path_comic_icon} 已复制至: {save_path}")
    # Persist the icon info, then advance the chapter/progress state.
    comicInfo.iconDB()
    comicInfo.nextDownloadToCBZChapter()
    comicInfo.setProgress(comicInfo.PROGRESS_CBZ)
|
|
|
|
# Generic download helper: fetch `url` and save it to `path`.
def download(url, path, file_type=None):
    """Download ``url`` to ``path`` via a temporary ``.downloads`` file.

    An existing valid image at ``path`` is left untouched; an existing
    corrupt file (per ``imghdr``) triggers a re-download after stripping any
    query string from the file name.  The request is retried up to 5 times
    on non-200 responses.

    :param url: resource URL
    :param path: destination file path
    :param file_type: when "image", warn if the response Content-Type is not
        an image (the file is still saved regardless)
    :return: the final path, or a skip message when a valid file already exists
    """
    if os.path.exists(path):
        if imghdr.what(path):
            msg = "已存在同路径文件,已跳过:" + path
            print(msg)
            return msg
        else:
            print("文件已损坏,已重试:" + path)
            path = os.path.join(os.path.dirname(path), str(os.path.basename(path)).split("?")[0])
    tmp_file = path + ".downloads"
    if os.path.exists(tmp_file):
        # Stale temp file from an interrupted run -- remove it first.
        os.remove(tmp_file)
        print("存在缓存文件,已删除:", tmp_file)
    repair_count = 1
    res = htmlUtils.getBytes(url)
    while res.status_code != 200 and repair_count <= 5:
        res = htmlUtils.getBytes(url)
        print(f'重试:第{repair_count}次 {url}')
        repair_count += 1
    # Sanity check: warn when the server did not return an image payload.
    if file_type == "image":
        if 'image' not in res.headers.get("content-type", ""):
            print(f"url= {url} Error: URL doesnot appear to be an image")
    basedir = os.path.dirname(path)
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    # Stream the body into the temp file, then move it into place so readers
    # never see a half-written file.
    # BUGFIX: removed a redundant f.close() inside the `with` block -- the
    # context manager already closes the file on exit.
    with open(tmp_file, 'wb') as f:
        for ch in res:
            f.write(ch)
    shutil.move(tmp_file, path)
    print(f"url={url} 保存至:{path}")
    return path