diff --git a/get_jm_url.py b/get_jm_url.py deleted file mode 100644 index 4101650..0000000 --- a/get_jm_url.py +++ /dev/null @@ -1,2 +0,0 @@ -def app(): - return ["jmcomic2.onl","jmcomic1.onl","jmcomic.onl","jmcomic.me","jmcomic1.me","18comic.org"] \ No newline at end of file diff --git a/jmdowning.py b/jmdowning.py deleted file mode 100644 index a709838..0000000 --- a/jmdowning.py +++ /dev/null @@ -1,285 +0,0 @@ -import io - -import requests -import time -import os,re -from multiprocessing import Queue -import threading -from threading import Lock -from lxml import etree -import math -import execjs -from PIL import Image -from myran import Myran -import get_jm_url -from utils.ComicInfo import comicInfo -from utils.PathStr import pathStr -from utils.CBZUtils import CBZUtils -from utils.HtmlUtils import htmlUtils - -os.environ['EXECJS_RUNTIME'] = "JScript" - -class Data: - - @classmethod - def oneChapter(cls,*args): - book_name = comicInfo.getComicName() - chapter_name = comicInfo.getChapter() - chapter_href = comicInfo.getWeb() - try: - #print(data[2]) - #response = requests.get(url=chapter_href, headers=headers, proxies=proxy) - path_album = os.path.join(pathStr.base_comic_img,book_name) - path_photo = os.path.join(path_album, chapter_name) - # path_img = "path_photo\\%s.jpg" %img_name - with lock: # 判断文件夹是否存在要加锁 - if not os.path.exists(path_album): os.makedirs(path_album) - if not os.path.exists(path_photo): os.makedirs(path_photo) -# comicInfo.writeComicInfoXML(data[0],path=path_photo) - cls.parse(chapter_href,path_photo,args[0]) - # except requests.exceptions.ConnectionError: - # print("重新抛入queue:",data) - # data_queue.put(data) - except Exception as e: - print(e.__traceback__.tb_lineno,e) - print("重新抛入data_queue:") - cls.oneChapter(args) - - @classmethod - def parse(cls,rsp,path_photo,photoid): - img_list =htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center')]/img",url=rsp) - - pages_imgs =htmlUtils.xpathData("//div[@class='center 
scramble-page']/@id",url=rsp) - comicInfo.setPages(pages_imgs) - comicInfo.writeComicInfoXML(comicInfo.str_chapter,path=path_photo) - #print("img_list:",len(img_list)) - for i in img_list: - img_url= i.attrib['data-original'] - img_name = os.path.basename(img_url).split('.')[0] - path_img = "%s\\%s.jpg" % (path_photo, img_name) - #print([img_url,photoid,path_img]) - down_queue.put([img_url,photoid,path_img]) - -class Download(threading.Thread): - def __init__(self,thread_name): - super().__init__() - self.thread_name = thread_name - def run(self): - print("%s开始了!"%self.thread_name) - while not down_empty: - try: - print("还剩余%s张图片"%down_queue.qsize()) - if not down_queue.empty(): - down = down_queue.get(False) - else: - time.sleep(3) - down = down_queue.get(False) - try: - print("down",down) - if not os.path.exists(down[2]): - #scramble_id=220980 网页固定值 - if int(down[1])>220980:#albumid>aid就使用拼接函数 否则直接下载 - print("拼接图片") - self.pjdown(down[0],down[1],down[2]) - else: - print("直接下载图片") - self.dowm_img(down[0],down[2]) - - except Exception as e: - print(e.__traceback__.tb_lineno,e) - print("重新抛入queue:",down) - down_queue.put(down) - except: - pass - def dowm_img(self,url,path_img): - # s=random.choice(list(range(3)))+1+random.random() - # time.sleep(s) - #print("time.sleep=%d"%s) - headers["User_Agent"]=myran.agents() - response = requests.get(url,headers=headers,proxies=proxy) - if response.status_code == 200: - with open(path_img,"wb") as f: - f.write(response.content) - else:print("图片request失败") - def pjdown(self,*args): - imgurl = args[0] - #print(imgurl) - imgpath=args[-1] - # httpproxy_handler = urllib.request.ProxyHandler(proxies=proxy) - # opener = urllib.request.build_opener(httpproxy_handler) - # urlz = urllib.request.Request(imgurl, headers={"User-Agent": myran.agents()}) - # im2 = Image.open(opener.open(urlz)) - - headers["User_Agent"]=myran.agents() - response=requests.get(imgurl, headers=headers,proxies=proxy) - if response.status_code == 200: - im2 = 
Image.open(io.BytesIO(response.content)) - #im2.show() - #print(imgurl, args[1],imgpath, im2) - self.splitimage(imgurl, args[1],imgpath, im2) - def get_md5(self,num): - with open('js/md5.js', 'r') as file: - result = file.read() - context1 = execjs.compile(result) - result1 = context1.call('md5', num) - return result1 - def get_num(self,e, t): - #print(type(e),e, type(t),t) - a = 10 - try: - num_dict = {} - for i in range(10): - num_dict[i] = i * 2 + 2 - if (int(e) >= 268850): - n = str(e) + t; - # switch(n=(n = (n = md5(n)).substr(-1)), n %= 10) { - #print("n=",n) - tmp = ord(self.get_md5(n)[-1]) - result = num_dict[tmp % 10] - a = result - return a - except Exception as e: - print(e.__traceback__.tb_lineno,e) - return False - def splitimage(self,src, aid,imgpath,imageob=''): - if imageob == '': - image = Image.open(src) - else: - image = imageob - w, h = image.size - #image.show() - img_name = os.path.basename(src).split('.')[0] - # print(type(aid),type(img_name)) - if self.get_num(aid, img_name): - s = self.get_num(aid, img_name) # 随机值 - # print(s) - l = h % s # 切割最后多余的值 - box_list = [] - hz = 0 - for i in range(s): - c = math.floor(h / s) - g = i * c - hz += c - h2 = h - c * (i + 1) - l - if i == 0: - c += l;hz += l - else: - g += l - box_list.append((0, h2, w, h - g)) - - # print(box_list,len(box_list)) - item_width = w - # box_list.reverse() #还原切图可以倒序列表 - # print(box_list, len(box_list)) - newh = 0 - image_list = [image.crop(box) for box in box_list] - # print(box_list) - newimage = Image.new("RGB", (w, h)) - for image in image_list: - # image.show() - b_w, b_h = image.size - newimage.paste(image, (0, newh)) - - newh += b_h - newimage.save(imgpath) - -down_queue=Queue() -data_empty = False -down_empty = False -lock = Lock() -myran = Myran() -headers = { - #'cookie':'ipcountry=US; AVS=4eb0s4o5ho9hfmp704ge7jtium; ipm5=bb7f6ac39cebfa37e89bd07544c549fd; cover=1; guide=1; __atuvc=12|39,31|40,5|41,0|42,4|43; __atuvs=635cabf67eff0d49003; 
yuo1={"objName":"hT3l8Pyn15Uf","request_id":0,"zones":[{"idzone":"2967008","here":{}},{"idzone":"2967010","here":{}},{"idzone":"2967010","here":{}},{"idzone":"3597795","sub":"70","here":{}}]}', - #'referer': 'https://18comic.org/', - "User_Agent": myran.agents() -} -proxy = { -# "http":"127.0.0.1:7890", -# "https":"127.0.0.1:7890" -} -def app(url): - try: - global data_empty,down_empty - - newurl_list=get_jm_url.app() - response='' - if newurl_list: - if re.findall(r'https://(.*?)/\w+/\d+/',url)[0] not in newurl_list: - for newurl in newurl_list: - url = re.sub(re.findall(r'https://(.*?)/\w+/\d+/', url)[0], newurl, url) - response = requests.get(url=url, headers=headers, proxies=proxy) - break - else: - response = requests.get(url=url, headers=headers, proxies=proxy) - else: - response = requests.get(url=url, headers=headers, proxies=proxy) - if response: - albumid = re.search(r'/album/(\d+)', url).group(1) - referer = re.search(r'(https://\w+\.\w+)/', url).group(1) - print("albumid", albumid, referer, url) - print(response.url) - if response.status_code == 200: - print(response.status_code) - eth = etree.HTML(response.text) - #拿到所有话数 - nums = eth.xpath("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a") - book_name = eth.xpath("//div[@itemprop='name']/h1[@id='book-name']/text()")[0] - book_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>]', '', book_name) - tags = eth.xpath("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()") - author = eth.xpath("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()") - book_msg = eth.xpath("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()") - jmid = book_msg[0] - dep = str(book_msg[1]).replace("叙述:","") - - comicInfo.setComicName(book_name) - comicInfo.setAuthor(author) - comicInfo.setDep(dep) - 
comicInfo.setTags(tags) - comicInfo.setTag(tags) - comicInfo.setCBS("韩漫") - comicInfo.setLang("zh") - - if nums: - for i in nums: - photo_name_list = i.xpath("li/text()")[0].split() - photo_date = i.xpath("li/span/text()")[0].split() - #print(re.findall(r'[\u4E00-\u9FA5]+.*?', i.xpath("li/text()")[0])) - try: - if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]): - photo_name=re.sub(r'\s','',photo_name_list[0])+' '+photo_name_list[2] - else:photo_name=re.sub(r'\s','',photo_name_list[0]) - except Exception as e: - photo_name = re.sub(r'\s', '', photo_name_list[0]) - photo_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>\-]', '',photo_name) - #print(photo_name) - photoid=i.attrib['data-album'] - comicInfo.setChapterName(photo_name) - comicInfo.setDate(photo_date[0],split='-') - comicInfo.setWeb(referer+i.attrib['href']) - Data.oneChapter(photoid) - except Exception as e: - print(e.__traceback__.tb_lineno,e) - startime=time.perf_counter() - while True: - if down_queue.qsize()>100 or time.perf_counter()-startime>10: - break - print('down_queue.qsize():%s'%down_queue.qsize()) - down_list=['down下载线程%s号'%s for s in list(range(1,40 if down_queue.qsize()>40 else down_queue.qsize()))] - down_thread_list=[] - for i in down_list: - down=Download(i) - down.start() - time.sleep(0.7) - down_thread_list.append(down) - while not down_queue.empty(): - pass - down_empty=True - for down_thread in down_thread_list: - down_thread.join() - print("%s结束了!"%down_thread.thread_name) - - -if __name__ == '__main__': -# os.environ["http_proxy"] = "http://127.0.0.1:7890" -# os.environ["https_proxy"] = "http://127.0.0.1:7890" - app("https://18comic.vip/album/407792/") diff --git a/js/md5.js b/js/md5.js deleted file mode 100644 index a04493d..0000000 --- a/js/md5.js +++ /dev/null @@ -1,408 +0,0 @@ -/* - * JavaScript MD5 - * https://github.com/blueimp/JavaScript-MD5 - * - * Copyright 2011, Sebastian Tschan - * https://blueimp.net - * - * Licensed under the MIT license: - * 
https://opensource.org/licenses/MIT - * - * Based on - * A JavaScript implementation of the RSA Data Security, Inc. MD5 Message - * Digest Algorithm, as defined in RFC 1321. - * Version 2.2 Copyright (C) Paul Johnston 1999 - 2009 - * Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet - * Distributed under the BSD License - * See http://pajhome.org.uk/crypt/md5 for more info. - */ - -/* global define */ - -/* eslint-disable strict */ -;(function ($) { - 'use strict' - - /** - * Add integers, wrapping at 2^32. - * This uses 16-bit operations internally to work around bugs in interpreters. - * - * @param {number} x First integer - * @param {number} y Second integer - * @returns {number} Sum - */ - function safeAdd(x, y) { - var lsw = (x & 0xffff) + (y & 0xffff) - var msw = (x >> 16) + (y >> 16) + (lsw >> 16) - return (msw << 16) | (lsw & 0xffff) - } - - /** - * Bitwise rotate a 32-bit number to the left. - * - * @param {number} num 32-bit number - * @param {number} cnt Rotation count - * @returns {number} Rotated number - */ - function bitRotateLeft(num, cnt) { - return (num << cnt) | (num >>> (32 - cnt)) - } - - /** - * Basic operation the algorithm uses. - * - * @param {number} q q - * @param {number} a a - * @param {number} b b - * @param {number} x x - * @param {number} s s - * @param {number} t t - * @returns {number} Result - */ - function md5cmn(q, a, b, x, s, t) { - return safeAdd(bitRotateLeft(safeAdd(safeAdd(a, q), safeAdd(x, t)), s), b) - } - - /** - * Basic operation the algorithm uses. - * - * @param {number} a a - * @param {number} b b - * @param {number} c c - * @param {number} d d - * @param {number} x x - * @param {number} s s - * @param {number} t t - * @returns {number} Result - */ - function md5ff(a, b, c, d, x, s, t) { - return md5cmn((b & c) | (~b & d), a, b, x, s, t) - } - - /** - * Basic operation the algorithm uses. 
- * - * @param {number} a a - * @param {number} b b - * @param {number} c c - * @param {number} d d - * @param {number} x x - * @param {number} s s - * @param {number} t t - * @returns {number} Result - */ - function md5gg(a, b, c, d, x, s, t) { - return md5cmn((b & d) | (c & ~d), a, b, x, s, t) - } - - /** - * Basic operation the algorithm uses. - * - * @param {number} a a - * @param {number} b b - * @param {number} c c - * @param {number} d d - * @param {number} x x - * @param {number} s s - * @param {number} t t - * @returns {number} Result - */ - function md5hh(a, b, c, d, x, s, t) { - return md5cmn(b ^ c ^ d, a, b, x, s, t) - } - - /** - * Basic operation the algorithm uses. - * - * @param {number} a a - * @param {number} b b - * @param {number} c c - * @param {number} d d - * @param {number} x x - * @param {number} s s - * @param {number} t t - * @returns {number} Result - */ - function md5ii(a, b, c, d, x, s, t) { - return md5cmn(c ^ (b | ~d), a, b, x, s, t) - } - - /** - * Calculate the MD5 of an array of little-endian words, and a bit length. 
- * - * @param {Array} x Array of little-endian words - * @param {number} len Bit length - * @returns {Array} MD5 Array - */ - function binlMD5(x, len) { - /* append padding */ - x[len >> 5] |= 0x80 << len % 32 - x[(((len + 64) >>> 9) << 4) + 14] = len - - var i - var olda - var oldb - var oldc - var oldd - var a = 1732584193 - var b = -271733879 - var c = -1732584194 - var d = 271733878 - - for (i = 0; i < x.length; i += 16) { - olda = a - oldb = b - oldc = c - oldd = d - - a = md5ff(a, b, c, d, x[i], 7, -680876936) - d = md5ff(d, a, b, c, x[i + 1], 12, -389564586) - c = md5ff(c, d, a, b, x[i + 2], 17, 606105819) - b = md5ff(b, c, d, a, x[i + 3], 22, -1044525330) - a = md5ff(a, b, c, d, x[i + 4], 7, -176418897) - d = md5ff(d, a, b, c, x[i + 5], 12, 1200080426) - c = md5ff(c, d, a, b, x[i + 6], 17, -1473231341) - b = md5ff(b, c, d, a, x[i + 7], 22, -45705983) - a = md5ff(a, b, c, d, x[i + 8], 7, 1770035416) - d = md5ff(d, a, b, c, x[i + 9], 12, -1958414417) - c = md5ff(c, d, a, b, x[i + 10], 17, -42063) - b = md5ff(b, c, d, a, x[i + 11], 22, -1990404162) - a = md5ff(a, b, c, d, x[i + 12], 7, 1804603682) - d = md5ff(d, a, b, c, x[i + 13], 12, -40341101) - c = md5ff(c, d, a, b, x[i + 14], 17, -1502002290) - b = md5ff(b, c, d, a, x[i + 15], 22, 1236535329) - - a = md5gg(a, b, c, d, x[i + 1], 5, -165796510) - d = md5gg(d, a, b, c, x[i + 6], 9, -1069501632) - c = md5gg(c, d, a, b, x[i + 11], 14, 643717713) - b = md5gg(b, c, d, a, x[i], 20, -373897302) - a = md5gg(a, b, c, d, x[i + 5], 5, -701558691) - d = md5gg(d, a, b, c, x[i + 10], 9, 38016083) - c = md5gg(c, d, a, b, x[i + 15], 14, -660478335) - b = md5gg(b, c, d, a, x[i + 4], 20, -405537848) - a = md5gg(a, b, c, d, x[i + 9], 5, 568446438) - d = md5gg(d, a, b, c, x[i + 14], 9, -1019803690) - c = md5gg(c, d, a, b, x[i + 3], 14, -187363961) - b = md5gg(b, c, d, a, x[i + 8], 20, 1163531501) - a = md5gg(a, b, c, d, x[i + 13], 5, -1444681467) - d = md5gg(d, a, b, c, x[i + 2], 9, -51403784) - c = md5gg(c, d, a, b, x[i + 
7], 14, 1735328473) - b = md5gg(b, c, d, a, x[i + 12], 20, -1926607734) - - a = md5hh(a, b, c, d, x[i + 5], 4, -378558) - d = md5hh(d, a, b, c, x[i + 8], 11, -2022574463) - c = md5hh(c, d, a, b, x[i + 11], 16, 1839030562) - b = md5hh(b, c, d, a, x[i + 14], 23, -35309556) - a = md5hh(a, b, c, d, x[i + 1], 4, -1530992060) - d = md5hh(d, a, b, c, x[i + 4], 11, 1272893353) - c = md5hh(c, d, a, b, x[i + 7], 16, -155497632) - b = md5hh(b, c, d, a, x[i + 10], 23, -1094730640) - a = md5hh(a, b, c, d, x[i + 13], 4, 681279174) - d = md5hh(d, a, b, c, x[i], 11, -358537222) - c = md5hh(c, d, a, b, x[i + 3], 16, -722521979) - b = md5hh(b, c, d, a, x[i + 6], 23, 76029189) - a = md5hh(a, b, c, d, x[i + 9], 4, -640364487) - d = md5hh(d, a, b, c, x[i + 12], 11, -421815835) - c = md5hh(c, d, a, b, x[i + 15], 16, 530742520) - b = md5hh(b, c, d, a, x[i + 2], 23, -995338651) - - a = md5ii(a, b, c, d, x[i], 6, -198630844) - d = md5ii(d, a, b, c, x[i + 7], 10, 1126891415) - c = md5ii(c, d, a, b, x[i + 14], 15, -1416354905) - b = md5ii(b, c, d, a, x[i + 5], 21, -57434055) - a = md5ii(a, b, c, d, x[i + 12], 6, 1700485571) - d = md5ii(d, a, b, c, x[i + 3], 10, -1894986606) - c = md5ii(c, d, a, b, x[i + 10], 15, -1051523) - b = md5ii(b, c, d, a, x[i + 1], 21, -2054922799) - a = md5ii(a, b, c, d, x[i + 8], 6, 1873313359) - d = md5ii(d, a, b, c, x[i + 15], 10, -30611744) - c = md5ii(c, d, a, b, x[i + 6], 15, -1560198380) - b = md5ii(b, c, d, a, x[i + 13], 21, 1309151649) - a = md5ii(a, b, c, d, x[i + 4], 6, -145523070) - d = md5ii(d, a, b, c, x[i + 11], 10, -1120210379) - c = md5ii(c, d, a, b, x[i + 2], 15, 718787259) - b = md5ii(b, c, d, a, x[i + 9], 21, -343485551) - - a = safeAdd(a, olda) - b = safeAdd(b, oldb) - c = safeAdd(c, oldc) - d = safeAdd(d, oldd) - } - return [a, b, c, d] - } - - /** - * Convert an array of little-endian words to a string - * - * @param {Array} input MD5 Array - * @returns {string} MD5 string - */ - function binl2rstr(input) { - var i - var output = '' - var 
length32 = input.length * 32 - for (i = 0; i < length32; i += 8) { - output += String.fromCharCode((input[i >> 5] >>> i % 32) & 0xff) - } - return output - } - - /** - * Convert a raw string to an array of little-endian words - * Characters >255 have their high-byte silently ignored. - * - * @param {string} input Raw input string - * @returns {Array} Array of little-endian words - */ - function rstr2binl(input) { - var i - var output = [] - output[(input.length >> 2) - 1] = undefined - for (i = 0; i < output.length; i += 1) { - output[i] = 0 - } - var length8 = input.length * 8 - for (i = 0; i < length8; i += 8) { - output[i >> 5] |= (input.charCodeAt(i / 8) & 0xff) << i % 32 - } - return output - } - - /** - * Calculate the MD5 of a raw string - * - * @param {string} s Input string - * @returns {string} Raw MD5 string - */ - function rstrMD5(s) { - return binl2rstr(binlMD5(rstr2binl(s), s.length * 8)) - } - - /** - * Calculates the HMAC-MD5 of a key and some data (raw strings) - * - * @param {string} key HMAC key - * @param {string} data Raw input string - * @returns {string} Raw MD5 string - */ - function rstrHMACMD5(key, data) { - var i - var bkey = rstr2binl(key) - var ipad = [] - var opad = [] - var hash - ipad[15] = opad[15] = undefined - if (bkey.length > 16) { - bkey = binlMD5(bkey, key.length * 8) - } - for (i = 0; i < 16; i += 1) { - ipad[i] = bkey[i] ^ 0x36363636 - opad[i] = bkey[i] ^ 0x5c5c5c5c - } - hash = binlMD5(ipad.concat(rstr2binl(data)), 512 + data.length * 8) - return binl2rstr(binlMD5(opad.concat(hash), 512 + 128)) - } - - /** - * Convert a raw string to a hex string - * - * @param {string} input Raw input string - * @returns {string} Hex encoded string - */ - function rstr2hex(input) { - var hexTab = '0123456789abcdef' - var output = '' - var x - var i - for (i = 0; i < input.length; i += 1) { - x = input.charCodeAt(i) - output += hexTab.charAt((x >>> 4) & 0x0f) + hexTab.charAt(x & 0x0f) - } - return output - } - - /** - * Encode a string as 
UTF-8 - * - * @param {string} input Input string - * @returns {string} UTF8 string - */ - function str2rstrUTF8(input) { - return unescape(encodeURIComponent(input)) - } - - /** - * Encodes input string as raw MD5 string - * - * @param {string} s Input string - * @returns {string} Raw MD5 string - */ - function rawMD5(s) { - return rstrMD5(str2rstrUTF8(s)) - } - - /** - * Encodes input string as Hex encoded string - * - * @param {string} s Input string - * @returns {string} Hex encoded string - */ - function hexMD5(s) { - return rstr2hex(rawMD5(s)) - } - - /** - * Calculates the raw HMAC-MD5 for the given key and data - * - * @param {string} k HMAC key - * @param {string} d Input string - * @returns {string} Raw MD5 string - */ - function rawHMACMD5(k, d) { - return rstrHMACMD5(str2rstrUTF8(k), str2rstrUTF8(d)) - } - - /** - * Calculates the Hex encoded HMAC-MD5 for the given key and data - * - * @param {string} k HMAC key - * @param {string} d Input string - * @returns {string} Raw MD5 string - */ - function hexHMACMD5(k, d) { - return rstr2hex(rawHMACMD5(k, d)) - } - - /** - * Calculates MD5 value for a given string. - * If a key is provided, calculates the HMAC-MD5 value. - * Returns a Hex encoded string unless the raw argument is given. 
- * - * @param {string} string Input string - * @param {string} [key] HMAC key - * @param {boolean} [raw] Raw output switch - * @returns {string} MD5 output - */ - function md5(string, key, raw) { - if (!key) { - if (!raw) { - return hexMD5(string) - } - return rawMD5(string) - } - if (!raw) { - return hexHMACMD5(key, string) - } - return rawHMACMD5(key, string) - } - - if (typeof define === 'function' && define.amd) { - define(function () { - return md5 - }) - } else if (typeof module === 'object' && module.exports) { - module.exports = md5 - } else { - $.md5 = md5 - } -})(this) diff --git a/js/md5.min.js b/js/md5.min.js deleted file mode 100644 index ca3642c..0000000 --- a/js/md5.min.js +++ /dev/null @@ -1,2 +0,0 @@ -!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((u=d(d(t,n),d(e,u)))<>>32-o,r)}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function c(n,t){var r,e,o,u;n[t>>5]|=128<>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h>5]>>>e%32&255);return t}function a(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e>5]|=(255&n.charCodeAt(e/8))<>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return i(c(a(n=r(n)),8*n.length))}function u(n,t){return function(n,t){var r,e=a(n),o=[],u=[];for(o[15]=u[15]=void 0,163d}".format(count_img)) - file_name = count + os.path.splitext(img)[-1] - save_file_path = os.path.join(file_path, file_name) - if scrambles[count_img -1]: - su = "."+str(img).split(".")[-1] - de_str = str(img).split("/")[-1].replace(su,"==") - blockInt = imageUtils.encodeImage(de_str) - save_file_path = os.path.join(file_path,"scramble="+str(blockInt)+"_"+file_name) - cls.threadDownload(img, save_file_path, fileType="image") - 
count_img += 1 - return os.path.dirname(save_file_path) - - @classmethod - def downloadComicIcon(cls): - icon_url = comicInfo.getIcon() - if icon_url == None: - print("icon 不存在,已跳过") - return None - save_name = "cover" - icon_su = "."+str(icon_url).split(".")[-1] - icon_su = icon_su.split("?")[0] - #判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过 - pathComicIcon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_su) - if not os.path.exists(pathComicIcon): - cls.download(icon_url, pathComicIcon) - pathCBZComic = comicInfo.getDirCBZComic() - if not os.path.exists(pathCBZComic): - os.makedirs(pathCBZComic) - save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su) - shutil.copy(pathComicIcon, save_path) - print(f"{pathComicIcon} 已复制至: {save_path}") - comicInfo.nextDownloadToCBZChapter() \ No newline at end of file diff --git a/utils/VerUtils.py b/utils/VerUtils.py deleted file mode 100644 index a875cea..0000000 --- a/utils/VerUtils.py +++ /dev/null @@ -1,60 +0,0 @@ -from utils.HtmlUtils import htmlUtils -from utils.Ntfy import ntfy -from utils.CBZUtils import CBZUtils -from utils.comic.ComicInfo import comicInfo -import json,os - -class verUtils: - @classmethod - def verCBZ(cls,data=None): - len_zip = len(CBZUtils.zip_info()) - 1 - info_data = None - if data == None: - info_path = comicInfo.nextSavePath("done_") - try: - with open(info_path,"r",encoding="utf-8") as fs: - info_data = json.loads(fs.read()) - fs.close() - except: - ntfy.sendMsg("校验失败") - else: - info_data = data - if info_data != None: - if len(info_data) == len_zip: - return True - else: - ntfy.sendMsg("数据不完整,删除配置文件中") - try: - os.remove(comicInfo.nextSavePath("done_")) - ntfy.sendMsg("配置文件删除成功") - except: - ntfy.sendMsg("配置文件删除失败") - return False - else: - ntfy.sendMsg("info_data 为空") - return False - - @classmethod - def verNextCBZ(cls,list_img): - #验证数据是已存在且是否完整 - cbz_path = comicInfo.getDirCBZComicChapter()+".CBZ" - is_next = False - if os.path.exists(cbz_path): - try: - cbz_size = 
len(CBZUtils.zip_info(cbz_path)) - 1 - except: - cbz_size = 0 - if len(list_img) == cbz_size: - ntfy.sendMsg(f"{comicInfo.getComicName()} {comicInfo.getChapter()} 数据完整,已跳过") - is_next = True - else: - ntfy.sendMsg(f"{comicInfo.getComicName()} {comicInfo.getChapter()} 数据不完整,尝试删除配置CBZ文件后重试") - try: - if cbz_size < len(list_img) or os.path.getsize(cbz_path) < 300000: - ntfy.sendMsg(f"删除 {cbz_path}") - os.remove(cbz_path) - else: - is_next = True - except: - ntfy(f"删除失败 {cbz_path}") - return is_next \ No newline at end of file diff --git a/utils/comic/ComicInfo.py b/utils/comic/ComicInfo.py index 4318c98..1c95971 100644 --- a/utils/comic/ComicInfo.py +++ b/utils/comic/ComicInfo.py @@ -37,6 +37,8 @@ class comicInfo(): str_date_day = None str_page_count = None str_web = None + str_list_img = None + str_files_img = None chapter_node = None comicName_node = None @@ -126,6 +128,22 @@ class comicInfo(): cls.str_web = value cls.web_node = cls.setNodeAndValue(cls.web,value) + @classmethod + def setChapterListImg(cls,value): + cls.str_list_img=value + + @classmethod + def getChapterListImg(cls): + return cls.str_list_img + + @classmethod + def setChapterFilesName(cls,value): + cls.str_files_img=value + + @classmethod + def getChapterFilesName(cls): + return cls.str_files_img + @classmethod def getWeb(cls): return cls.str_web @@ -431,4 +449,9 @@ class comicInfo(): comic_update = data.get(comic_name) if comic_name != None and comic_update == c_update_at: is_update = False - return is_update \ No newline at end of file + return is_update + + @classmethod + def comicChapterDownload(cls,imgs,names): + cls.setChapterListImg(imgs) + cls.setChapterFilesName(names) \ No newline at end of file diff --git a/utils/downloader.py b/utils/downloader.py index 439734f..7d2e159 100644 --- a/utils/downloader.py +++ b/utils/downloader.py @@ -12,6 +12,8 @@ import concurrent.futures import requests import time from utils.Ntfy import ntfy +from utils.comic.ComicInfo import comicInfo +from 
utils.HtmlUtils import htmlUtils headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", @@ -86,4 +88,61 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, time future_list.append(executor.submit( download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy)) count += 1 - concurrent.futures.wait(future_list, timeout) \ No newline at end of file + concurrent.futures.wait(future_list, timeout) + +def download_comic_icon(): + icon_url = comicInfo.getIcon() + if icon_url == None: + print("icon 不存在,已跳过") + return None + save_name = "cover" + icon_su = "."+str(icon_url).split(".")[-1] + icon_su = icon_su.split("?")[0] + #判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过 + pathComicIcon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_su) + if not os.path.exists(pathComicIcon): + download(icon_url, pathComicIcon) + pathCBZComic = comicInfo.getDirCBZComic() + if not os.path.exists(pathCBZComic): + os.makedirs(pathCBZComic) + save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su) + shutil.copy(pathComicIcon, save_path) + print(f"{pathComicIcon} 已复制至: {save_path}") + comicInfo.nextDownloadToCBZChapter() + + # 定义下载函数 +def download(url,path,fileType=None): + if os.path.exists(path): + if imghdr.what(path): + msg = "已存在同路径文件,已跳过:"+path + print(msg) + return msg + else: + print("文件已损坏,已重试:"+path) + path = os.path.join(os.path.dirname(path),str(os.path.basename(path)).split("?")[0]) + tmp_file = path+".downloads" + if os.path.exists(tmp_file): + os.remove(tmp_file) + print("存在缓存文件,已删除:",tmp_file) + repair_count = 1 + res = htmlUtils.getBytes(url) + while res.status_code != 200 and repair_count <= 5: + res = htmlUtils.getBytes(url) + print(f'重试:第{repair_count}次 {url}') + repair_count += 1 + #判断是否为图片 + if fileType == "image": + if 'image' not in res.headers.get("content-type",""): + print(f"url= {url} Error: URL doesnot appear to be an image") + basedir= os.path.dirname(path) + if not 
class baseComic:
    """Shared comic download workflow driven through the ``comicInfo`` helper.

    The methods sequence calls on project helpers (``comicInfo``, ``ntfy``,
    ``CBZUtils``, ``verUtils``, ``imageUtils``, ``htmlUtils``,
    ``comicCommon``) to fetch chapter images, decode scrambled ones, pack
    them and clean up the working directories.
    """

    # Index of the chapter currently handled by ``oneComic``; also read by
    # ``comicChapter`` for its progress message.
    count_chapter = 0

    @staticmethod
    def _count_jpg(file_names):
        """Return how many times ``.jpg`` occurs in the given file names."""
        return ",".join(file_names).count(".jpg")

    @classmethod
    def downladsComcis(cls, book_name, comic_href, updated=None):
        """Register a comic and decide whether it has to be downloaded.

        Args:
            book_name: Comic title passed to ``comicInfo.setComicName``.
            comic_href: URL handed back to the caller when an update is due.
            updated: Optional value forwarded to ``comicInfo.setUpdateAt``.

        Returns:
            ``comic_href`` when ``comicInfo.isUpdateComic()`` is truthy,
            otherwise ``None``.
        """
        comicInfo.setComicName(book_name)
        if updated is not None:
            comicInfo.setUpdateAt(updated)
        random_int = random.randint(5, 20)
        # NOTE(review): this second setComicName call looks redundant; it is
        # kept because comicInfo's setters are not visible from here.
        comicInfo.setComicName(book_name)
        dir_conf_comic = comicInfo.getDirConfComic()
        if not os.path.exists(dir_conf_comic):
            # No config directory yet: announce and wait a random delay.
            ntfy.sendMsg(f"{random_int}秒后开始下载 漫画:{book_name}")
            time.sleep(random_int)
        else:
            ntfy.sendMsg(f"已存在 漫画:{book_name}")
        if comicInfo.isUpdateComic():
            return comic_href
        ntfy.sendMsg(f"{book_name} 已是最新")
        return None

    @classmethod
    def oneComic(cls, url, title, author, icon, tags, dep, chapters, chapter_href,
                 alias=None, genre="韩漫", lang="zh", sleep=None):
        """Store the comic metadata, then process every chapter in order.

        Args:
            url: Comic home page.
            title: Comic title.
            author: Author text; ``&`` and spaces are replaced by commas.
            icon, tags, dep: Forwarded unchanged to ``comicInfo``.
            chapters: Chapter names, indexed in parallel with ``chapter_href``.
            chapter_href: Chapter URLs to process.
            alias: Optional alternative title, stored together with ``title``.
            genre, lang: Forwarded to ``comicInfo.setGenre`` / ``setLang``.
            sleep: Optional number of seconds to wait once the comic is done.
        """
        author = str(author).replace("&", ",").replace(" ", ",")
        comicInfo.setHomePage(url)
        comicInfo.setComicName(str(title))
        if alias is not None:
            comicInfo.setComicNames(title + "," + alias)
        comicInfo.setAuthor(author)
        comicInfo.setIcon(icon)
        comicInfo.setTags(tags)
        comicInfo.setDep(dep)
        comicInfo.setGenre(genre)
        comicInfo.setLang(lang)
        comicInfo.setListChapter(chapters)

        cls.count_chapter = 0
        for href in chapter_href:
            chapter = chapters[cls.count_chapter]
            comicInfo.setChapterName(chapter)
            if not comicInfo.nextExistsGetPath("done_"):
                cls.comicChapter(href, scramble=True, sleep=random.randint(5, 15))
            # When the chapter is marked done, verify the CBZ archive.
            if comicInfo.nextExistsGetPath("done_"):
                verUtils.verCBZ()
            cls.count_chapter += 1
        # The whole comic has been processed: clear its working directory,
        # then optionally wait.
        path_dir_comic = comicInfo.getDirComic()
        if os.path.exists(path_dir_comic):
            shutil.rmtree(path_dir_comic)
        if sleep is not None:
            time.sleep(sleep)

    @classmethod
    def comicChapter(cls, chapter_url, scramble=None, sleep=None):
        """Download one chapter, fetch the cover, pack the CBZ and clean up.

        Args:
            chapter_url: Chapter URL (absolute or relative to the base URL).
            scramble: Forwarded to ``Onechapter``.
            sleep: Optional number of seconds to wait before the next chapter.
        """
        is_next = cls.Onechapter(chapter_url, scramble)
        # NOTE(review): the two stage checks below are written as independent
        # steps; confirm this matches the intended nesting.
        if comicInfo.nextExistsGetPath("down_"):
            # All chapter images are downloaded: fetch the cover.
            download_comic_icon()
        if comicInfo.nextExistsGetPath("cbz_"):
            time.sleep(0.1)
            # Pack the downloaded chapter.
            is_next = CBZUtils.packAutoComicChapterCBZ()
            # Remove the source files once packing has run.
            remove_path = comicInfo.getDirComicChapter()
            if os.path.exists(remove_path):
                shutil.rmtree(remove_path)
                print(f"文件已删除: {remove_path}")
        ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / " + str(comicInfo.getLenChapters()))
        # The return type of packAutoComicChapterCBZ is not visible here, so
        # the original equality comparison is kept rather than truthiness.
        if sleep is not None and is_next == True:  # noqa: E712
            ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节")
            time.sleep(sleep)

    @classmethod
    def Onechapter(cls, chapter_url, scramble=None):
        """Download one chapter's images and decode the scrambled ones.

        Args:
            chapter_url: Chapter URL; prefixed with ``comicInfo.getBaseUrl()``
                when it does not start with ``http``.
            scramble: When truthy, files named ``scramble=...`` in the chapter
                directory are passed to ``imageUtils.encode_scramble_image``.

        Returns:
            The value returned by ``comicChapterDownload``.
        """
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
        try:
            is_next = cls.comicChapterDownload(chapter_url)
        except Exception:
            # Retry once after dropping the cached page. ``Exception`` rather
            # than a bare except so Ctrl-C / SystemExit are not retried.
            htmlUtils.remove_HtmlCache(chapter_url)
            is_next = cls.comicChapterDownload(chapter_url)
        comicInfo.nextInfoToImgChapter()
        # Download finished: decode the scrambled images.
        chapter_dir = comicInfo.getDirComicChapter()
        if scramble and os.path.exists(chapter_dir):
            for img in os.listdir(chapter_dir):
                if img.startswith("scramble="):
                    imageUtils.encode_scramble_image(os.path.join(chapter_dir, img))
        # Move on to the next stage.
        comicInfo.nextImgToDownloadChapter()
        return is_next

    @classmethod
    def comicChapterDownload(cls, url):
        """Collect the chapter image list and download it unless a CBZ exists.

        Args:
            url: Chapter URL handed to ``comicCommon.comicChapterDownload``.

        Returns:
            ``False`` when an existing CBZ was kept and the download skipped;
            otherwise the completeness flag from the last download pass.
        """
        comicCommon.comicChapterDownload(url)
        list_img = comicInfo.getChapterListImg()
        files_name = comicInfo.getChapterFilesName()
        chapter_name = comicInfo.getChapter()
        book_name = comicInfo.getComicName()
        comicInfo.setChapterImgs(list_img)
        # Save the image list.
        comicInfo.nextSaveInfoChapter(chapter_name, list_img)
        # Check whether a CBZ already exists and whether it is complete.
        cbz_path = comicInfo.getDirCBZComicChapter() + ".CBZ"
        is_next = True
        if os.path.exists(cbz_path):
            try:
                cbz_size = len(CBZUtils.zip_info(cbz_path)) - 1
            except Exception:
                # An unreadable archive is treated as empty.
                cbz_size = 0
            if len(list_img) == cbz_size:
                ntfy.sendMsg(f"{book_name} {chapter_name} 数据完整,已跳过")
                comicInfo.nextDoneSave(list_img)
                is_next = False
            else:
                ntfy.sendMsg(f"{book_name} {chapter_name} 数据不完整,尝试删除配置CBZ文件后重试")
                try:
                    if cbz_size < len(list_img) or os.path.getsize(cbz_path) < 300000:
                        ntfy.sendMsg(f"删除 {cbz_path}")
                        os.remove(cbz_path)
                    else:
                        is_next = False
                except Exception:
                    # Fixed: the original called ``ntfy(...)`` directly, while
                    # every other call site uses ``ntfy.sendMsg``.
                    ntfy.sendMsg(f"删除失败 {cbz_path}")
        if is_next:
            path_comic_info = comicInfo.getPathComicInfoXML()
            if not os.path.exists(path_comic_info):
                # ComicInfo.xml is missing: generate it.
                comicInfo.setPages(files_name)
                comicInfo.writeComicInfoXML(chapter_name)
            ntfy.sendMsg(f"{book_name} {chapter_name} 下载中")
            is_next = verUtils.verNextCBZ(list_img)
            repeat = 0
            # NOTE(review): with ``or`` this loop always runs at least four
            # passes and never ends while the download stays incomplete. The
            # condition is kept unchanged because the return contract of
            # verUtils.verNextCBZ is not visible here; confirm whether ``and``
            # (bounded retries) was intended.
            while not is_next or repeat <= 3:
                download_images(list_img, comicInfo.getDirComicChapter(), filesName=files_name, timeout=180)
                file_imgs = os.listdir(comicInfo.getDirComicChapter())
                count_jpg = cls._count_jpg(file_imgs)
                is_next = count_jpg == len(list_img)
                if not is_next:
                    sleep_time = 3 + int(repeat) * 2
                    time.sleep(sleep_time)
                    ntfy.sendMsg(f"下载数据({count_jpg}/{len(list_img)})不完整,{sleep_time}秒钟后尝试第{repeat}次")
                repeat += 1
        return is_next
book_name = book.get("name") updated = book.get("updatedAt") - comicInfo.setComicName(book_name) - comicInfo.setUpdateAt(updated) comic_href = base_url+"/books/"+book_id - random_int = random.randint(5,20) - comicInfo.setComicName(book_name) - dir_conf_comic = comicInfo.getDirConfComic() - if not os.path.exists(dir_conf_comic): - ntfy.sendMsg(f"{random_int}秒后开始下载 漫画:{book_name}") - time.sleep(random_int) - else: - ntfy.sendMsg(f"已存在 漫画:{book_name}") - if comicInfo.isUpdateComic(): - cls.oneComic(comic_href, random.uniform(0,3)) - comicInfo.updateComicDate() - else: - ntfy.sendMsg(f"{book_name} 已是最新") - - #print(books) - #for comicHref in comicsHref: - # cls.oneComic(comicHref,random.uniform(10,20)) - + href = baseComic.downladsComcis(book_name=book_name,comic_href=comic_href,updated=updated) + cls.oneComic(href) + @classmethod def oneComic(cls,c_url,sleep=None): #漫画名 @@ -69,189 +45,6 @@ class comicEntity: chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()') chapter_href = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href') - author = str(author).replace("&",",").replace(" ",",") - comicInfo.setHomePage(c_url) - comicInfo.setComicName(str(title)) - comicInfo.setComicNames(title+","+alias) - comicInfo.setAuthor(author) - comicInfo.setIcon(icon) - comicInfo.setTags(tags) - comicInfo.setDep(dep) - #comicInfo.setCBS("韩漫") - comicInfo.setGenre("韩漫") - comicInfo.setLang("zh") - comicInfo.setComicNames(title+","+alias) - comicInfo.setListChapter(chapters) - - #comicUtils.setComic(title,alias,icon,author,tags,action,dep,update_date,chapters,chapter_href) - cls.count_chapter = 0 - for href in chapter_href: - chapter = chapters[cls.count_chapter] - comicInfo.setChapterName(chapter) - if not comicInfo.nextExistsGetPath("done_"): - comicEntity.comicChapter(href,scramble=True,sleep=random.randint(5,15)) - #存在就校验CBZ包是否完整 - if 
comicInfo.nextExistsGetPath("done_"): - verUtils.verCBZ() - cls.count_chapter += 1 - #一本漫画下载后等待 - #清空文件夹 - path_dir_comic = comicInfo.getDirComic() - if os.path.exists(path_dir_comic): - shutil.rmtree(path_dir_comic) - if sleep != None: - time.sleep(sleep) - - ''' - - 读取某章节下所有图片 - ''' - @classmethod - def comicChapter(cls,chapter_url,scramble=None,sleep=None): - is_next = True - #try: - is_next = cls.Onechapter(chapter_url,scramble) - #进入下个阶段 - if comicInfo.nextExistsGetPath("down_"): - #章节图片全部下载后,调用下载封面 - netUtils.downloadComicIcon() - #下个阶段 - if comicInfo.nextExistsGetPath("cbz_"): - time.sleep(0.1) - #下载后自动打包 - is_next = CBZUtils.packAutoComicChapterCBZ() - #完成删除原文件 - remove_path = comicInfo.getDirComicChapter() - if os.path.exists(remove_path): - shutil.rmtree(remove_path) - print(f"文件已删除: {remove_path}") - #except Exception as e: - # print(e) - # ntfy.sendMsg(f"{comicInfo.getComicName()} 下载出错了") - # is_next = False - ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / "+ str(comicInfo.getLenChapters())) - if sleep != None and is_next == True: - ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节") - time.sleep(sleep) - - - @classmethod - def Onechapter(cls,chapter_url,scramble=None): - if not str(chapter_url).startswith("http"): - chapter_url = comicInfo.getBaseUrl() + chapter_url - try: - is_next = cls.comicChapterDownload(chapter_url) - except: - htmlUtils.remove_HtmlCache(chapter_url) - is_next = cls.comicChapterDownload(chapter_url) - comicInfo.nextInfoToImgChapter() - #下载完成后, 开始解密图片 - chapter_dir = comicInfo.getDirComicChapter() - if scramble and os.path.exists(chapter_dir): - #获取章节图片路径 - dirs = os.listdir(chapter_dir) - for img in dirs: - if img.startswith("scramble="): - c_path = os.path.join(chapter_dir, img) - #imageUtils.getScrambleImage(c_path) - imageUtils.encode_scramble_image(c_path) - #进入下一阶段 - comicInfo.nextImgToDownloadChapter() - return is_next - - @classmethod - def comicChapterDownload(cls,chapter_url): - x = cls.baseComicData(chapter_url,update=True) - 
bookName = x.get("bookName") - chapterName = x.get("chapterName") - #fileUtils.saveConfComicChapterInfo(chapterName,x,bookName) - #if comicInfo.nextExistsGetPath("info_"): - # print(f"{bookName} {chapterName} info文件已存在跳过") - alias = x.get("alias") - description = x.get("description") - images = x.get("images") - chapterAPIPath = x.get("chapterAPIPath") - comicInfo.setComicName(bookName) - comicInfo.setChapterName(chapterName) - comicInfo.setDep(description) - - if not chapterAPIPath == None: - chapterAPIPath = str(chapterAPIPath).encode('utf-8').decode('unicode_escape') - base_url = comicInfo.getBaseUrl(chapter_url) - chapterAPIUrl = base_url+chapterAPIPath - ntfy.sendMsg(f"chapterApiUrl= {chapterAPIUrl}",alert=False) - data = htmlUtils.getJSON(chapterAPIUrl) - if data != None: - data = data.get("chapter") - chapterName = data.get("name") - images = data.get("images") - if images == None: - ntfy.sendMsg(f"未获取到章节图像 comic_name={bookName} chapter={chapterName}") - - tags = x.get("tags") - x = tags - count = 1 - list_img = [] - list_scramble = [] - list_fileName = [] - for image in images: - image_src = image.get("src") - scramble = image.get("scramble") - count_image = "{:0>3d}".format(count) - list_img.append(image_src) - image_src_prefix = "."+str(image_src).split(".")[-1] - if scramble: - su = "."+str(image_src).split(".")[-1] - de_str = str(image_src).split("/")[-1].replace(su,"==") - blocks = imageUtils.encodeImage(de_str) - count_image = "scramble="+str(blocks)+"_"+count_image - list_fileName.append(count_image+image_src_prefix) - count+=1 - #print("count_all_img=", count) - #netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble) - comicInfo.setChapterImgs(list_img) - #保存图像 - comicInfo.nextSaveInfoChapter(chapterName, list_img) - #验证数据是已存在且是否完整 - cbz_path = comicInfo.getDirCBZComicChapter()+".CBZ" - is_next = True - if os.path.exists(cbz_path): - try: - cbz_size = len(CBZUtils.zip_info(cbz_path)) - 1 - except: - cbz_size = 0 - if len(list_img) == 
class comicCommon:
    """Chapter scraping helpers based on a page's ``__NEXT_DATA__`` JSON."""

    @staticmethod
    def _image_suffix(image_src):
        """Return ``"."`` plus the text after the last dot of ``image_src``."""
        return "." + str(image_src).split(".")[-1]

    @classmethod
    def baseComicData(cls, url, update=False):
        """Return ``props.pageProps`` from the page's ``__NEXT_DATA__`` script.

        Args:
            url: Page URL passed to ``htmlUtils.xpathData``.
            update: Forwarded to ``htmlUtils.xpathData``.
        """
        scripts = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()', url=url, update=update)
        return json.loads(scripts[0]).get("props").get("pageProps")

    @classmethod
    def comicChapterDownload(cls, chapter_url):
        """Build the image URL and file name lists for one chapter.

        Names, description and images are read from the page data. When the
        page provides ``chapterAPIPath``, chapter name and images are taken
        from that JSON endpoint instead. Both lists are then handed to
        ``comicInfo.comicChapterDownload``.

        Args:
            chapter_url: URL of the chapter page.

        Raises:
            ValueError: No image list could be obtained. The original code
                failed here with a ``TypeError`` from iterating ``None``.
        """
        x = cls.baseComicData(chapter_url, update=True)
        book_name = x.get("bookName")
        chapter_name = x.get("chapterName")
        description = x.get("description")
        images = x.get("images")
        chapter_api_path = x.get("chapterAPIPath")
        comicInfo.setComicName(book_name)
        comicInfo.setChapterName(chapter_name)
        comicInfo.setDep(description)

        if chapter_api_path is not None:
            # The path is stored with escape sequences; decode them first.
            chapter_api_path = str(chapter_api_path).encode('utf-8').decode('unicode_escape')
            base_url = comicInfo.getBaseUrl(chapter_url)
            chapter_api_url = base_url + chapter_api_path
            ntfy.sendMsg(f"chapterApiUrl= {chapter_api_url}", alert=False)
            data = htmlUtils.getJSON(chapter_api_url)
            if data is not None:
                data = data.get("chapter")
                chapter_name = data.get("name")
                images = data.get("images")
        if images is None:
            message = f"未获取到章节图像 comic_name={book_name} chapter={chapter_name}"
            ntfy.sendMsg(message)
            # Fail explicitly instead of crashing on ``for image in None``.
            raise ValueError(message)

        list_img = []
        list_file_name = []
        for count, image in enumerate(images, start=1):
            image_src = image.get("src")
            suffix = cls._image_suffix(image_src)
            file_stem = "{:0>3d}".format(count)
            list_img.append(image_src)
            if image.get("scramble"):
                # Swap the suffix in the last path segment for "==" and pass
                # the result to imageUtils.encodeImage; its return value is
                # embedded in the file name.
                de_str = str(image_src).split("/")[-1].replace(suffix, "==")
                blocks = imageUtils.encodeImage(de_str)
                file_stem = "scramble=" + str(blocks) + "_" + file_stem
            list_file_name.append(file_stem + suffix)
        comicInfo.comicChapterDownload(list_img, list_file_name)