This commit is contained in:
caiwx86 2023-01-08 17:30:08 +08:00
parent 8739e0f4c8
commit f632ba738f
16 changed files with 1230 additions and 45 deletions

2
get_jm_url.py Normal file
View File

@ -0,0 +1,2 @@
def app():
    """Return the known JM mirror hostnames, highest priority first."""
    mirror_hosts = (
        "jmcomic2.onl",
        "jmcomic1.onl",
        "jmcomic.onl",
        "jmcomic.me",
        "jmcomic1.me",
        "18comic.org",
    )
    return list(mirror_hosts)

285
jmdowning.py Normal file
View File

@ -0,0 +1,285 @@
import io
import requests
import time
import os,re
from multiprocessing import Queue
import threading
from threading import Lock
from lxml import etree
import math
import execjs
from PIL import Image
from myran import Myran
import get_jm_url
from utils.ComicInfo import comicInfo
from utils.PathStr import pathStr
from utils.CBZUtils import CBZUtils
from utils.HtmlUtils import htmlUtils
# Force execjs to use the Windows JScript runtime when evaluating js/md5.js
# (see Download.get_md5); must be set before execjs picks a runtime.
os.environ['EXECJS_RUNTIME'] = "JScript"
class Data:
    """Builds download tasks for a single comic chapter.

    Relies on module-level state: ``lock`` to serialize directory creation,
    ``down_queue`` to hand image jobs to the Download worker threads, and the
    project-wide ``comicInfo`` / ``htmlUtils`` / ``pathStr`` helpers.
    """

    @classmethod
    def oneChapter(cls, *args):
        """Create the chapter's directories and enqueue all of its images.

        args[0] is the photo/album id, used downstream to decide whether an
        image is scrambled. Chapter metadata (book name, chapter name, page
        URL) is read from the shared ``comicInfo`` singleton. On any error
        the whole chapter is retried.
        """
        book_name = comicInfo.getComicName()
        chapter_name = comicInfo.getChapter()
        chapter_href = comicInfo.getWeb()
        try:
            path_album = os.path.join(pathStr.base_comic_img, book_name)
            path_photo = os.path.join(path_album, chapter_name)
            # Existence check + makedirs must be atomic across threads,
            # otherwise two threads can both see "missing" and one fails.
            with lock:
                if not os.path.exists(path_album): os.makedirs(path_album)
                if not os.path.exists(path_photo): os.makedirs(path_photo)
            cls.parse(chapter_href, path_photo, args[0])
        except Exception as e:
            print(e.__traceback__.tb_lineno, e)
            print("重新抛入data_queue")
            # BUG FIX: was ``cls.oneChapter(args)``, which re-wrapped the
            # argument tuple on every retry so args[0] became a tuple and the
            # queued photoid was corrupted; unpack the original args instead.
            cls.oneChapter(*args)

    @classmethod
    def parse(cls, rsp, path_photo, photoid):
        """Scrape the chapter page at URL ``rsp`` and enqueue one download job
        per image.

        Each queued job has the shape ``[img_url, photoid, local_jpg_path]``
        consumed by Download.run. Also records the page list in comicInfo and
        writes the chapter's ComicInfo.xml.
        """
        img_list = htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center')]/img", url=rsp)
        pages_imgs = htmlUtils.xpathData("//div[@class='center scramble-page']/@id", url=rsp)
        comicInfo.setPages(pages_imgs)
        comicInfo.writeComicInfoXML(comicInfo.str_chapter, path=path_photo)
        for i in img_list:
            img_url = i.attrib['data-original']
            # Base name without extension, e.g. ".../00001.webp" -> "00001".
            img_name = os.path.basename(img_url).split('.')[0]
            # NOTE(review): Windows-only path separator, kept consistent with
            # the rest of this file; os.path.join would be portable.
            path_img = "%s\\%s.jpg" % (path_photo, img_name)
            down_queue.put([img_url, photoid, path_img])
class Download(threading.Thread):
    """Worker thread that drains ``down_queue`` and writes images to disk.

    Two download paths exist: a plain download, and "scrambled" images that
    the site slices into horizontal strips which must be re-stitched locally
    (see ``splitimage``). Reads module globals: ``down_queue``, ``down_empty``,
    ``headers``, ``proxy``, ``myran``.
    """
    def __init__(self, thread_name):
        # thread_name: human-readable label used only in log output.
        super().__init__()
        self.thread_name = thread_name

    def run(self):
        """Main worker loop; exits once the module flag ``down_empty`` is set."""
        print("%s开始了!" % self.thread_name)
        while not down_empty:
            try:
                print("还剩余%s张图片" % down_queue.qsize())
                if not down_queue.empty():
                    down = down_queue.get(False)
                else:
                    # Queue momentarily empty: wait a bit, then try a
                    # non-blocking get (raises queue.Empty if still drained).
                    time.sleep(3)
                    down = down_queue.get(False)
                try:
                    print("down", down)
                    if not os.path.exists(down[2]):
                        # scramble_id = 220980 is a fixed threshold taken from
                        # the site's own page script.
                        if int(down[1]) > 220980:  # album id above threshold: image is scrambled, re-stitch it; otherwise plain download
                            print("拼接图片")
                            self.pjdown(down[0], down[1], down[2])
                        else:
                            print("直接下载图片")
                            self.dowm_img(down[0], down[2])
                except Exception as e:
                    print(e.__traceback__.tb_lineno, e)
                    print("重新抛入queue", down)
                    # Re-queue the failed job so another pass can retry it.
                    down_queue.put(down)
            except:
                # Swallows queue.Empty from get(False); loop re-checks the flag.
                pass

    def dowm_img(self, url, path_img):
        """Directly download one image to ``path_img`` (no de-scrambling)."""
        # s=random.choice(list(range(3)))+1+random.random()
        # time.sleep(s)
        # print("time.sleep=%d"%s)
        # NOTE(review): "User_Agent" is not the standard "User-Agent" header
        # name; the key is a runtime string shared module-wide, kept as-is.
        headers["User_Agent"] = myran.agents()
        response = requests.get(url, headers=headers, proxies=proxy)
        if response.status_code == 200:
            with open(path_img, "wb") as f:
                f.write(response.content)
        else: print("图片request失败")

    def pjdown(self, *args):
        """Fetch a scrambled image into memory and hand it to ``splitimage``.

        args: (img_url, album_id, local_path). Non-200 responses are silently
        dropped; run() will re-queue on the resulting missing file next pass.
        """
        imgurl = args[0]
        # print(imgurl)
        imgpath = args[-1]
        # Previous urllib-based fetch kept for reference:
        # httpproxy_handler = urllib.request.ProxyHandler(proxies=proxy)
        # opener = urllib.request.build_opener(httpproxy_handler)
        # urlz = urllib.request.Request(imgurl, headers={"User-Agent": myran.agents()})
        # im2 = Image.open(opener.open(urlz))
        headers["User_Agent"] = myran.agents()
        response = requests.get(imgurl, headers=headers, proxies=proxy)
        if response.status_code == 200:
            im2 = Image.open(io.BytesIO(response.content))
            # im2.show()
            # print(imgurl, args[1],imgpath, im2)
            self.splitimage(imgurl, args[1], imgpath, im2)

    def get_md5(self, num):
        """Run the site's own md5 implementation (js/md5.js) on ``num`` via execjs."""
        with open('js/md5.js', 'r') as file:
            result = file.read()
        context1 = execjs.compile(result)
        result1 = context1.call('md5', num)
        return result1

    def get_num(self, e, t):
        """Compute the strip count used to de-scramble an image.

        e: album id; t: image base name. Mirrors the site's JS: for ids >=
        268850, md5(id + name), take the last hex char, and map its char code
        mod 10 onto an even number 2..20. Returns the default 10 otherwise,
        or False on error.
        """
        # print(type(e),e, type(t),t)
        a = 10
        try:
            num_dict = {}
            for i in range(10):
                num_dict[i] = i * 2 + 2
            if (int(e) >= 268850):
                n = str(e) + t;
                # Original JS: switch(n=(n = (n = md5(n)).substr(-1)), n %= 10) {...}
                # print("n=",n)
                tmp = ord(self.get_md5(n)[-1])
                result = num_dict[tmp % 10]
                a = result
            return a
        except Exception as e:
            # NOTE(review): this ``e`` shadows the album-id parameter ``e``.
            print(e.__traceback__.tb_lineno, e)
            return False

    def splitimage(self, src, aid, imgpath, imageob=''):
        """Re-assemble a scrambled image and save it to ``imgpath``.

        Cuts the source image into ``s`` horizontal strips (bottom-up boxes)
        and pastes them top-down into a new canvas, undoing the site's
        scrambling. ``imageob`` may carry an already-opened PIL image;
        otherwise ``src`` is opened as a local path.
        """
        if imageob == '':
            image = Image.open(src)
        else:
            image = imageob
        w, h = image.size
        # image.show()
        img_name = os.path.basename(src).split('.')[0]
        # print(type(aid),type(img_name))
        if self.get_num(aid, img_name):
            s = self.get_num(aid, img_name)  # number of strips
            # print(s)
            l = h % s  # leftover rows that don't divide evenly
            box_list = []
            hz = 0
            for i in range(s):
                c = math.floor(h / s)
                g = i * c
                hz += c
                h2 = h - c * (i + 1) - l
                if i == 0:
                    c += l;hz += l
                else:
                    g += l
                # Boxes are built bottom-up: (left, upper, right, lower).
                box_list.append((0, h2, w, h - g))
            # print(box_list,len(box_list))
            item_width = w
            # box_list.reverse()  # reversing the list would restore the original cut order
            # print(box_list, len(box_list))
            newh = 0
            image_list = [image.crop(box) for box in box_list]
            # print(box_list)
            newimage = Image.new("RGB", (w, h))
            for image in image_list:
                # image.show()
                b_w, b_h = image.size
                newimage.paste(image, (0, newh))
                newh += b_h
            newimage.save(imgpath)
# --- Module-wide shared state ------------------------------------------------
# Pending image jobs produced by Data.parse and consumed by Download threads;
# each item is [img_url, photoid, local_jpg_path].
down_queue=Queue()
data_empty = False
# Flipped to True by app() once the queue drains; signals workers to exit.
down_empty = False
# Serializes directory creation across chapter processing (see Data.oneChapter).
lock = Lock()
# Shared random User-Agent generator.
myran = Myran()
headers = {
#'cookie':'ipcountry=US; AVS=4eb0s4o5ho9hfmp704ge7jtium; ipm5=bb7f6ac39cebfa37e89bd07544c549fd; cover=1; guide=1; __atuvc=12|39,31|40,5|41,0|42,4|43; __atuvs=635cabf67eff0d49003; yuo1={"objName":"hT3l8Pyn15Uf","request_id":0,"zones":[{"idzone":"2967008","here":{}},{"idzone":"2967010","here":{}},{"idzone":"2967010","here":{}},{"idzone":"3597795","sub":"70","here":{}}]}',
#'referer': 'https://18comic.org/',
# NOTE(review): "User_Agent" is not the standard "User-Agent" header name;
# the key is a runtime string shared with Download methods, so kept as-is.
"User_Agent": myran.agents()
}
# Optional HTTP(S) proxies passed to every requests call; empty = direct.
proxy = {
# "http":"127.0.0.1:7890",
# "https":"127.0.0.1:7890"
}
def app(url):
    """Download a whole album.

    Resolves a working mirror host, scrapes the album page for metadata and
    its chapter list, queues every chapter's images via Data.oneChapter, then
    runs Download worker threads until the queue drains.

    url: album URL of the form ``https://<host>/album/<id>/``.
    """
    try:
        global data_empty, down_empty
        newurl_list = get_jm_url.app()
        response = ''
        if newurl_list:
            # If the URL's host is not a known mirror, rewrite it to the first
            # mirror and fetch from there.
            if re.findall(r'https://(.*?)/\w+/\d+/', url)[0] not in newurl_list:
                for newurl in newurl_list:
                    url = re.sub(re.findall(r'https://(.*?)/\w+/\d+/', url)[0], newurl, url)
                    response = requests.get(url=url, headers=headers, proxies=proxy)
                    break
            else:
                response = requests.get(url=url, headers=headers, proxies=proxy)
        else:
            response = requests.get(url=url, headers=headers, proxies=proxy)
        # NOTE: requests.Response is falsy for 4xx/5xx status codes, so this
        # also filters error responses, not just the '' sentinel.
        if response:
            albumid = re.search(r'/album/(\d+)', url).group(1)
            referer = re.search(r'(https://\w+\.\w+)/', url).group(1)
            print("albumid", albumid, referer, url)
            print(response.url)
            if response.status_code == 200:
                print(response.status_code)
                eth = etree.HTML(response.text)
                # All chapter entries of the album.
                nums = eth.xpath("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a")
                book_name = eth.xpath("//div[@itemprop='name']/h1[@id='book-name']/text()")[0]
                # Strip characters that are illegal in file names.
                book_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>]', '', book_name)
                tags = eth.xpath("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
                author = eth.xpath("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
                book_msg = eth.xpath("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
                jmid = book_msg[0]
                dep = str(book_msg[1]).replace("叙述:", "")
                comicInfo.setComicName(book_name)
                comicInfo.setAuthor(author)
                comicInfo.setDep(dep)
                comicInfo.setTags(tags)
                comicInfo.setTag(tags)
                comicInfo.setCBS("韩漫")
                comicInfo.setLang("zh")
                if nums:
                    for i in nums:
                        photo_name_list = i.xpath("li/text()")[0].split()
                        photo_date = i.xpath("li/span/text()")[0].split()
                        try:
                            # Keep a trailing CJK title segment when present.
                            if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]):
                                photo_name = re.sub(r'\s', '', photo_name_list[0]) + ' ' + photo_name_list[2]
                            else:
                                photo_name = re.sub(r'\s', '', photo_name_list[0])
                        except Exception:
                            # Fewer than 3 name tokens: fall back to the first.
                            photo_name = re.sub(r'\s', '', photo_name_list[0])
                        photo_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>\-]', '', photo_name)
                        # print(photo_name)
                        photoid = i.attrib['data-album']
                        comicInfo.setChapterName(photo_name)
                        comicInfo.setDate(photo_date[0], split='-')
                        comicInfo.setWeb(referer + i.attrib['href'])
                        Data.oneChapter(photoid)
    except Exception as e:
        print(e.__traceback__.tb_lineno, e)
    # Give the producer up to 10 seconds (or 100 queued images) head start
    # before spinning up download workers.
    startime = time.perf_counter()
    while True:
        if down_queue.qsize() > 100 or time.perf_counter() - startime > 10:
            break
        time.sleep(0.1)  # was a pure busy-wait; poll gently instead
    print('down_queue.qsize():%s' % down_queue.qsize())
    # BUG FIX: the original used range(1, 40 if qsize > 40 else qsize), which
    # spawned one worker too few and spawned none at all for a single queued
    # image — leaving the drain loop below spinning forever. Spawn
    # min(qsize, 40) workers instead (0 workers only when the queue is empty).
    thread_count = min(40, down_queue.qsize())
    down_list = ['down下载线程%s' % s for s in range(1, thread_count + 1)]
    down_thread_list = []
    for i in down_list:
        down = Download(i)
        down.start()
        time.sleep(0.7)  # stagger thread start-up
        down_thread_list.append(down)
    while not down_queue.empty():
        time.sleep(1)  # was a pure busy-wait burning a full CPU core
    down_empty = True
    for down_thread in down_thread_list:
        down_thread.join()
        print("%s结束了!" % down_thread.thread_name)
if __name__ == '__main__':
    # os.environ["http_proxy"] = "http://127.0.0.1:7890"
    # os.environ["https_proxy"] = "http://127.0.0.1:7890"
    # Manual entry point: download a single album by URL.
    app("https://18comic.vip/album/407792/")

408
js/md5.js Normal file
View File

@ -0,0 +1,408 @@
/*
* JavaScript MD5
* https://github.com/blueimp/JavaScript-MD5
*
* Copyright 2011, Sebastian Tschan
* https://blueimp.net
*
* Licensed under the MIT license:
* https://opensource.org/licenses/MIT
*
* Based on
* A JavaScript implementation of the RSA Data Security, Inc. MD5 Message
* Digest Algorithm, as defined in RFC 1321.
* Version 2.2 Copyright (C) Paul Johnston 1999 - 2009
* Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
* Distributed under the BSD License
* See http://pajhome.org.uk/crypt/md5 for more info.
*/
/* global define */
/* eslint-disable strict */
;(function ($) {
  'use strict'

  /**
   * Add integers, wrapping at 2^32.
   * This uses 16-bit operations internally to work around bugs in interpreters.
   *
   * @param {number} x First integer
   * @param {number} y Second integer
   * @returns {number} Sum
   */
  function safeAdd(x, y) {
    var lsw = (x & 0xffff) + (y & 0xffff)
    var msw = (x >> 16) + (y >> 16) + (lsw >> 16)
    return (msw << 16) | (lsw & 0xffff)
  }

  /**
   * Bitwise rotate a 32-bit number to the left.
   *
   * @param {number} num 32-bit number
   * @param {number} cnt Rotation count
   * @returns {number} Rotated number
   */
  function bitRotateLeft(num, cnt) {
    return (num << cnt) | (num >>> (32 - cnt))
  }

  /**
   * Basic operation the algorithm uses.
   *
   * @param {number} q q
   * @param {number} a a
   * @param {number} b b
   * @param {number} x x
   * @param {number} s s
   * @param {number} t t
   * @returns {number} Result
   */
  function md5cmn(q, a, b, x, s, t) {
    return safeAdd(bitRotateLeft(safeAdd(safeAdd(a, q), safeAdd(x, t)), s), b)
  }

  /**
   * Basic operation the algorithm uses.
   *
   * @param {number} a a
   * @param {number} b b
   * @param {number} c c
   * @param {number} d d
   * @param {number} x x
   * @param {number} s s
   * @param {number} t t
   * @returns {number} Result
   */
  function md5ff(a, b, c, d, x, s, t) {
    return md5cmn((b & c) | (~b & d), a, b, x, s, t)
  }

  /**
   * Basic operation the algorithm uses.
   *
   * @param {number} a a
   * @param {number} b b
   * @param {number} c c
   * @param {number} d d
   * @param {number} x x
   * @param {number} s s
   * @param {number} t t
   * @returns {number} Result
   */
  function md5gg(a, b, c, d, x, s, t) {
    return md5cmn((b & d) | (c & ~d), a, b, x, s, t)
  }

  /**
   * Basic operation the algorithm uses.
   *
   * @param {number} a a
   * @param {number} b b
   * @param {number} c c
   * @param {number} d d
   * @param {number} x x
   * @param {number} s s
   * @param {number} t t
   * @returns {number} Result
   */
  function md5hh(a, b, c, d, x, s, t) {
    return md5cmn(b ^ c ^ d, a, b, x, s, t)
  }

  /**
   * Basic operation the algorithm uses.
   *
   * @param {number} a a
   * @param {number} b b
   * @param {number} c c
   * @param {number} d d
   * @param {number} x x
   * @param {number} s s
   * @param {number} t t
   * @returns {number} Result
   */
  function md5ii(a, b, c, d, x, s, t) {
    return md5cmn(c ^ (b | ~d), a, b, x, s, t)
  }

  /**
   * Calculate the MD5 of an array of little-endian words, and a bit length.
   *
   * @param {Array} x Array of little-endian words
   * @param {number} len Bit length
   * @returns {Array<number>} MD5 Array
   */
  function binlMD5(x, len) {
    /* append padding */
    x[len >> 5] |= 0x80 << len % 32
    x[(((len + 64) >>> 9) << 4) + 14] = len
    var i
    var olda
    var oldb
    var oldc
    var oldd
    var a = 1732584193
    var b = -271733879
    var c = -1732584194
    var d = 271733878
    for (i = 0; i < x.length; i += 16) {
      olda = a
      oldb = b
      oldc = c
      oldd = d
      /* Round 1 (F) */
      a = md5ff(a, b, c, d, x[i], 7, -680876936)
      d = md5ff(d, a, b, c, x[i + 1], 12, -389564586)
      c = md5ff(c, d, a, b, x[i + 2], 17, 606105819)
      b = md5ff(b, c, d, a, x[i + 3], 22, -1044525330)
      a = md5ff(a, b, c, d, x[i + 4], 7, -176418897)
      d = md5ff(d, a, b, c, x[i + 5], 12, 1200080426)
      c = md5ff(c, d, a, b, x[i + 6], 17, -1473231341)
      b = md5ff(b, c, d, a, x[i + 7], 22, -45705983)
      a = md5ff(a, b, c, d, x[i + 8], 7, 1770035416)
      d = md5ff(d, a, b, c, x[i + 9], 12, -1958414417)
      c = md5ff(c, d, a, b, x[i + 10], 17, -42063)
      b = md5ff(b, c, d, a, x[i + 11], 22, -1990404162)
      a = md5ff(a, b, c, d, x[i + 12], 7, 1804603682)
      d = md5ff(d, a, b, c, x[i + 13], 12, -40341101)
      c = md5ff(c, d, a, b, x[i + 14], 17, -1502002290)
      b = md5ff(b, c, d, a, x[i + 15], 22, 1236535329)
      /* Round 2 (G) */
      a = md5gg(a, b, c, d, x[i + 1], 5, -165796510)
      d = md5gg(d, a, b, c, x[i + 6], 9, -1069501632)
      c = md5gg(c, d, a, b, x[i + 11], 14, 643717713)
      b = md5gg(b, c, d, a, x[i], 20, -373897302)
      a = md5gg(a, b, c, d, x[i + 5], 5, -701558691)
      d = md5gg(d, a, b, c, x[i + 10], 9, 38016083)
      c = md5gg(c, d, a, b, x[i + 15], 14, -660478335)
      b = md5gg(b, c, d, a, x[i + 4], 20, -405537848)
      a = md5gg(a, b, c, d, x[i + 9], 5, 568446438)
      d = md5gg(d, a, b, c, x[i + 14], 9, -1019803690)
      c = md5gg(c, d, a, b, x[i + 3], 14, -187363961)
      b = md5gg(b, c, d, a, x[i + 8], 20, 1163531501)
      a = md5gg(a, b, c, d, x[i + 13], 5, -1444681467)
      d = md5gg(d, a, b, c, x[i + 2], 9, -51403784)
      c = md5gg(c, d, a, b, x[i + 7], 14, 1735328473)
      b = md5gg(b, c, d, a, x[i + 12], 20, -1926607734)
      /* Round 3 (H) */
      a = md5hh(a, b, c, d, x[i + 5], 4, -378558)
      d = md5hh(d, a, b, c, x[i + 8], 11, -2022574463)
      c = md5hh(c, d, a, b, x[i + 11], 16, 1839030562)
      b = md5hh(b, c, d, a, x[i + 14], 23, -35309556)
      a = md5hh(a, b, c, d, x[i + 1], 4, -1530992060)
      d = md5hh(d, a, b, c, x[i + 4], 11, 1272893353)
      c = md5hh(c, d, a, b, x[i + 7], 16, -155497632)
      b = md5hh(b, c, d, a, x[i + 10], 23, -1094730640)
      a = md5hh(a, b, c, d, x[i + 13], 4, 681279174)
      d = md5hh(d, a, b, c, x[i], 11, -358537222)
      c = md5hh(c, d, a, b, x[i + 3], 16, -722521979)
      b = md5hh(b, c, d, a, x[i + 6], 23, 76029189)
      a = md5hh(a, b, c, d, x[i + 9], 4, -640364487)
      d = md5hh(d, a, b, c, x[i + 12], 11, -421815835)
      c = md5hh(c, d, a, b, x[i + 15], 16, 530742520)
      b = md5hh(b, c, d, a, x[i + 2], 23, -995338651)
      /* Round 4 (I) */
      a = md5ii(a, b, c, d, x[i], 6, -198630844)
      d = md5ii(d, a, b, c, x[i + 7], 10, 1126891415)
      c = md5ii(c, d, a, b, x[i + 14], 15, -1416354905)
      b = md5ii(b, c, d, a, x[i + 5], 21, -57434055)
      a = md5ii(a, b, c, d, x[i + 12], 6, 1700485571)
      d = md5ii(d, a, b, c, x[i + 3], 10, -1894986606)
      c = md5ii(c, d, a, b, x[i + 10], 15, -1051523)
      b = md5ii(b, c, d, a, x[i + 1], 21, -2054922799)
      a = md5ii(a, b, c, d, x[i + 8], 6, 1873313359)
      d = md5ii(d, a, b, c, x[i + 15], 10, -30611744)
      c = md5ii(c, d, a, b, x[i + 6], 15, -1560198380)
      b = md5ii(b, c, d, a, x[i + 13], 21, 1309151649)
      a = md5ii(a, b, c, d, x[i + 4], 6, -145523070)
      d = md5ii(d, a, b, c, x[i + 11], 10, -1120210379)
      c = md5ii(c, d, a, b, x[i + 2], 15, 718787259)
      b = md5ii(b, c, d, a, x[i + 9], 21, -343485551)
      a = safeAdd(a, olda)
      b = safeAdd(b, oldb)
      c = safeAdd(c, oldc)
      d = safeAdd(d, oldd)
    }
    return [a, b, c, d]
  }

  /**
   * Convert an array of little-endian words to a string
   *
   * @param {Array<number>} input MD5 Array
   * @returns {string} MD5 string
   */
  function binl2rstr(input) {
    var i
    var output = ''
    var length32 = input.length * 32
    for (i = 0; i < length32; i += 8) {
      output += String.fromCharCode((input[i >> 5] >>> i % 32) & 0xff)
    }
    return output
  }

  /**
   * Convert a raw string to an array of little-endian words
   * Characters >255 have their high-byte silently ignored.
   *
   * @param {string} input Raw input string
   * @returns {Array<number>} Array of little-endian words
   */
  function rstr2binl(input) {
    var i
    var output = []
    output[(input.length >> 2) - 1] = undefined
    for (i = 0; i < output.length; i += 1) {
      output[i] = 0
    }
    var length8 = input.length * 8
    for (i = 0; i < length8; i += 8) {
      output[i >> 5] |= (input.charCodeAt(i / 8) & 0xff) << i % 32
    }
    return output
  }

  /**
   * Calculate the MD5 of a raw string
   *
   * @param {string} s Input string
   * @returns {string} Raw MD5 string
   */
  function rstrMD5(s) {
    return binl2rstr(binlMD5(rstr2binl(s), s.length * 8))
  }

  /**
   * Calculates the HMAC-MD5 of a key and some data (raw strings)
   *
   * @param {string} key HMAC key
   * @param {string} data Raw input string
   * @returns {string} Raw MD5 string
   */
  function rstrHMACMD5(key, data) {
    var i
    var bkey = rstr2binl(key)
    var ipad = []
    var opad = []
    var hash
    ipad[15] = opad[15] = undefined
    if (bkey.length > 16) {
      bkey = binlMD5(bkey, key.length * 8)
    }
    for (i = 0; i < 16; i += 1) {
      ipad[i] = bkey[i] ^ 0x36363636
      opad[i] = bkey[i] ^ 0x5c5c5c5c
    }
    hash = binlMD5(ipad.concat(rstr2binl(data)), 512 + data.length * 8)
    return binl2rstr(binlMD5(opad.concat(hash), 512 + 128))
  }

  /**
   * Convert a raw string to a hex string
   *
   * @param {string} input Raw input string
   * @returns {string} Hex encoded string
   */
  function rstr2hex(input) {
    var hexTab = '0123456789abcdef'
    var output = ''
    var x
    var i
    for (i = 0; i < input.length; i += 1) {
      x = input.charCodeAt(i)
      output += hexTab.charAt((x >>> 4) & 0x0f) + hexTab.charAt(x & 0x0f)
    }
    return output
  }

  /**
   * Encode a string as UTF-8
   *
   * @param {string} input Input string
   * @returns {string} UTF8 string
   */
  function str2rstrUTF8(input) {
    return unescape(encodeURIComponent(input))
  }

  /**
   * Encodes input string as raw MD5 string
   *
   * @param {string} s Input string
   * @returns {string} Raw MD5 string
   */
  function rawMD5(s) {
    return rstrMD5(str2rstrUTF8(s))
  }

  /**
   * Encodes input string as Hex encoded string
   *
   * @param {string} s Input string
   * @returns {string} Hex encoded string
   */
  function hexMD5(s) {
    return rstr2hex(rawMD5(s))
  }

  /**
   * Calculates the raw HMAC-MD5 for the given key and data
   *
   * @param {string} k HMAC key
   * @param {string} d Input string
   * @returns {string} Raw MD5 string
   */
  function rawHMACMD5(k, d) {
    return rstrHMACMD5(str2rstrUTF8(k), str2rstrUTF8(d))
  }

  /**
   * Calculates the Hex encoded HMAC-MD5 for the given key and data
   *
   * @param {string} k HMAC key
   * @param {string} d Input string
   * @returns {string} Raw MD5 string
   */
  function hexHMACMD5(k, d) {
    return rstr2hex(rawHMACMD5(k, d))
  }

  /**
   * Calculates MD5 value for a given string.
   * If a key is provided, calculates the HMAC-MD5 value.
   * Returns a Hex encoded string unless the raw argument is given.
   *
   * @param {string} string Input string
   * @param {string} [key] HMAC key
   * @param {boolean} [raw] Raw output switch
   * @returns {string} MD5 output
   */
  function md5(string, key, raw) {
    if (!key) {
      if (!raw) {
        return hexMD5(string)
      }
      return rawMD5(string)
    }
    if (!raw) {
      return hexHMACMD5(key, string)
    }
    return rawHMACMD5(key, string)
  }

  // UMD export: AMD, CommonJS, or a global (the Python side calls the
  // `md5` function directly through execjs).
  if (typeof define === 'function' && define.amd) {
    define(function () {
      return md5
    })
  } else if (typeof module === 'object' && module.exports) {
    module.exports = md5
  } else {
    $.md5 = md5
  }
})(this)

2
js/md5.min.js vendored Normal file
View File

@ -0,0 +1,2 @@
// Minified vendored build of blueimp JavaScript-MD5; the readable, documented
// source lives in js/md5.js. Do not edit by hand.
!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((u=d(d(t,n),d(e,u)))<<o|u>>>32-o,r)}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function c(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=g(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=g(a,c,f,i,n[h+6],9,-1069501632),i=g(i,a,c,f,n[h+11],14,643717713),f=g(f,i,a,c,n[h],20,-373897302),c=g(c,f,i,a,n[h+5],5,-701558691),a=g(a,c,f,i,n[h+10],9,38016083),i=g(i,a,c,f,n[h+15],14,-660478335),f=g(f,i,a,c,n[h+4],20,-405537848),c=g(c,f,i,a,n[h+9],5,568446438),a=g(a,c,f,i,n[h+14],9,-1019803690),i=g(i,a,c,f,n[h+3],14,-187363961),f=g(f,i,a,c,n[h+8],20,1163531501),c=g(c,f,i,a,n[h+13],5,-1444681467),a=g(a,c,f,i,n[h+2],9,-51403784),i=g(i,a,c,f,n[h+7],14,1735328473),c=v(c,f=g(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=v(a,c,f,i,n[h+8],11,-2022574463),i=v(i,a,c,f,n[h+11],16,1839030562),f=v(f,i,a,c,n[h+14],23,-35309556),c=v(c,f,i,a,n[h+1],4,-1530992060),a=v(a,c,f,i,n[h+4],11,1272893353),i=v(i,a,c,f,n[h+7],16,-155497632),f=v(f,i,a,c,n[h+10],23,-1094730640),c=v(c,f,i,a,n[h+13],4,681279174),a=v(a,c,f,i,n[h],11,-358537222),i=v(i,a,c,f,n[h+3],16,-722521979
),f=v(f,i,a,c,n[h+6],23,76029189),c=v(c,f,i,a,n[h+9],4,-640364487),a=v(a,c,f,i,n[h+12],11,-421815835),i=v(i,a,c,f,n[h+15],16,530742520),c=m(c,f=v(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function i(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function a(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return i(c(a(n=r(n)),8*n.length))}function u(n,t){return function(n,t){var r,e=a(n),o=[],u=[];for(o[15]=u[15]=void 0,16<e.length&&(e=c(e,8*n.length)),r=0;r<16;r+=1)o[r]=909522486^e[r],u[r]=1549556828^e[r];return t=c(o.concat(a(t)),512+8*t.length),i(c(u.concat(t),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);
//# sourceMappingURL=md5.min.js.map

70
js/md5.min.js.map Normal file

File diff suppressed because one or more lines are too long

13
main.py
View File

@ -1,15 +1,16 @@
from utils.entity.RouMan import comicEntity
from utils.entity.JMTI import comicEntity
from utils.comic.ComicInfo import comicInfo
import os,shutil
def comics():
for x in range(0,30):
comicEntity.downladsComcis("https://rm01.xyz/books?&page="+str(x))
for x in range(1,2):
comicEntity.downladsComcis("https://jmcomic.moe/albums/hanman?page="+str(x))
if __name__ == '__main__':
# os.environ["http_proxy"] = "http://127.0.0.1:7890"
# os.environ["https_proxy"] = "http://127.0.0.1:7890"
# os.environ["http_proxy"] = "http://127.0.0.1:7890"
# os.environ["https_proxy"] = "http://127.0.0.1:7890"
# url = "https://rm01.xyz/books/3700b70b-c5a9-4328-9ae3-cffda6a77339/15"
# comicEntity.comicChapter(url,scramble=True)
# comicEntity.oneComic("https://rm01.xyz/books/c94f80c1-a673-4c74-bb5e-ad5ac7dd766b")
comics()
# comicEntity.oneComic("https://jmcomic2.onl/album/403327/")
comicEntity.downladsComcis("https://jmcomic2.onl/albums/hanman?shunt=2")

32
myran.py Normal file
View File

@ -0,0 +1,32 @@
import random
class Myran:
    """Produces randomized desktop-Chrome User-Agent header values."""

    def agents(self):
        """Return a UA string with a random Windows platform and Chrome build."""
        platforms = (
            'Windows NT 10.0; Win64; x64', 'Windows NT 10.0',
            'Windows NT 6.3; Win64; x64', 'Windows NT 6.3',
            'Windows NT 6.2; Win64; x64', 'Windows NT 6.2',
            'Windows NT 6.1; Win64; x64', 'Windows NT 6.1',
        )
        chrome_builds = (
            '106.0.5249.36', '85.0.4183.34', '56.0.2924.26', '85.0.4183.84', '65.0.3325.65',
            '84.0.4147.136', '76.0.3809.102', '93.0.4577.69', '91.0.4472.66', '105.0.5195.112',
            '103.0.5060.132', '104.0.5112.110', '91.0.4472.81', '69.0.3497.35', '106.0.5239.0',
            '91.0.4456.0', '70.0.3532.8', '91.0.4472.167', '77.0.3865.105', '106.0.5249.12',
            '93.0.4577.10', '86.0.4240.199', '98.0.4758.107', '75.0.3770.102', '92.0.4515.162',
            '105.0.5195.24', '56.0.2924.110', '58.0.3029.89', '98.0.4758.79', '93.0.4577.95',
            '90.0.4430.51', '90.0.4430.218', '84.0.4147.21', '86.0.4240.15', '58.0.3029.140',
            '92.0.4515.157', '69.0.3497.120', '91.0.4472.10', '89.0.4389.116', '72.0.3626.49',
            '65.0.3325.167', '85.0.4183.134', '65.0.3325.209', '75.0.3770.144', '69.0.3497.95',
            '56.0.2924.87', '72.0.3626.97', '103.0.5060.53', '93.0.4577.36', '103.0.5060.15',
            '107.0.5283.0', '77.0.3865.35', '105.0.5195.134', '76.0.3809.136', '72.0.3626.122',
            '90.0.4430.73', '86.0.4240.77', '104.0.5112.83', '77.0.3865.120', '77.0.3865.93',
            '107.0.5304.10', '84.0.4147.94', '75.0.3770.19', '106.0.5249.49', '98.0.4758.0',
            '98.0.4758.14', '76.0.3809.38', '105.0.5195.42', '58.0.3029.31',
        )
        # Two random.choice calls, platform first, matching the original
        # draw order so seeded runs stay reproducible.
        platform = random.choice(platforms)
        build = random.choice(chrome_builds)
        return (
            f"Mozilla/5.0 ({platform}) AppleWebKit/537.36 "
            f"(KHTML, like Gecko) Chrome/{build} Safari/537.36"
        )

# Shared module-level instance used by importers.
myran = Myran()

View File

@ -60,6 +60,7 @@ class CBZUtils:
size_imgs = len(comicInfo.getChapterImgs())
if size_zip -1 == size_imgs:
ntfy.sendMsg(f"打包校验成功: {packCBZ_path}")
comicInfo.nextCBZToDoneChapter()
return True
else:
ntfy.sendMsg(f"打包检验不完整:{packCBZ_path}")
@ -68,7 +69,6 @@ class CBZUtils:
except:
print("")
#ntfy.sendMsg("CBZ打包失败")
comicInfo.nextCBZToDoneChapter()
@classmethod
def zip_info(cls,zip_path=None):

View File

@ -5,18 +5,29 @@ import traceback
import time
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from myran import Myran
from utils.Ntfy import ntfy
import re
from utils.comic.PathStr import pathStr
class htmlUtils:
headers = {'User-Agent': UserAgent().random}
myran = Myran()
headers = {
#'cookie':'ipcountry=US; AVS=4eb0s4o5ho9hfmp704ge7jtium; ipm5=bb7f6ac39cebfa37e89bd07544c549fd; cover=1; guide=1; __atuvc=12|39,31|40,5|41,0|42,4|43; __atuvs=635cabf67eff0d49003; yuo1={"objName":"hT3l8Pyn15Uf","request_id":0,"zones":[{"idzone":"2967008","here":{}},{"idzone":"2967010","here":{}},{"idzone":"2967010","here":{}},{"idzone":"3597795","sub":"70","here":{}}]}',
#'referer': 'https://18comic.org/',
"User_Agent": myran.agents()
}
# headers = {'User-Agent': UserAgent().random}
url_data = {}
@classmethod
def getPathSaveHtml(cls,url,type=None):
rstr = r"[\/\\\:\*\?\"\<\>\|\.]" #  '/ \ : * ? " < > |'
file_url = re.sub(rstr, "", url)
try:
file_url = re.sub(rstr, "", url)
except:
file_url = "error_cache"
file_path = os.path.join(pathStr.base_html_cache,file_url)
if type == "new":
return file_path
@ -64,7 +75,7 @@ class htmlUtils:
if count <= 3:
try:
print(f"请求地址:{curl}")
res = s.get(curl,stream=True, headers=cls.headers, timeout=180)
res = s.get(curl,stream=True, headers=cls.headers, timeout=1000)
if type == "bytes":
url_text = res
if type == "json":
@ -74,6 +85,8 @@ class htmlUtils:
if type == None:
url_text = html.fromstring(res.text)
cls.saveHtml(curl,res.text)
# if res.status_code != 200:
# cls.remove_HtmlCache(curl)
except:
print(f'Retry! 第{count}')
time.sleep(1)

View File

@ -20,6 +20,7 @@ class netUtils:
return msg
else:
print("文件已损坏,已重试:"+path)
path = os.path.join(os.path.dirname(path),str(os.path.basename(path)).split("?")[0])
tmp_file = path+".downloads"
if os.path.exists(tmp_file):
os.remove(tmp_file)
@ -81,6 +82,7 @@ class netUtils:
return None
save_name = "cover"
icon_su = "."+str(icon_url).split(".")[-1]
icon_su = icon_su.split("?")[0]
#判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过
pathComicIcon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_su)
if not os.path.exists(pathComicIcon):

View File

@ -1,3 +1,4 @@
from utils.HtmlUtils import htmlUtils
from utils.Ntfy import ntfy
from utils.CBZUtils import CBZUtils
from utils.comic.ComicInfo import comicInfo
@ -31,4 +32,29 @@ class verUtils:
return False
else:
ntfy.sendMsg("info_data 为空")
return False
return False
@classmethod
def verNextCBZ(cls,list_img):
#验证数据是已存在且是否完整
cbz_path = comicInfo.getDirCBZComicChapter()+".CBZ"
is_next = False
if os.path.exists(cbz_path):
try:
cbz_size = len(CBZUtils.zip_info(cbz_path)) - 1
except:
cbz_size = 0
if len(list_img) == cbz_size:
ntfy.sendMsg(f"{comicInfo.getComicName()} {comicInfo.getChapter()} 数据完整,已跳过")
is_next = True
else:
ntfy.sendMsg(f"{comicInfo.getComicName()} {comicInfo.getChapter()} 数据不完整尝试删除配置CBZ文件后重试")
try:
if cbz_size < len(list_img) or os.path.getsize(cbz_path) < 300000:
ntfy.sendMsg(f"删除 {cbz_path}")
os.remove(cbz_path)
else:
is_next = True
except:
ntfy(f"删除失败 {cbz_path}")
return is_next

View File

@ -17,8 +17,14 @@ class comicInfo():
cbs = "Publisher"
lang = "LanguageISO"
comic_names = "SeriesGroup"
tags = "Tag"
tags = "Tags"
date_year = "Year"
date_month = "Month"
date_day = "Day"
page_count = "PageCount"
pages = "Pages"
web = "Web"
str_comicName = None
str_chapter = None
str_icon = None
@ -26,7 +32,12 @@ class comicInfo():
str_listChapter = None
str_chapter_imgs = None
str_updateAt = None
str_date_year = None
str_date_month = None
str_date_day = None
str_page_count = None
str_web = None
chapter_node = None
comicName_node = None
dep_node = None
@ -36,12 +47,18 @@ class comicInfo():
lang_node = None
comicNames_node = None
tags_node = None
date_year_node = None
date_month_node = None
date_day_node = None
page_count_node = None
pages_node = None
web_node = None
@classmethod
def setNodeAndValue(cls,node,value):
if not value == None:
if value != None:
c_node = cls.document.createElement(node)
node_text = cls.document.createTextNode(value)
node_text = cls.document.createTextNode(str(value).replace("\n",""))
c_node.appendChild(node_text)
return c_node
return None
@ -86,7 +103,7 @@ class comicInfo():
@classmethod
def setAuthor(cls,value):
cls.author_node = cls.setNodeAndValue(cls.author,value)
cls.author_node = cls.setNodeAndValue(cls.author,cls.getListToString(value))
@classmethod
def setLang(cls,value):
@ -94,16 +111,55 @@ class comicInfo():
@classmethod
def setTag(cls,value):
cls.tag_node = cls.setNodeAndValue(cls.tag, value)
cls.tag_node = cls.setNodeAndValue(cls.tag, cls.getListToString(value))
@classmethod
def setTags(cls,value):
cls.tags_node = cls.setNodeAndValue(cls.tags,value)
cls.tags_node = cls.setNodeAndValue(cls.tags,cls.getListToString(value))
@classmethod
def setCBS(cls,value):
cls.cbs_node = cls.setNodeAndValue(cls.cbs,value)
@classmethod
def setWeb(cls,value):
cls.str_web = value
cls.web_node = cls.setNodeAndValue(cls.web,value)
@classmethod
def getWeb(cls):
return cls.str_web
@classmethod
def setPageCount(cls,value):
cls.str_page_count = value
cls.page_count_node = cls.setNodeAndValue(cls.page_count,value)
@classmethod
def setPages(cls,value):
if value != None:
su = "."+str(value[0]).split(".")[-1]
join_list=",".join(value).replace(su,"")
value = join_list.split(",")
cls.setPageCount(str(len(value)))
root_node = cls.document.createElement(cls.pages)
for page in value:
c_node = cls.document.createElement("Page")
c_node.setAttribute("Image",page)
root_node.appendChild(c_node)
cls.pages_node = root_node
@classmethod
def setDate(cls,value,split):
values = str(value).split(split)
cls.str_date_year = values[0]
cls.str_date_month = values[1]
cls.str_date_day = values[2]
cls.date_year_node = cls.setNodeAndValue(cls.date_year,values[0])
cls.date_month_node = cls.setNodeAndValue(cls.date_month,values[1])
cls.date_day_node = cls.setNodeAndValue(cls.date_day,values[2])
@classmethod
def setIcon(cls,value):
    # Cover-image URL; presumably consumed when the cover is downloaded
    # (netUtils.downloadComicIcon) — TODO confirm against caller.
    cls.str_icon = value
@ -124,7 +180,14 @@ class comicInfo():
@classmethod
def getUpdateAt(cls):
    # Returns the update-timestamp string stored earlier; the matching
    # setter is not visible in this chunk.
    return cls.str_updateAt
@classmethod
def getListToString(cls,to_list):
    """Return *to_list* joined with commas when it is a list; any other
    value is passed through unchanged."""
    if isinstance(to_list, list):
        return ",".join(to_list)
    return to_list
'''
获取网站主页
'''
@ -207,39 +270,40 @@ class comicInfo():
return cls.pathComicInfo
@classmethod
def writeComicInfoXML(cls,chapter):
def writeComicInfoXML(cls,chapter,path=None):
if cls.chapter == cls.fixFileName(chapter):
print(f"cls.chapter {cls.chapter} 与 chapter {chapter} 不相等,已自动跳过")
root = cls.Root()
newDocument = Document()
newDocument.appendChild(root)
if not cls.chapter_node == None:
root.appendChild(cls.chapter_node)
if not cls.comicName_node == None:
root.appendChild(cls.comicName_node)
if not cls.dep_node == None:
root.appendChild(cls.dep_node)
if not cls.author_node == None:
root.appendChild(cls.author_node)
if not cls.tag_node == None:
root.appendChild(cls.tag_node)
if not cls.cbs_node == None:
root.appendChild(cls.cbs_node)
if not cls.lang_node == None:
root.appendChild(cls.lang_node)
if not cls.comicNames_node == None:
root.appendChild(cls.comicNames_node)
if not cls.tags_node == None:
root.appendChild(cls.tags_node)
if cls.chapter_node != None: root.appendChild(cls.chapter_node)
if cls.comicName_node != None: root.appendChild(cls.comicName_node)
if cls.dep_node != None: root.appendChild(cls.dep_node)
if cls.author_node != None: root.appendChild(cls.author_node)
if cls.tag_node != None: root.appendChild(cls.tag_node)
if cls.cbs_node != None: root.appendChild(cls.cbs_node)
if cls.lang_node != None: root.appendChild(cls.lang_node)
if cls.comicNames_node != None: root.appendChild(cls.comicNames_node)
if cls.tags_node != None: root.appendChild(cls.tags_node)
if cls.date_year_node != None: root.appendChild(cls.date_year_node)
if cls.date_month_node != None: root.appendChild(cls.date_month_node)
if cls.date_day_node != None: root.appendChild(cls.date_day_node)
if cls.page_count_node != None: root.appendChild(cls.page_count_node)
if cls.pages_node != None: root.appendChild(cls.pages_node)
cls.getPathComicInfoXML()
if path != None:
cls.pathComicInfo = os.path.join(path,"ComicInfo.xml")
base_dir = os.path.dirname(cls.pathComicInfo)
if not os.path.exists(base_dir):
os.makedirs(base_dir)
if os.path.exists(cls.pathComicInfo):
print("ComicInfo.xml 已存在")
return None
with open(cls.pathComicInfo , "w", encoding="utf-8") as fo:
newDocument.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
fo.close()
#print("ComicInfo.xml 已生成 pathd=", cls.pathComicInfo)
print("ComicInfo.xml 已生成 pathd=", cls.pathComicInfo)
#文件保存
@classmethod

View File

@ -2,7 +2,7 @@ import os,datetime
from time import strftime
class pathStr:
#base_comic_out = "COMICOUT"
base_comic_out = os.path.join("/mnt", "bigTComics")
base_comic_out = os.path.join("/mnt", "bigTComics","JM")
base_CBZ = os.path.join(base_comic_out,"CBZ")
base_comic_img = os.path.join(base_comic_out,"outputComic")
base_conf_path = os.path.join(base_comic_out,".conf")

View File

@ -33,13 +33,16 @@ def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, p
response = None
file_path = os.path.join(dst_dir, file_name)
if os.path.exists(file_path):
print("文件已存在,已跳过=",file_path)
return None
temp_path = os.path.join(dst_dir, file_name+".downloads")
repair_count = 1
try:
response = requests.get(
image_url, headers=headers, timeout=timeout, proxies=proxies)
while response.status_code != 200 and repair_count <= 5:
time.sleep(3)
time.sleep(0.7)
download_image(image_url,dst_dir,file_name)
ntfy.sendMsg(f'重试:第{repair_count}{image_url}')
repair_count += 1
@ -83,4 +86,4 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, time
future_list.append(executor.submit(
download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
count += 1
concurrent.futures.wait(future_list, timeout=360)
concurrent.futures.wait(future_list, timeout)

277
utils/entity/JMTI.py Normal file
View File

@ -0,0 +1,277 @@
import hashlib
import json,os,time,random,shutil
import re,math
import execjs
from utils.NetUtils import netUtils
from utils.HtmlUtils import htmlUtils
from utils.ImageUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.CBZUtils import CBZUtils
from utils.downloader import download_images
from utils.Ntfy import ntfy
from utils.VerUtils import verUtils
from PIL import Image
class comicEntity:
    """Scraper/downloader for 18comic ("JM") albums.

    Walks listing pages, scrapes album metadata into comicInfo, downloads
    each chapter's images, de-scrambles protected pages and hands off to
    the CBZ packer.  All state is class-level, so the class acts as a
    namespace and is NOT safe for concurrent use.

    NOTE(review): the diff rendering stripped indentation; block structure
    below is reconstructed and should be checked against the repository.
    """
    # Running index of chapters handled for the current album.
    count_chapter = 0
    # data-album id of the chapter currently being processed; used as the
    # scramble key by get_scramble_num().
    aid = None
    # Retry counter shared by comicChapterDownload() across recursive calls.
    repeat = 0
    @classmethod
    def downladsComcis(cls,url):
        """Download every comic linked from the album-overview page *url*.

        NOTE(review): method name is misspelled ("downladsComcis") but kept
        unchanged because external callers depend on it.
        """
        # Comic hrefs and names scraped from the listing page.
        comic_href_list = htmlUtils.xpathData("//div[@class='thumb-overlay-albums']/a/@href",url,update=True)
        comics_name = htmlUtils.xpathData("//span[@class='video-title title-truncate m-t-5']/text()")
        len_books = len(comic_href_list)
        base_url = comicInfo.getBaseUrl(url)
        for x in range(0, len_books):
            book_name = comics_name[x]
            comicInfo.setComicName(book_name)
            comic_href = base_url+comic_href_list[x]
            # Politeness delay of 5-20 s before starting a book that has no
            # local config directory yet (i.e. was never downloaded).
            random_int = random.randint(5,20)
            dir_conf_comic = comicInfo.getDirConfComic()
            if not os.path.exists(dir_conf_comic):
                ntfy.sendMsg(f"{random_int}秒后开始下载 漫画:{book_name}")
                time.sleep(random_int)
            else:
                ntfy.sendMsg(f"已存在 漫画:{book_name}")
            cls.oneComic(comic_href, random.uniform(0,10))
        #print(books)
        #for comicHref in comicsHref:
        #    cls.oneComic(comicHref,random.uniform(10,20))
    @classmethod
    def oneComic(cls,c_url,sleep=None):
        """Scrape one album page *c_url*, push its metadata into comicInfo,
        then download every chapter not already marked "done_"; optionally
        sleep *sleep* seconds afterwards.
        """
        # Chapter <a> elements from the episode toolbar.
        nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a",url=c_url,update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()")
        # Strip characters that are awkward/illegal in file names.
        book_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>]', '', book_name)
        tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
        author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
        book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
        jmid = book_msg[0]  # NOTE(review): unused below
        # Description text with the "叙述:" (narration) label removed.
        dep = str(book_msg[1]).replace("叙述:","")
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src",num=0)
        comicInfo.setIcon(icon)
        comicInfo.setHomePage(c_url)
        comicInfo.setComicName(book_name)
        comicInfo.setAuthor(author)
        comicInfo.setDep(dep)
        comicInfo.setTags(tags)
        comicInfo.setTag(tags)
        comicInfo.setCBS("韩漫")
        comicInfo.setLang("zh")
        albumid = re.search(r'/album/(\d+)', c_url).group(1)  # NOTE(review): unused below
        # Scheme+host of the album URL, used to absolutize chapter hrefs.
        referer = re.search(r'(https://\w+\.\w+)/', c_url).group(1)
        if nums:
            list_chapter_name = []   # NOTE(review): never populated
            list_chapter_href = []   # NOTE(review): never populated
            cls.count_chapter = 0
            for i in nums:
                photo_name_list = i.xpath("li/text()")[0].split()
                photo_date = i.xpath("li/span/text()")[0].split()
                #print(re.findall(r'[\u4E00-\u9FA5]+.*?', i.xpath("li/text()")[0]))
                # Keep the CJK title part (index 2) when present; fall back
                # to the bare chapter number on IndexError.
                try:
                    if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]):
                        photo_name=re.sub(r'\s','',photo_name_list[0])+' '+photo_name_list[2]
                    else:photo_name=re.sub(r'\s','',photo_name_list[0])
                except Exception as e:
                    photo_name = re.sub(r'\s', '', photo_name_list[0])
                photo_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>\-]', '',photo_name)
                #print(photo_name)
                photoid=i.attrib['data-album']
                cls.aid = photoid
                comicInfo.setChapterName(photo_name)
                comicInfo.setDate(photo_date[0],split='-')
                comicInfo.setWeb(referer+i.attrib['href'])
                # Albums newer than id 220980 serve scrambled page images;
                # NOTE(review): magic threshold taken from site behavior.
                is_scramble = False
                if int(photoid) > 220980:
                    is_scramble = True
                if not comicInfo.nextExistsGetPath("done_"):
                    comicEntity.comicChapter(referer+i.attrib['href'],scramble=is_scramble,sleep=random.randint(5,15))
                # If already done, verify the CBZ archive instead.
                if comicInfo.nextExistsGetPath("done_"):
                    verUtils.verCBZ()
                cls.count_chapter += 1
        # Wait after finishing one book; clear its working directory.
        path_dir_comic = comicInfo.getDirComic()
        if os.path.exists(path_dir_comic):
            shutil.rmtree(path_dir_comic)
        if sleep != None:
            time.sleep(sleep)
    '''
    读取某章节下所有图片
    '''
    @classmethod
    def comicChapter(cls,chapter_url,scramble=None,sleep=None):
        """Download one chapter, then drive the post-download pipeline:
        cover download ("down_" stage), CBZ packing ("cbz_" stage) and
        cleanup of the raw image directory.
        """
        is_next = True
        # try:
        is_next = cls.Onechapter(chapter_url,scramble)
        # Next stage: once all chapter images are down, fetch the cover.
        if comicInfo.nextExistsGetPath("down_"):
            netUtils.downloadComicIcon()
        # Next stage: pack into CBZ, then delete the raw image folder.
        if comicInfo.nextExistsGetPath("cbz_"):
            time.sleep(0.1)
            is_next = CBZUtils.packAutoComicChapterCBZ()
            remove_path = comicInfo.getDirComicChapter()
            shutil.rmtree(remove_path)
            print(f"文件已删除: {remove_path}")
        # except:
        #     ntfy.sendMsg(f"{comicInfo.getComicName()} 下载出错了")
        #     is_next = False
        #ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / "+ str(comicInfo.getLenChapters()))
        if sleep != None and is_next == True:
            ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节")
            time.sleep(sleep)
    @classmethod
    def Onechapter(cls,chapter_url,scramble=None):
        """Download a single chapter's images (retrying once with a fresh
        HTML cache on any error) and de-scramble them when *scramble* is
        truthy.  Returns the is_next flag from comicChapterDownload().
        """
        if not str(chapter_url).startswith("http"):
            chapter_url = comicInfo.getBaseUrl() + chapter_url
        # NOTE(review): bare except — retries once after dropping the
        # cached HTML, on ANY failure including KeyboardInterrupt.
        try:
            is_next = cls.comicChapterDownload(chapter_url,scramble)
        except:
            htmlUtils.remove_HtmlCache(chapter_url)
            cls.repeat = 0
            is_next = cls.comicChapterDownload(chapter_url,scramble)
        comicInfo.nextInfoToImgChapter()
        # After downloading, decode the scrambled images in place.
        if scramble:
            chapter_dir = comicInfo.getDirComicChapter()
            dirs = os.listdir(chapter_dir)
            for img in dirs:
                # Scrambled files were saved as "scramble=<n>_<name>.jpg".
                is_scramble = str(img).startswith("scramble=")
                if is_scramble:
                    c_path = os.path.join(chapter_dir, img)
                    #imageUtils.getScrambleImage(c_path)
                    cls.encode_scramble_image(c_path)
        # Advance to the next pipeline stage.
        comicInfo.nextImgToDownloadChapter()
        return is_next
    @classmethod
    def comicChapterDownload(cls,chapter_url,c_scramble):
        """Scrape the chapter page's image URLs, write ComicInfo.xml and
        download all images, retrying with mirror "?shunt=N" query strings
        after 3 failed passes.  Always returns True on completion.

        NOTE(review): the recursive retry's return value is discarded and
        cls.repeat is never reset here — confirm intended.
        """
        img_list =htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center')]/img/@data-original",url=chapter_url,update=True)
        pages_imgs =htmlUtils.xpathData("//div[@class='center scramble-page']/@id",url=chapter_url)
        comicInfo.setPages(pages_imgs)
        comicInfo.writeComicInfoXML(comicInfo.str_chapter)
        #print("img_list:",len(img_list))
        list_img = []
        list_file_name = []
        for i in img_list:
            img_url= i
            img_name = os.path.basename(img_url).split('.')[0]
            if c_scramble:
                # Prefix the scramble block count so the decoder can read it
                # back from the file name later.
                img_name = "scramble="+str(cls.get_scramble_num(cls.aid,img_name))+"_"+img_name
            #path_img = "%s\\%s.jpg" % (cls.aid, img_name)
            path_img = "%s.jpg" % (img_name)
            list_img.append(img_url)
            list_file_name.append(path_img)
        comicInfo.setChapterImgs(list_img)
        # Persist chapter/image info before downloading.
        comicInfo.nextSaveInfoChapter(comicInfo.str_chapter, list_img)
        is_next = verUtils.verNextCBZ(list_img)
        list_shunt = ["?shunt=2","?shunt=1","?shunt=3",""]
        while not is_next:
            time.sleep(1)
            download_images(list_img,comicInfo.getDirComicChapter(), filesName=list_file_name,timeout=1500)
            file_imgs = os.listdir(comicInfo.getDirComicChapter())
            # Count ".jpg" occurrences to check all images arrived.
            count_jpg = ",".join(file_imgs).split(".jpg")
            is_next = len(count_jpg)-1 == len(list_img)
            cls.repeat += 1
            if cls.repeat > 3:
                # Rotate through mirror shunt query strings.
                url = list_shunt[cls.repeat % len(list_shunt)]
                print("分流中=",url)
                cls.comicChapterDownload(str(chapter_url).split("?")[0]+url,c_scramble)
        return True
    @classmethod
    def get_md5(cls,num):
        # Hex MD5 digest of the string *num* (used to derive the scramble key).
        result1 = hashlib.md5(num.encode()).hexdigest()
        print('get_md5-', result1)
        return result1
    @classmethod
    def get_scramble_num(cls,e, t):
        """Compute the number of horizontal slices an image was scrambled
        into, from album id *e* and image name *t*.

        Albums with id >= 268850 use md5(id+name); the last hex char mod 10
        indexes an even number in 2..20.  Older scrambled albums use the
        fixed default 10.  Returns False on any error.
        """
        #print(type(e),e, type(t),t)
        a = 10
        try:
            # Map 0..9 -> 2,4,...,20 (even slice counts).
            num_dict = {}
            for i in range(10):
                num_dict[i] = i * 2 + 2
            if (int(e) >= 268850):
                n = str(e) + t;
                # switch(n=(n = (n = md5(n)).substr(-1)), n %= 10) {
                #print("n=",n)
                tmp = ord(cls.get_md5(n)[-1])
                result = num_dict[tmp % 10]
                a = result
            return a
        except Exception as e:
            # NOTE(review): this `e` shadows the parameter `e`.
            print(e.__traceback__.tb_lineno,e)
            return False
    @classmethod
    def encode_scramble_image(cls,imgpath):
        """Reassemble a scrambled page image saved as
        "scramble=<blocks>_<name>.<ext>": cut it into <blocks> horizontal
        strips (as the site's JS does), paste them back in decoded order,
        save under the original <name>, and delete the scrambled file.
        """
        image = Image.open(imgpath)
        w, h = image.size
        #image.show()
        # File name layout: "scramble=<blocks>_<name>.<ext>".
        file_str = str(imgpath).split("=")
        #10_29.jpg
        base_dir = file_str[0].replace("scramble","")  # NOTE(review): unused below
        base_name = file_str[-1]
        base_fn = base_name.split("_")
        save_name = base_fn[1]
        save_name_delesu = save_name.split(".")[0]
        blocks = int(base_fn[0])
        img_type = os.path.basename(imgpath).split('.')[-1]
        save_path = os.path.join(os.path.dirname(imgpath),save_name_delesu+"."+img_type)
        # print(type(aid),type(img_name))
        if blocks:
            s = blocks # slice count
            # print(s)
            l = h % s # leftover rows that do not divide evenly
            box_list = []
            hz = 0
            for i in range(s):
                c = math.floor(h / s)
                g = i * c
                hz += c
                h2 = h - c * (i + 1) - l
                # First strip absorbs the leftover rows.
                if i == 0:
                    c += l;hz += l
                else:
                    g += l
                # (left, upper, right, lower) crop box for this strip.
                box_list.append((0, h2, w, h - g))
            # print(box_list,len(box_list))
            item_width = w  # NOTE(review): unused below
            # box_list.reverse() #还原切图可以倒序列表
            # print(box_list, len(box_list))
            newh = 0
            image_list = [image.crop(box) for box in box_list]
            # print(box_list)
            newimage = Image.new("RGB", (w, h))
            # NOTE(review): loop variable rebinds `image` (the source Image).
            for image in image_list:
                # image.show()
                b_w, b_h = image.size
                newimage.paste(image, (0, newh))
                newh += b_h
            newimage.save(save_path)
        if os.path.exists(imgpath):
            os.remove(imgpath)

View File

@ -101,7 +101,7 @@ class comicEntity:
shutil.rmtree(path_dir_comic)
if sleep != None:
time.sleep(sleep)
'''
读取某章节下所有图片