update
This commit is contained in:
parent
499cb29fa3
commit
a8148ac383
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
COMICOUT/
|
||||
.conf/
|
||||
**/__pycache__/**
|
||||
76
main.py
76
main.py
@ -1,72 +1,8 @@
|
||||
from utils.HtmlUtils import htmlUtils
|
||||
from utils.comic.ComicStr import comicStr
|
||||
from utils.FileUtils import fileUtils
|
||||
import json,os
|
||||
from utils.NetUtils import netUtils
|
||||
from utils.ImageUtils import imageUtils
|
||||
from utils.ComicUtils import comicUtils
|
||||
|
||||
comic_chapter_path = "COMICOUT"
|
||||
|
||||
def oneComic(c_url):
|
||||
global comic_chapter_path
|
||||
# #漫画名
|
||||
data = htmlUtils.xpathData(comicStr.result,
|
||||
'//script[@id="__NEXT_DATA__"]/text()',url=c_url)
|
||||
# #
|
||||
data = json.loads(data[0])
|
||||
data = data.get("props")
|
||||
data = data.get("pageProps")
|
||||
print(data)
|
||||
fileUtils.saveConfComicChapterInfo("1話 親子餐廳的媽媽們",data,"親子餐廳的媽媽們")
|
||||
x = fileUtils.getInfoConfComicChapter("1話 親子餐廳的媽媽們","親子餐廳的媽媽們")
|
||||
bookName = x.get("bookName")
|
||||
alias = x.get("alias")
|
||||
chapterName = x.get("chapterName")
|
||||
description = x.get("description")
|
||||
images = x.get("images")
|
||||
chapterAPIPath = x.get("chapterAPIPath")
|
||||
print(chapterAPIPath)
|
||||
if not chapterAPIPath == None:
|
||||
base_url = comicUtils.getBaseUrl(c_url)
|
||||
chapterAPIUrl = base_url+chapterAPIPath
|
||||
data = htmlUtils.getJSON(chapterAPIUrl)
|
||||
data = data.get("chapter")
|
||||
chapterName = data.get("name")
|
||||
images = data.get("images")
|
||||
print("test")
|
||||
if images == None:
|
||||
print("fsf")
|
||||
totalChapter = x.get("totalChapter")
|
||||
tags = x.get("tags")
|
||||
x = tags
|
||||
print(x)
|
||||
count_image = 1
|
||||
list_img = []
|
||||
list_scramble = []
|
||||
for image in images:
|
||||
image_src = image.get("src")
|
||||
scramble = image.get("scramble")
|
||||
print("count=",count_image)
|
||||
list_img.append(image_src)
|
||||
list_scramble.append(scramble)
|
||||
print(image_src)
|
||||
print(scramble)
|
||||
count_image+=1
|
||||
print(count_image)
|
||||
print(list_img)
|
||||
print(totalChapter)
|
||||
netUtils.downloadComicChapterImages(bookName,chapterName,list_img,scrambles=list_scramble)
|
||||
comic_chapter_path = os.path.join("COMICOUT",bookName,chapterName)
|
||||
from utils.entity.RouMan import comicEntity
|
||||
|
||||
if __name__ == '__main__':
|
||||
oneComic("https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/23")
|
||||
# oneComic()
|
||||
# path = "COMICOUT\好友的私生活\第1話 好友的私生活"
|
||||
path = comic_chapter_path
|
||||
dirs = os.listdir(path)
|
||||
for dir in dirs:
|
||||
isScramble = str(dir).startswith("scramble=")
|
||||
if isScramble:
|
||||
c_path = os.path.join(path, dir)
|
||||
imageUtils.getScrambleImage(c_path)
|
||||
# os.environ["http_proxy"] = "http://127.0.0.1:7890"
|
||||
# os.environ["https_proxy"] = "http://127.0.0.1:7890"
|
||||
# url = "https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/1"
|
||||
# comicEntity.comicChapter(url,scramble=True)
|
||||
comicEntity.oneComic("https://rm01.xyz/books/47376792-2816-4ccf-a146-957b8d6a2ac6")
|
||||
55
utils/CBZUtils.py
Normal file
55
utils/CBZUtils.py
Normal file
@ -0,0 +1,55 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
import shutil
|
||||
from utils.comic.ComicInfo import comicInfo
|
||||
|
||||
class CBZUtils:
    """Helpers for packing a downloaded comic chapter directory into a CBZ archive."""

    @classmethod
    def readDirsOrFiles(cls, dir, type):
        """Return the direct children of *dir* that match the requested kind.

        :param dir: directory whose entries are listed with ``os.listdir``.
        :param type: ``"files"`` collects regular files, ``"dirs"`` collects
            sub-directories; any other value yields an empty list.
        :return: list of ``os.path.join(dir, name)`` paths, in listing order.
        """
        checks = {"files": os.path.isfile, "dirs": os.path.isdir}
        # List first so a missing directory still raises, whatever *type* is.
        entries = [os.path.join(dir, name) for name in os.listdir(dir)]
        matches = checks.get(type)
        if matches is None:
            return []
        return [entry for entry in entries if matches(entry)]

    @classmethod
    def zip_compression(cls, source_dir, target_file, type=None):
        """Pack every file below *source_dir* into the zip archive *target_file*.

        An existing *target_file* is never overwritten. The archive is written
        to ``target_file + ".part"`` and renamed once complete, so an
        interrupted run cannot leave a truncated archive that a later run
        would mistake for a finished one.

        :param source_dir: directory tree to pack; archive member names are
            relative to it.
        :param target_file: path of the archive to create; missing parent
            directories are created.
        :param type: pass ``"delete"`` to remove *source_dir* once the archive
            exists (freshly written or already present).
        :return: an empty dict when a new archive was written, ``None`` when
            *target_file* already existed.
        """
        target_dir = os.path.dirname(target_file)
        # dirname is "" for a bare file name, and makedirs("") would raise.
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        msg = None
        if os.path.exists(target_file):
            print("文件已存在:", target_file)
        else:
            msg = {}
            part_file = target_file + ".part"
            try:
                with ZipFile(part_file, mode='w') as zf:
                    for path, _dir_names, filenames in os.walk(source_dir):
                        path = Path(path)
                        arc_dir = path.relative_to(source_dir)
                        for y, filename in enumerate(filenames, start=1):
                            file_path = path.joinpath(filename)
                            print("打包中:" + str(y) + "/" + str(len(filenames)), file_path)
                            zf.write(file_path, arc_dir.joinpath(filename))
                os.replace(part_file, target_file)
            finally:
                # Only present if packing failed before the rename.
                if os.path.exists(part_file):
                    os.remove(part_file)
            print("打包完成:", target_file)

        # Runs after a successful pack as well; previously an early return
        # skipped this step whenever the archive had just been created.
        if type == "delete":
            shutil.rmtree(source_dir)
            print(f"已删除 source_dir: {source_dir}")
        return msg

    @classmethod
    def packAutoComicChapterCBZ(cls):
        """Pack the current chapter directory into ``<CBZ chapter path>.CBZ``.

        Both paths come from ``comicInfo``; the chapter directory is removed
        once the archive exists.
        """
        chapter_path = comicInfo.getDirComicChapter()
        packCBZ_path = comicInfo.getDirCBZComicChapter()
        cls.zip_compression(chapter_path, packCBZ_path + ".CBZ", type="delete")
|
||||
@ -1,10 +1,8 @@
|
||||
import json
|
||||
import requests,os
|
||||
from utils.comic.PathStr import pathStr
|
||||
|
||||
class fileUtils:
|
||||
base_path = "COMIC_OUT"
|
||||
conf_path = os.path.join(base_path,".conf")
|
||||
comic_path = os.path.join(base_path,".conf","comic")
|
||||
comic_name = ""
|
||||
|
||||
#文件保存
|
||||
@ -47,8 +45,8 @@ class fileUtils:
|
||||
def getPathConfComicChapterInfo(cls,chapter,comic_name=None):
|
||||
if comic_name == None:
|
||||
comic_name = cls.comic_name
|
||||
return os.path.join(cls.comic_path,comic_name,"info_"+chapter)
|
||||
|
||||
return os.path.join(pathStr.base_conf_path,comic_name,"info_"+chapter)
|
||||
|
||||
@classmethod
|
||||
def getInfoConfComicChapter(cls,chapter,comic_name=None):
|
||||
data = None
|
||||
@ -73,5 +71,5 @@ class fileUtils:
|
||||
'''
|
||||
@classmethod
|
||||
def get_utl_save_comic(cls,name):
|
||||
file = os.path.join(cls.comic_path,name)
|
||||
file = os.path.join(pathStr.base_comic_img,name)
|
||||
return file
|
||||
@ -2,11 +2,12 @@ from fake_useragent import UserAgent
|
||||
import requests
|
||||
from lxml import html
|
||||
from utils.comic.ComicStr import comicStr
|
||||
import os
|
||||
|
||||
class htmlUtils:
|
||||
headers = {'User-Agent': UserAgent().random}
|
||||
url_data = {}
|
||||
|
||||
|
||||
@classmethod
|
||||
def getHTML(cls, curl):
|
||||
rstr = r"[\/\\\:\*\?\"\<\>\|\.]" # '/ \ : * ? " < > |'
|
||||
@ -21,7 +22,11 @@ class htmlUtils:
|
||||
data = { curl : url_text}
|
||||
cls.url_data.update(data)
|
||||
return url_text
|
||||
|
||||
|
||||
@classmethod
|
||||
def getBytes(cls, url):
|
||||
return requests.get(url, headers=cls.headers,stream=True)
|
||||
|
||||
@classmethod
|
||||
def getJSON(cls,curl):
|
||||
res = requests.get(curl, headers=cls.headers)
|
||||
@ -29,7 +34,7 @@ class htmlUtils:
|
||||
return data_json
|
||||
|
||||
@classmethod
|
||||
def xpathData(cls,c_title, c_xpath,url=None,num=None,type=None,not_eq=None):
|
||||
def xpathData(cls,c_xpath,url=None,num=None,not_eq=None):
|
||||
if url == None:
|
||||
url = cls.temp_url
|
||||
else:
|
||||
@ -44,11 +49,6 @@ class htmlUtils:
|
||||
if not x == not_eq:
|
||||
result.append(x)
|
||||
count +=1
|
||||
data = {c_title : result}
|
||||
if not num == None:
|
||||
data = {c_title : result[num]}
|
||||
if not type == None:
|
||||
data = { type : result }
|
||||
if c_title == comicStr.result:
|
||||
data = result
|
||||
return data
|
||||
result = result[num]
|
||||
return result
|
||||
@ -22,6 +22,9 @@ class imageUtils:
|
||||
|
||||
@classmethod
|
||||
def scrambleImage(cls,file_path):
|
||||
if str(file_path).endswith(".downloads"):
|
||||
os.remove(file_path)
|
||||
return None
|
||||
file_str = str(file_path).split("=")
|
||||
#10_29.jpg
|
||||
baseDir = file_str[0].replace("scramble","")
|
||||
@ -45,7 +48,9 @@ class imageUtils:
|
||||
blockWidth = int(width / blocks)
|
||||
print("blockHeight=",blockHeight)
|
||||
su = str(file_path).split(".")[-1]
|
||||
split_path = os.path(baseDir,save_name_delesu+"split")
|
||||
split_path = os.path.join(baseDir,save_name_delesu+"split")
|
||||
if image_su == "downloads":
|
||||
return None
|
||||
cls.splitimage(file_path,blocks,1,split_path)
|
||||
cls.image_compose(split_path+"/",blocks,1,save_file_path,blockHeight,width)
|
||||
#完成后清空
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
import os.path,shutil
|
||||
import requests
|
||||
from concurrent.futures import ThreadPoolExecutor,wait,ALL_COMPLETED
|
||||
import time
|
||||
import time,random
|
||||
import imghdr
|
||||
from utils.comic.PathStr import pathStr
|
||||
from utils.ImageUtils import imageUtils
|
||||
from utils.comic.ComicInfo import comicInfo
|
||||
from utils.HtmlUtils import htmlUtils
|
||||
from utils.downloader import download_image
|
||||
|
||||
class netUtils:
|
||||
|
||||
@ -23,9 +25,9 @@ class netUtils:
|
||||
os.remove(tmp_file)
|
||||
print("存在缓存文件,已删除:",tmp_file)
|
||||
repair_count = 1
|
||||
res = requests.get(url, stream=True)
|
||||
res = htmlUtils.getBytes(url)
|
||||
while res.status_code != 200 and repair_count <= 5:
|
||||
res = requests.get(url, stream=True)
|
||||
res = htmlUtils.getBytes(url)
|
||||
print(f'重试:第{repair_count}次 {url}')
|
||||
repair_count += 1
|
||||
#判断是否为图片
|
||||
@ -35,23 +37,26 @@ class netUtils:
|
||||
basedir= os.path.dirname(path)
|
||||
if not os.path.exists(basedir):
|
||||
os.makedirs(basedir)
|
||||
#expected_length = res.headers.get('Content-Length')
|
||||
#actual_length = res.raw.tell()
|
||||
with open(tmp_file, 'wb') as f:
|
||||
for ch in res:
|
||||
f.write(ch)
|
||||
f.close()
|
||||
shutil.move(tmp_file, path)
|
||||
print(f"url={url} 保存至:{path}")
|
||||
return path
|
||||
for ch in res:
|
||||
f.write(ch)
|
||||
f.close()
|
||||
shutil.move(tmp_file, path)
|
||||
print(f"url={url} 保存至:{path}")
|
||||
return path
|
||||
|
||||
@classmethod
|
||||
def threadDownload(cls,url,path,fileType=None):
|
||||
executor = ThreadPoolExecutor(max_workers=3)
|
||||
tasks = executor.submit(cls.download, url,path,fileType)
|
||||
time.sleep(random.uniform(0,1))
|
||||
#wait(tasks, return_when=ALL_COMPLETED)
|
||||
|
||||
@classmethod
|
||||
def downloadComicChapterImages(cls,comic_name, chapter_name, imgs,scrambles=None):
|
||||
file_path = os.path.join(pathStr.base_comic_out, comic_name, chapter_name)
|
||||
def downloadComicChapterImages(cls, imgs,scrambles=None):
|
||||
file_path = comicInfo.getDirComicChapter()
|
||||
print("files=",file_path)
|
||||
|
||||
count_img = 1
|
||||
@ -65,22 +70,24 @@ class netUtils:
|
||||
blockInt = imageUtils.encodeImage(de_str)
|
||||
save_file_path = os.path.join(file_path,"scramble="+str(blockInt)+"_"+file_name)
|
||||
cls.threadDownload(img, save_file_path, fileType="image")
|
||||
time.sleep(0.1)
|
||||
count_img += 1
|
||||
return os.path.dirname(save_file_path)
|
||||
|
||||
@classmethod
|
||||
def downloadComicIcon(cls,comic_name,chapter,img):
|
||||
file_su = os.path.splitext(img)[-1]
|
||||
icon_name = "cover"+ file_su
|
||||
save_file_path = os.path.join(cls.save_comic_img_basePath,comic_name,icon_name)
|
||||
if os.path.exists(save_file_path):
|
||||
"已存在,跳过下载"
|
||||
def downloadComicIcon(cls):
|
||||
icon_url = comicInfo.getIcon()
|
||||
if icon_url == None:
|
||||
print("icon 不存在,已跳过")
|
||||
return None
|
||||
else:
|
||||
cls.download(img,save_file_path,fileType="image")
|
||||
target_dir = os.path.join(cls.CBZ_path,comic_name)
|
||||
target_file = os.path.join(target_dir,chapter+file_su)
|
||||
if not os.path.exists(target_dir):
|
||||
os.makedirs(target_dir)
|
||||
shutil.copy(save_file_path,target_file)
|
||||
save_name = "cover"
|
||||
icon_su = "."+str(icon_url).split(".")[-1]
|
||||
#判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过
|
||||
pathComicIcon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_su)
|
||||
if not os.path.exists(pathComicIcon):
|
||||
cls.download(icon_url, pathComicIcon)
|
||||
pathCBZComic = comicInfo.getDirCBZComic()
|
||||
if not os.path.exists(pathCBZComic):
|
||||
os.makedirs(pathCBZComic)
|
||||
save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su)
|
||||
shutil.copy(pathComicIcon, save_path)
|
||||
print(f"{pathComicIcon} 已复制至: {save_path}")
|
||||
@ -1,37 +1,290 @@
|
||||
from xml.dom.minidom import Document
|
||||
import os,re
|
||||
from utils.comic.PathStr import pathStr
|
||||
import json,shutil
|
||||
|
||||
document = Document()
|
||||
|
||||
class comicInfoXmlNode():
|
||||
class comicInfo():
|
||||
document = Document()
|
||||
pathComicInfo = None
|
||||
|
||||
root = "ComicInfo"
|
||||
chapter = "Title"
|
||||
comic_name = "Series"
|
||||
dep = "Summary"
|
||||
author = "Writer"
|
||||
tags = "Genre"
|
||||
tag = "Genre"
|
||||
cbs = "Publisher"
|
||||
lang = "LanguageISO"
|
||||
comic_names = "SeriesGroup"
|
||||
tags = "Tag"
|
||||
|
||||
str_comicName = None
|
||||
str_chapter = None
|
||||
str_icon = None
|
||||
str_homePage = None
|
||||
|
||||
chapter_node = None
|
||||
comicName_node = None
|
||||
dep_node = None
|
||||
author_node = None
|
||||
tag_node = None
|
||||
cbs_node = None
|
||||
lang_node = None
|
||||
comicNames_node = None
|
||||
tags_node = None
|
||||
|
||||
@classmethod
|
||||
def setNodeAndValue(cls,node,value):
|
||||
node = document.createElement(node)
|
||||
node_text = document.createTextNode(value)
|
||||
node.appendChild(node_text)
|
||||
if not value == None:
|
||||
c_node = cls.document.createElement(node)
|
||||
node_text = cls.document.createTextNode(value)
|
||||
c_node.appendChild(node_text)
|
||||
return c_node
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def Root(cls):
|
||||
return cls.document.createElement(cls.root)
|
||||
|
||||
@classmethod
|
||||
def setChapter(cls,value):
|
||||
cls.setNodeAndValue(cls.chapter,value)
|
||||
def setChapterName(cls,value):
|
||||
cls.str_chapter = cls.fixFileName(value)
|
||||
cls.chapter_node = cls.setNodeAndValue(cls.chapter,value)
|
||||
|
||||
@classmethod
|
||||
def setComicName(cls,value):
|
||||
cls.setNodeAndValue(cls.comic_name, value)
|
||||
cls.str_comicName = cls.fixFileName(value)
|
||||
cls.comicName_node = cls.setNodeAndValue(cls.comic_name, value)
|
||||
|
||||
@classmethod
|
||||
def getComicInfoXML(cls):
|
||||
cls.setChapter()
|
||||
class comicInfo():
|
||||
def setComicNames(cls,value):
|
||||
cls.comicNames_node = cls.setNodeAndValue(cls.comic_names,value)
|
||||
|
||||
#输出xml
|
||||
@classmethod
|
||||
def writeComicInfoXML(cls,c_title,chapter,dep,author,tags="韩漫",c_publisher="韩漫",language="zh"):
|
||||
#file 预期 CBZ/"comic_name"/chapter
|
||||
print()
|
||||
@classmethod
|
||||
def setDep(cls,value):
|
||||
cls.dep_node = cls.setNodeAndValue(cls.dep, value)
|
||||
|
||||
@classmethod
|
||||
def setAuthor(cls,value):
|
||||
cls.author_node = cls.setNodeAndValue(cls.author,value)
|
||||
|
||||
@classmethod
|
||||
def setLang(cls,value):
|
||||
cls.lang_node = cls.setNodeAndValue(cls.lang, value)
|
||||
|
||||
@classmethod
|
||||
def setTag(cls,value):
|
||||
cls.tag_node = cls.setNodeAndValue(cls.tag, value)
|
||||
|
||||
@classmethod
|
||||
def setTags(cls,value):
|
||||
cls.tags_node = cls.setNodeAndValue(cls.tags,value)
|
||||
|
||||
@classmethod
|
||||
def setCBS(cls,value):
|
||||
cls.cbs_node = cls.setNodeAndValue(cls.cbs,value)
|
||||
|
||||
@classmethod
|
||||
def setIcon(cls,value):
|
||||
cls.str_icon = value
|
||||
return cls.str_icon
|
||||
|
||||
@classmethod
|
||||
def setHomePage(cls, value):
|
||||
cls.str_homePage = value
|
||||
|
||||
@classmethod
|
||||
def getHomePage(cls):
|
||||
return cls.str_homePage
|
||||
|
||||
@classmethod
def getBaseUrl(cls, url=None):
    """Return the ``scheme://host`` prefix of *url* (the site home page).

    The prefix ends just before the third ``/`` of the URL. A URL with fewer
    than three slashes (for example ``https://host``) has no path part and is
    returned unchanged.

    :param url: URL to shorten; defaults to the stored home page
        (``cls.str_homePage``).
    :return: the URL up to, but not including, its third ``/``.
    :raises ValueError: if no URL is given and no home page has been stored.
    """
    if url is None:
        url = cls.str_homePage
    if url is None:
        raise ValueError("getBaseUrl: no url given and no home page set")
    url = str(url)
    slash_at = -1
    for _ in range(3):
        slash_at = url.find("/", slash_at + 1)
        if slash_at == -1:
            # No third slash: the whole string is already the base URL.
            return url
    return url[:slash_at]
|
||||
|
||||
@classmethod
|
||||
def getIcon(cls):
|
||||
return cls.str_icon
|
||||
|
||||
@classmethod
|
||||
def getComicName(cls):
|
||||
return cls.str_comicName
|
||||
|
||||
@classmethod
|
||||
def getChapter(cls):
|
||||
return cls.str_chapter
|
||||
|
||||
@classmethod
def fixFileName(cls, filename, replace=None):
    """Strip characters that are unsafe in file names from *filename*.

    Each of ``? * / | . : > <`` is replaced by *replace*, or removed when
    *replace* is not given.

    :param filename: name to sanitise.
    :param replace: replacement text for each matched character.
    :return: the sanitised name.
    """
    substitute = "" if replace is None else replace
    unsafe_chars = re.compile(r'[?*/\|.:><]')
    return unsafe_chars.sub(substitute, filename)
|
||||
|
||||
@classmethod
|
||||
def getDirConfComic(cls):
|
||||
if not cls.str_comicName == None:
|
||||
return os.path.join(pathStr.base_conf_path, cls.str_comicName)
|
||||
else:
|
||||
print("comicName不存在,退出中")
|
||||
exit()
|
||||
|
||||
@classmethod
|
||||
def getDirCBZComic(cls):
|
||||
if not cls.str_comicName == None:
|
||||
return os.path.join(pathStr.base_CBZ, cls.str_comicName)
|
||||
else:
|
||||
print("comicName不存在,退出中")
|
||||
exit()
|
||||
|
||||
@classmethod
|
||||
def getDirCBZComicChapter(cls):
|
||||
if not cls.str_comicName == None and not cls.str_chapter == None:
|
||||
return os.path.join(pathStr.base_CBZ,cls.str_comicName,cls.str_chapter)
|
||||
else:
|
||||
print("comicName不存在,退出中")
|
||||
exit()
|
||||
|
||||
@classmethod
|
||||
def getDirComic(cls):
|
||||
if not cls.str_comicName == None:
|
||||
return os.path.join(pathStr.base_comic_img, cls.str_comicName)
|
||||
else:
|
||||
print("comicName不存在,退出中")
|
||||
exit()
|
||||
|
||||
@classmethod
|
||||
def getDirComicChapter(cls):
|
||||
if not cls.str_comicName == None and not cls.str_chapter == None:
|
||||
return os.path.join(pathStr.base_comic_img,cls.str_comicName,cls.str_chapter)
|
||||
else:
|
||||
print("comicName与chapter 不存在,退出中")
|
||||
exit()
|
||||
@classmethod
def getPathComicInfoXML(cls):
    """Compute, cache and return the path of the chapter's ``ComicInfo.xml``.

    The path is ``<base_comic_img>/<comic name>/<chapter>/ComicInfo.xml`` and
    is stored in ``cls.pathComicInfo``.

    :return: the path, or ``None`` (cache left untouched) when the comic name
        or chapter name is not usable as a path component.
    """
    if cls.str_comicName is None or cls.str_chapter is None:
        return None
    try:
        cls.pathComicInfo = os.path.join(
            pathStr.base_comic_img, cls.str_comicName, cls.str_chapter, "ComicInfo.xml")
    except TypeError:
        # Non-string name parts; only this error is expected from join, so
        # nothing else (e.g. KeyboardInterrupt) is swallowed any more.
        return None
    return cls.pathComicInfo
|
||||
|
||||
@classmethod
def writeComicInfoXML(cls, chapter):
    """Write the collected metadata nodes to the chapter's ``ComicInfo.xml``.

    If the sanitised *chapter* differs from the chapter name currently stored
    on the class, the user is asked whether to continue; any answer other
    than ``y`` terminates the program.

    A fresh document with a ``ComicInfo`` root is built, every metadata node
    that has been set is appended to it, and the result is written as UTF-8
    to the path given by ``getPathComicInfoXML``. Missing parent directories
    are created.

    :param chapter: chapter name the caller believes is being written.
    """
    # The prompt reports a mismatch, so it must fire when the stored chapter
    # name differs from the requested one (the old check compared the XML tag
    # name "Title" for equality and so practically never fired).
    if cls.str_chapter != cls.fixFileName(chapter):
        wait = input(f"cls.str_chapter {cls.str_chapter} 与 chapter {chapter} 不相等,是否继续y/n ")
        if wait == "y":
            print("继续中...")
        else:
            print("用户操作取消...")
            exit()

    root = cls.Root()
    newDocument = Document()
    newDocument.appendChild(root)
    # Output order of the child elements; unset (None) nodes are skipped.
    nodes = (
        cls.chapter_node,
        cls.comicName_node,
        cls.dep_node,
        cls.author_node,
        cls.tag_node,
        cls.cbs_node,
        cls.lang_node,
        cls.comicNames_node,
        cls.tags_node,
    )
    for node in nodes:
        if node is not None:
            root.appendChild(node)

    cls.getPathComicInfoXML()
    base_dir = os.path.dirname(cls.pathComicInfo)
    os.makedirs(base_dir, exist_ok=True)
    with open(cls.pathComicInfo, "w", encoding="utf-8") as fo:
        newDocument.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
    print("ComicInfo.xml 已生成 pathd=", cls.pathComicInfo)
|
||||
|
||||
# Save data to a file
@classmethod
def file_save(cls, path, data, mode=None):
    """Serialise *data* as JSON and write it to *path*.

    Missing parent directories are created. I/O failures while opening or
    writing the file are reported through the return value rather than
    raised; a *data* value that JSON cannot serialise still raises.

    :param path: destination file path.
    :param data: JSON-serialisable object.
    :param mode: ``open`` mode; defaults to ``"w+"``.
    :return: *path* followed by a success or failure message.
    """
    dir_name = os.path.dirname(path)
    # dirname is "" for a bare file name, and makedirs("") would raise.
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)
    data = json.dumps(data)
    if mode is None:
        mode = "w+"
    try:
        # The context manager closes the handle even if the write fails.
        with open(path, mode, encoding="utf-8") as f:
            f.write(data)
    except (OSError, ValueError):
        return path + "文件写入失败"
    print("data=", data)
    return path + "文件写入成功"
|
||||
|
||||
@classmethod
|
||||
def nextSavePath(cls,next,data=None):
|
||||
save_path = cls.getDirConfComic()+"/"+next+cls.str_chapter
|
||||
if not data == None:
|
||||
cls.file_save(save_path, data)
|
||||
return save_path
|
||||
|
||||
@classmethod
def nextSaveInfoChapter(cls, chapter, data):
    """Save *data* as the ``info_`` stage file of the current chapter.

    *chapter* is checked against the stored chapter name, both as given and
    after ``fixFileName`` sanitising (the stored name is kept in sanitised
    form). On a mismatch the user is asked whether to continue: ``y`` goes
    ahead and saves, anything else terminates the program.

    :param chapter: chapter name reported by the page being processed.
    :param data: JSON-serialisable chapter information to store.
    """
    same_chapter = cls.str_chapter == chapter or (
        isinstance(chapter, str) and cls.str_chapter == cls.fixFileName(chapter))
    if not same_chapter:
        wait = input(f"chapter {cls.str_chapter} 与 {chapter} 不一致,是否继续 y/n")
        if wait != "y":
            exit()
        print("继续中")
    # Also reached after the user confirms: later stages rename the info_
    # file, so continuing without writing it would fail further on.
    cls.nextSavePath("info_", data)
|
||||
|
||||
@classmethod
|
||||
def nextInfoToImgChapter(cls):
|
||||
info = cls.nextSavePath("info_")
|
||||
img = cls.nextSavePath("img_")
|
||||
shutil.move(info,img)
|
||||
|
||||
@classmethod
|
||||
def nextImgToDownloadChapter(cls):
|
||||
img = cls.nextSavePath("img_")
|
||||
download = cls.nextSavePath("down_")
|
||||
shutil.move(img, download)
|
||||
|
||||
@classmethod
|
||||
def nextDownloadToCBZChapter(cls):
|
||||
download = cls.nextSavePath("down_")
|
||||
cbz = cls.nextSavePath("cbz_")
|
||||
shutil.move(download, cbz)
|
||||
|
||||
@classmethod
|
||||
def nextCBZToDoneChapter(cls):
|
||||
cbz = cls.nextSavePath("cbz_")
|
||||
done = cls.nextSavePath("done_")
|
||||
shutil.move(cbz,done)
|
||||
|
||||
@classmethod
|
||||
def nextExistsGetPath(cls,msg):
|
||||
path = cls.nextSavePath(msg)
|
||||
return os.path.exists(path)
|
||||
|
||||
@ -3,4 +3,5 @@ import os
|
||||
class pathStr:
|
||||
base_comic_out = "COMICOUT"
|
||||
base_CBZ = os.path.join(base_comic_out,"CBZ")
|
||||
base_comic_img = os.path.join(base_comic_out,"outputComic")
|
||||
base_comic_img = os.path.join(base_comic_out,"outputComic")
|
||||
base_conf_path = ".conf"
|
||||
87
utils/downloader.py
Normal file
87
utils/downloader.py
Normal file
@ -0,0 +1,87 @@
|
||||
""" Download image according to given urls and automatically rename them in order. """
|
||||
# -*- coding: utf-8 -*-
|
||||
# author: Yabin Zheng
|
||||
# Email: sczhengyabin@hotmail.com
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import shutil
|
||||
import imghdr
|
||||
import os,time
|
||||
import concurrent.futures
|
||||
import requests
|
||||
from utils.ImageUtils import imageUtils
|
||||
from utils.HtmlUtils import htmlUtils
|
||||
|
||||
headers = {
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"Proxy-Connection": "keep-alive",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
|
||||
"Accept-Encoding": "gzip, deflate, sdch",
|
||||
# 'Connection': 'close',
|
||||
}
|
||||
|
||||
|
||||
def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, proxy=None):
    """Download *image_url* to ``dst_dir/file_name``, retrying on failure.

    The body is first written to ``<file>.downloads`` and moved into place
    once complete, so a finished file name never holds a partial download.
    Up to 10 attempts are made; the last failure is printed, not raised.
    Responses with an HTTP error status count as failures, so an error page
    is never saved as an image.

    :param image_url: URL of the image.
    :param dst_dir: existing directory to save into.
    :param file_name: name of the saved file inside *dst_dir*.
    :param timeout: per-request timeout in seconds.
    :param proxy_type: proxy scheme (e.g. ``"http"``, ``"socks5"``) or ``None``.
    :param proxy: proxy ``host:port``; used only when *proxy_type* is set.
    :return: ``None``.
    """
    proxies = None
    if proxy_type is not None:
        proxies = {
            "http": proxy_type + "://" + proxy,
            "https": proxy_type + "://" + proxy
        }

    file_path = os.path.join(dst_dir, file_name)
    temp_path = file_path + ".downloads"
    max_tries = 10
    for try_times in range(1, max_tries + 1):
        response = None
        try:
            response = requests.get(
                image_url, headers=htmlUtils.headers, timeout=timeout, proxies=proxies)
            response.raise_for_status()
            with open(temp_path, 'wb') as f:
                f.write(response.content)
            shutil.move(temp_path, file_path)
            return
        except (requests.RequestException, OSError) as e:
            if try_times < max_tries:
                print(f"第{try_times} try {file_path}")
                continue
            print("## Fail: {} {}".format(image_url, e.args))
        finally:
            # A Response is falsy for error statuses, so test identity to
            # make sure failed responses are closed too.
            if response is not None:
                response.close()
|
||||
|
||||
|
||||
def download_images(image_urls, dst_dir, file_prefix="img", concurrency=3, timeout=20, proxy_type=None, proxy=None,scrambles=None):
    """Download *image_urls* concurrently, naming the files by position.

    Files are named ``001.<ext>``, ``002.<ext>``, ... where ``<ext>`` is the
    text after the last ``.`` of the URL. An image flagged in *scrambles*
    gets the prefix ``scramble=<blocks>_``, with ``<blocks>`` computed by
    ``imageUtils.encodeImage`` from the URL's file name.

    :param image_urls: list of image urls
    :param dst_dir: output directory; created if missing
    :param file_prefix: accepted for compatibility; not used for naming
    :param concurrency: number of requests processed simultaneously
    :param timeout: per-request timeout in seconds
    :param proxy_type: proxy scheme passed to ``download_image``
    :param proxy: proxy ``host:port`` passed to ``download_image``
    :param scrambles: optional sequence parallel to *image_urls*; a truthy
        entry marks that image as scrambled. ``None`` or missing entries mean
        "not scrambled".
    :return: none
    """
    os.makedirs(dst_dir, exist_ok=True)
    flags = scrambles or []
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        future_list = []
        for count, image_url in enumerate(image_urls, start=1):
            suffix = "." + str(image_url).split(".")[-1]
            file_name = "{:0>3d}".format(count) + suffix
            # The default scrambles=None and short lists used to crash here.
            if count <= len(flags) and flags[count - 1]:
                de_str = str(image_url).split("/")[-1].replace(suffix, "==")
                blocks = imageUtils.encodeImage(de_str)
                file_name = "scramble=" + str(blocks) + "_" + file_name
            future_list.append(executor.submit(
                download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
            # Small pause between submissions to avoid hammering the server.
            time.sleep(0.1)
        concurrent.futures.wait(future_list, timeout=180)
|
||||
@ -1,63 +1,148 @@
|
||||
import json,os
|
||||
import json,os,time
|
||||
from utils.comic.ComicStr import comicStr
|
||||
from utils.ComicUtils import comicUtils
|
||||
from utils.FileUtils import fileUtils
|
||||
|
||||
class comicEntityRM:
|
||||
from utils.comic.PathStr import pathStr
|
||||
from utils.NetUtils import netUtils
|
||||
from utils.HtmlUtils import htmlUtils
|
||||
from utils.ImageUtils import imageUtils
|
||||
from utils.comic.ComicInfo import comicInfo
|
||||
from utils.CBZUtils import CBZUtils
|
||||
from utils.downloader import download_images
|
||||
class comicEntity:
|
||||
|
||||
@classmethod
|
||||
def oneComic(cls,c_url):
|
||||
#漫画名
|
||||
title = comicUtils.xpathData(comicStr.title,
|
||||
'//div[@class="col"]/h5/text()',url=c_url,num=0)
|
||||
title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0)
|
||||
#别名
|
||||
alias = comicUtils.xpathData(comicStr.alias,
|
||||
'//span[contains(@class,"bookid_alias")]/text()',num=1)
|
||||
icon = comicUtils.xpathData(comicStr.icon,
|
||||
'//img[@class="img-thumbnail"]/@src')
|
||||
alias = htmlUtils.xpathData('//span[contains(@class,"bookid_alias")]/text()',num=1)
|
||||
icon = htmlUtils.xpathData('//img[@class="img-thumbnail"]/@src',num=0)
|
||||
|
||||
author = comicUtils.xpathData(comicStr.author,
|
||||
'//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1)
|
||||
tags = comicUtils.xpathData(comicStr.tags,
|
||||
'//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()')
|
||||
action = comicUtils.xpathData(comicStr.action,
|
||||
'//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1)
|
||||
dep = comicUtils.xpathData(comicStr.dep,
|
||||
'//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1)
|
||||
update_date = comicUtils.xpathData(comicStr.last_update,
|
||||
'//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1)
|
||||
chapters = comicUtils.xpathData(comicStr.chapters,
|
||||
'//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
|
||||
chapter_href = comicUtils.xpathData(comicStr.chapter_href,
|
||||
'//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href')
|
||||
|
||||
comicUtils.setComic(title,alias,icon,author,tags,action,dep,update_date,chapters,chapter_href)
|
||||
author = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1)
|
||||
tags = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()',num=0)
|
||||
action = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1)
|
||||
dep = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1)
|
||||
update_date = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1)
|
||||
chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
|
||||
chapter_href = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href')
|
||||
|
||||
#{'title': ['社區重建協會']}
|
||||
#主页
|
||||
#homepage = {comicStr.homepage : [c_url] }
|
||||
#图片
|
||||
#comicUtils.setComic(titles,homepage,icons,authors,deps,chapters, chapter_hrefs,last_update)
|
||||
comicData = comicUtils.getComic()
|
||||
print(comicData)
|
||||
wait = input("数据暂停查看y/n")
|
||||
if not wait == "y":
|
||||
exit()
|
||||
return comicData
|
||||
author = str(author).replace("&",",").replace(" ",",")
|
||||
comicInfo.setHomePage(c_url)
|
||||
comicInfo.setComicName(str(title))
|
||||
comicInfo.setComicNames(title+","+alias)
|
||||
comicInfo.setAuthor(author)
|
||||
comicInfo.setIcon(icon)
|
||||
comicInfo.setTag(tags)
|
||||
comicInfo.setTags(tags)
|
||||
comicInfo.setDep(dep)
|
||||
comicInfo.setCBS("韩漫")
|
||||
comicInfo.setLang("zh")
|
||||
comicInfo.setComicNames(title+","+alias)
|
||||
|
||||
#comicUtils.setComic(title,alias,icon,author,tags,action,dep,update_date,chapters,chapter_href)
|
||||
count_chapter = 0
|
||||
for href in chapter_href:
|
||||
chapter = chapters[count_chapter]
|
||||
comicInfo.setChapterName(chapter)
|
||||
if not comicInfo.nextExistsGetPath("done_"):
|
||||
comicEntity.comicChapter(href,scramble=True,sleep=8)
|
||||
count_chapter += 1
|
||||
|
||||
'''
|
||||
'''
|
||||
|
||||
读取某章节下所有图片
|
||||
'''
|
||||
@classmethod
|
||||
def comicChapter(cls,c_url,chapter):
|
||||
xpath_str = '//img[contains(@class,"id_comicImage")]/@src'
|
||||
not_eq = "/loading.jpg"
|
||||
#章节下所有图片链接
|
||||
list_img = comicUtils.xpathData(comicStr.list_img,
|
||||
xpath_str,url=c_url,type=chapter,not_eq=not_eq)
|
||||
def comicChapter(cls,chapter_url,scramble=None,sleep=None):
|
||||
cls.Onechapter(chapter_url,scramble,sleep)
|
||||
#进入下个阶段
|
||||
comicInfo.nextImgToDownloadChapter()
|
||||
|
||||
if comicInfo.nextExistsGetPath("down_"):
|
||||
#章节图片全部下载后,调用下载封面
|
||||
netUtils.downloadComicIcon()
|
||||
#下个阶段
|
||||
comicInfo.nextDownloadToCBZChapter()
|
||||
if comicInfo.nextExistsGetPath("cbz_"):
|
||||
#下载后自动打包
|
||||
CBZUtils.packAutoComicChapterCBZ()
|
||||
comicInfo.nextCBZToDoneChapter()
|
||||
|
||||
@classmethod
|
||||
def Onechapter(cls,chapter_url,scramble=None,sleep=None):
|
||||
if not str(chapter_url).startswith("http"):
|
||||
chapter_url = comicInfo.getBaseUrl() + chapter_url
|
||||
chapter_dir = cls.comicChapterDownload(chapter_url)
|
||||
if sleep == None:
|
||||
print("not sleep")
|
||||
#time.sleep(3)
|
||||
else:
|
||||
time.sleep(int(sleep))
|
||||
#下载完成后, 开始解密图片
|
||||
if scramble:
|
||||
dirs = os.listdir(chapter_dir)
|
||||
for img in dirs:
|
||||
isScramble = str(img).startswith("scramble=")
|
||||
if isScramble:
|
||||
c_path = os.path.join(chapter_dir, img)
|
||||
imageUtils.getScrambleImage(c_path)
|
||||
#进入下一阶段
|
||||
comicInfo.nextInfoToImgChapter()
|
||||
|
||||
@classmethod
|
||||
def comicChapterDownload(cls,chapter_url):
|
||||
#漫画名
|
||||
data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=chapter_url)
|
||||
# #
|
||||
data = json.loads(data[0])
|
||||
data = data.get("props")
|
||||
x = data.get("pageProps")
|
||||
bookName = x.get("bookName")
|
||||
chapterName = x.get("chapterName")
|
||||
#fileUtils.saveConfComicChapterInfo(chapterName,x,bookName)
|
||||
#if comicInfo.nextExistsGetPath("info_"):
|
||||
# print(f"{bookName} {chapterName} info文件已存在跳过")
|
||||
comicInfo.nextSaveInfoChapter(chapterName,x)
|
||||
alias = x.get("alias")
|
||||
description = x.get("description")
|
||||
images = x.get("images")
|
||||
chapterAPIPath = x.get("chapterAPIPath")
|
||||
comicInfo.setComicName(bookName)
|
||||
comicInfo.setChapterName(chapterName)
|
||||
comicInfo.setDep(description)
|
||||
pathComicInfo = comicInfo.getPathComicInfoXML()
|
||||
if not os.path.exists(pathComicInfo):
|
||||
print("不存在ComicInfo.xml 生成中...")
|
||||
comicInfo.writeComicInfoXML(chapterName)
|
||||
|
||||
if not chapterAPIPath == None:
|
||||
base_url = comicUtils.getBaseUrl(chapter_url)
|
||||
chapterAPIUrl = base_url+chapterAPIPath
|
||||
data = htmlUtils.getJSON(chapterAPIUrl)
|
||||
data = data.get("chapter")
|
||||
chapterName = data.get("name")
|
||||
images = data.get("images")
|
||||
if images == None:
|
||||
print(f"未获取到章节图像 comic_name={bookName} chapter={chapterName}")
|
||||
totalChapter = x.get("totalChapter")
|
||||
tags = x.get("tags")
|
||||
x = tags
|
||||
print(x)
|
||||
count_image = 1
|
||||
list_img = []
|
||||
list_scramble = []
|
||||
for image in images:
|
||||
image_src = image.get("src")
|
||||
scramble = image.get("scramble")
|
||||
print("count=",count_image)
|
||||
list_img.append(image_src)
|
||||
list_scramble.append(scramble)
|
||||
print(image_src)
|
||||
print(scramble)
|
||||
count_image+=1
|
||||
print(count_image)
|
||||
print(list_img)
|
||||
wait = input("暂停查看数据y/n")
|
||||
if not wait == "y":
|
||||
exit()
|
||||
return list_img
|
||||
print(totalChapter)
|
||||
#netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
|
||||
download_images(list_img,comicInfo.getDirComicChapter(),scrambles=list_scramble)
|
||||
Loading…
Reference in New Issue
Block a user