commit 499cb29fa3
caiwx86 2022-12-04 22:29:04 +08:00
11 changed files with 628 additions and 0 deletions

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
COMICOUT/
**/__pycache__/**

72
main.py Normal file

@@ -0,0 +1,72 @@
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicStr import comicStr
from utils.FileUtils import fileUtils
import json, os
from utils.NetUtils import netUtils
from utils.ImageUtils import imageUtils
from utils.ComicUtils import comicUtils

comic_chapter_path = "COMICOUT"

def oneComic(c_url):
    global comic_chapter_path
    # chapter metadata is embedded in the page's Next.js __NEXT_DATA__ script tag
    data = htmlUtils.xpathData(comicStr.result,
                               '//script[@id="__NEXT_DATA__"]/text()', url=c_url)
    data = json.loads(data[0])
    data = data.get("props")
    data = data.get("pageProps")
    print(data)
    fileUtils.saveConfComicChapterInfo("1話 親子餐廳的媽媽們", data, "親子餐廳的媽媽們")
    x = fileUtils.getInfoConfComicChapter("1話 親子餐廳的媽媽們", "親子餐廳的媽媽們")
    bookName = x.get("bookName")
    alias = x.get("alias")
    chapterName = x.get("chapterName")
    description = x.get("description")
    images = x.get("images")
    chapterAPIPath = x.get("chapterAPIPath")
    print(chapterAPIPath)
    if chapterAPIPath is not None:
        # some chapters publish their image list through a separate JSON API
        base_url = comicUtils.getBaseUrl(c_url)
        chapterAPIUrl = base_url + chapterAPIPath
        data = htmlUtils.getJSON(chapterAPIUrl)
        data = data.get("chapter")
        chapterName = data.get("name")
        images = data.get("images")
    if images is None:
        print("no images found for this chapter")
    totalChapter = x.get("totalChapter")
    tags = x.get("tags")
    print(tags)
    count_image = 1
    list_img = []
    list_scramble = []
    for image in images:
        image_src = image.get("src")
        scramble = image.get("scramble")
        print("count=", count_image)
        list_img.append(image_src)
        list_scramble.append(scramble)
        print(image_src)
        print(scramble)
        count_image += 1
    print(count_image)
    print(list_img)
    print(totalChapter)
    netUtils.downloadComicChapterImages(bookName, chapterName, list_img, scrambles=list_scramble)
    comic_chapter_path = os.path.join("COMICOUT", bookName, chapterName)

if __name__ == '__main__':
    oneComic("https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/23")
    # path = "COMICOUT\好友的私生活\第1話 好友的私生活"
    path = comic_chapter_path
    dirs = os.listdir(path)
    for dir in dirs:
        # scrambled images were saved as "scramble=<blocks>_<name>" by netUtils
        isScramble = str(dir).startswith("scramble=")
        if isScramble:
            c_path = os.path.join(path, dir)
            imageUtils.getScrambleImage(c_path)
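
For reference, the pageProps payload that oneComic pulls out of __NEXT_DATA__ is assumed to look roughly like this; the key names come from the .get() lookups above, but the values are illustrative only:

    # hypothetical shape, inferred from oneComic's lookups
    pageProps = {
        "bookName": "親子餐廳的媽媽們",
        "alias": "...",
        "chapterName": "1話",
        "description": "...",
        "totalChapter": 10,
        "tags": ["..."],
        # optional; when present, the image list comes from a separate JSON API
        "chapterAPIPath": "/api/books/<id>/chapters/23",
        "images": [{"src": "https://rm01.xyz/imgs/<name>.jpg", "scramble": True}],
    }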

88
utils/ComicUtils.py Normal file

@@ -0,0 +1,88 @@
import os
from utils.comic.ComicStr import comicStr
from utils.FileUtils import fileUtils
from utils.NetUtils import netUtils

class comicUtils:
    comic_title = None
    temp_url = ""
    comics = {}

    # store one value for all comics (bool_list) or one value per comic under `name`
    @classmethod
    def setKeyValue(cls, name, c_value, bool_list=None):
        count = 0
        if bool_list:
            count += 1
            if cls.comics.get(count) is None:
                cls.comics[count] = {name: c_value}
            else:
                cls.comics[count].update({name: c_value})
        else:
            for value in c_value:
                count += 1
                if cls.comics.get(count) is None:
                    cls.comics[count] = {name: value}
                else:
                    cls.comics[count].update({name: value})

    @classmethod
    def listImgDownload(cls, save_dir_name, links):
        print("save_dir_name:", save_dir_name)
        print("list_link:", links)
        count = 1
        for link in links:
            print("link:", count, ":", link)
            file_count = "{:0>3d}".format(count)
            file_path = os.path.join("Comic", save_dir_name, file_count + ".jpg")
            netUtils.download(link, file_path)
            count += 1

    @classmethod
    def downloadComic(cls, comic_name):
        comic = fileUtils.read_comic(comic_name)
        list_img = comic.get(comicStr.list_img)
        icon = comic.get(comicStr.icon)
        for chapter_img in list_img:
            for key in chapter_img.keys():
                if icon is not None:
                    icon = netUtils.downloadComicIcon(comic_name, key, icon)
                else:
                    print("icon skipped")
                netUtils.downloadComicChapterImages(comic_name, key, chapter_img[key])

    '''
    Read the saved config for comic_name, write a "ComicInfo.xml"
    per chapter, and pack the images into CBZ archives.
    '''
    @classmethod
    def packComicCBZ(cls, comic_name):
        data = fileUtils.read_comic(comic_name)
        title = data.get(comicStr.title)
        author = data.get(comicStr.author)
        author = str(author).replace("/", ",").replace(" ", "")
        dep = data.get(comicStr.dep)
        chapters = data.get(comicStr.chapters)
        tags = "韩漫"          # "Korean manhwa"
        c_publisher = "韩漫"
        print("title:", title, "author=", author, "dep=", dep, "chapters:", chapters)
        # note: CBZUtils is referenced here but is not part of this commit
        for chapter in chapters:
            CBZUtils.writeComicInfoXML(title, chapter, dep, author,
                                       tags, c_publisher)
            CBZUtils.packComicCBZ(title, chapter)

    '''
    Return the site root (scheme + host) of a URL by cutting at the third "/".
    '''
    @classmethod
    def getBaseUrl(cls, url):
        num = 3
        index = 0
        for x in range(0, num):
            index = str(url).find("/", index) + 1
        return url[0:index - 1]
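
Since getBaseUrl only scans for the third "/" and truncates, it reduces a URL to scheme plus host. A quick sketch of the expected behavior, plus the stdlib equivalent via urllib.parse (not used by this commit):

    comicUtils.getBaseUrl("https://rm01.xyz/books/f08668a4-0cbc-488e-95a7-3c71de0c7a31/23")
    # -> "https://rm01.xyz"

    from urllib.parse import urlsplit
    parts = urlsplit("https://rm01.xyz/books/abc/23")
    base = f"{parts.scheme}://{parts.netloc}"   # "https://rm01.xyz"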

77
utils/FileUtils.py Normal file

@@ -0,0 +1,77 @@
import json
import os

class fileUtils:
    base_path = "COMIC_OUT"
    conf_path = os.path.join(base_path, ".conf")
    comic_path = os.path.join(base_path, ".conf", "comic")
    comic_name = ""

    # serialize data to JSON and write it to path, creating parent dirs as needed
    @classmethod
    def file_save(cls, path, data, mode=None):
        dir_name = os.path.dirname(path)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        data = json.dumps(data)
        if mode is None:
            mode = "w+"
        try:
            with open(path, mode, encoding="utf-8") as f:
                f.write(data)
            print("data=", data)
            result = path + " written successfully"
        except OSError:
            result = path + " write failed"
        return result

    @classmethod
    def save_conf(cls, file, data, mode=None):
        file = os.path.join(cls.conf_path, file)
        cls.file_save(file, data, mode)

    @classmethod
    def save_comic(cls, name, data, mode=None):
        print("comic_save=", name)
        cls.file_save(cls.get_url_save_comic(name), data, mode)

    @classmethod
    def saveConfComicChapterInfo(cls, chapter, data, comic_name=None, mode=None):
        cls.file_save(cls.getPathConfComicChapterInfo(chapter, comic_name), data, mode)

    @classmethod
    def getPathConfComicChapterInfo(cls, chapter, comic_name=None):
        if comic_name is None:
            comic_name = cls.comic_name
        return os.path.join(cls.comic_path, comic_name, "info_" + chapter)

    @classmethod
    def getInfoConfComicChapter(cls, chapter, comic_name=None):
        path = cls.getPathConfComicChapterInfo(chapter, comic_name)
        with open(path, "r", encoding="utf-8") as fs:
            data = json.loads(fs.read())
        return data

    @classmethod
    def read_comic(cls, name):
        file = os.path.join(cls.comic_path, name)
        data = None
        try:
            with open(file, "r", encoding="utf-8") as fs:
                data = json.loads(fs.read())
        except (OSError, json.JSONDecodeError):
            print("failed to read file=", file)
        return data

    '''
    Return the save path for a comic config file.
    '''
    @classmethod
    def get_url_save_comic(cls, name):
        return os.path.join(cls.comic_path, name)
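
The chapter-config round trip that main.py relies on boils down to the sketch below; the comic and chapter names are placeholders:

    # writes JSON to COMIC_OUT/.conf/comic/demo-comic/info_1話
    fileUtils.saveConfComicChapterInfo("1話", {"bookName": "demo"}, "demo-comic")
    # reads the same file back into a dict
    info = fileUtils.getInfoConfComicChapter("1話", "demo-comic")
    print(info["bookName"])   # "demo"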

54
utils/HtmlUtils.py Normal file

@@ -0,0 +1,54 @@
from fake_useragent import UserAgent
import requests
from lxml import html
from utils.comic.ComicStr import comicStr

class htmlUtils:
    headers = {'User-Agent': UserAgent().random}
    url_data = {}
    temp_url = None

    # fetch a URL and return its parsed lxml tree, caching one tree per URL
    @classmethod
    def getHTML(cls, curl):
        url_text = cls.url_data.get(curl)
        # cache miss: fetch and parse the page
        if url_text is None:
            print("requesting:", curl)
            res = requests.get(curl, headers=cls.headers)
            url_text = html.fromstring(res.text)
            cls.url_data.update({curl: url_text})
        return url_text

    @classmethod
    def getJSON(cls, curl):
        res = requests.get(curl, headers=cls.headers)
        return res.json()

    @classmethod
    def xpathData(cls, c_title, c_xpath, url=None, num=None, type=None, not_eq=None):
        # reuse the last requested URL when none is given
        if url is None:
            url = cls.temp_url
        else:
            cls.temp_url = url
        result = []
        et = cls.getHTML(url)
        # collect matches, skipping any value equal to not_eq
        for x in et.xpath(c_xpath):
            if x != not_eq:
                result.append(x)
        data = {c_title: result}
        if num is not None:
            data = {c_title: result[num]}
        if type is not None:
            data = {type: result}
        if c_title == comicStr.result:
            data = result
        return data
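
xpathData's return shape depends on which keyword arguments are set; a few hedged examples against an assumed page:

    # default: the full match list wrapped in a dict keyed by c_title
    htmlUtils.xpathData("title", "//h1/text()", url="https://example.com")   # {"title": [...]}
    # num picks one element out of the match list
    htmlUtils.xpathData("title", "//h1/text()", num=0)                       # {"title": "..."}
    # type swaps the dict key; not_eq drops a placeholder value from the matches
    htmlUtils.xpathData("imgs", "//img/@src", type="ch1", not_eq="/loading.jpg")
    # and when c_title == comicStr.result, the bare list is returned (as main.py uses)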

115
utils/ImageUtils.py Normal file

@@ -0,0 +1,115 @@
import base64, hashlib, os, shutil
from PIL import Image

class imageUtils:
    # derive the strip count from the image name: base64-decode it,
    # MD5 the bytes, and map the last digest byte into 5..14
    @classmethod
    def encodeImage(cls, enStr):
        print("en", enStr)
        enc = base64.b64decode(enStr)
        print("decoded:", enc)
        m = hashlib.md5()
        m.update(enc)
        md5 = m.digest()
        d = md5[-1]
        print(md5)
        blocks = d % 10 + 5
        print("blocks=", blocks)
        return blocks

    @classmethod
    def scrambleImage(cls, file_path):
        # file name convention: <dir>/scramble=<blocks>_<name>.<ext>, e.g. 10_29.jpg
        file_str = str(file_path).split("=")
        baseDir = file_str[0].replace("scramble", "")
        baseName = file_str[-1]
        baseFN = baseName.split("_")
        save_name = baseFN[1]
        save_name_delesu = baseName.split(".")[0]
        blocks = int(baseFN[0])
        save_file_path = os.path.join(baseDir, save_name)
        print("save to", save_file_path)
        if os.path.exists(save_file_path):
            print("already descrambled, skipping:", save_file_path)
            return None
        img = Image.open(file_path)
        width = img.width
        height = img.height
        print("blocks=", blocks)
        blockHeight = int(height / blocks)
        print("blockHeight=", blockHeight)
        # split into horizontal strips, then re-compose them in reverse order
        split_path = os.path.join(baseDir, save_name_delesu + "split")
        cls.splitimage(file_path, blocks, 1, split_path)
        cls.image_compose(split_path + "/", blocks, 1, save_file_path, blockHeight, width)
        # return the scrambled source so the caller can delete it
        return file_path

    @classmethod
    def splitimage(cls, src, rownum, colnum, dstpath):
        img = Image.open(src)
        w, h = img.size
        if rownum <= h and colnum <= w:
            s = os.path.split(src)
            if dstpath == '':
                dstpath = s[0]
            if not os.path.exists(dstpath):
                os.makedirs(dstpath)
            fn = s[1].split('.')
            basename = fn[0]
            ext = fn[-1]
            num = 0
            rowheight = h // rownum
            colwidth = w // colnum
            for r in range(rownum):
                for c in range(colnum):
                    box = (c * colwidth, r * rowheight, (c + 1) * colwidth, (r + 1) * rowheight)
                    file_path = os.path.join(dstpath, basename + '_' + str(num) + '.' + ext)
                    print("file_path=", file_path)
                    img.crop(box).save(file_path)
                    num = num + 1
        else:
            print('Invalid split: more rows/columns than pixels!')
        img.close()

    @classmethod
    def image_compose(cls, src, row, column, save_path, image_height, image_width):
        images_format = ['.png', '.jpg']
        image_names = [name for name in os.listdir(src)
                       if os.path.splitext(name)[1] in images_format]
        # paste the strips back in reverse numeric order to undo the scramble
        image_names.sort(key=lambda n: int(os.path.splitext(n)[0].split('_')[-1]), reverse=True)
        # sanity-check that enough strips exist for the requested grid
        if len(image_names) < row * column:
            raise ValueError("number of split images does not match rows x columns!")
        to_image = Image.new('RGB', (column * image_width, row * image_height))
        # paste each strip onto the output canvas in order
        for y in range(1, row + 1):
            for x in range(1, column + 1):
                image_path = src + image_names[column * (y - 1) + x - 1]
                print("split_image=", image_path)
                from_image = Image.open(image_path)
                to_image.paste(from_image, ((x - 1) * image_width, (y - 1) * image_height))
        to_image.save(save_path)
        print("image composed:", save_path)
        # remove the temporary split strips
        shutil.rmtree(src)

    @classmethod
    def getScrambleImage(cls, path):
        scrambleFileCache = cls.scrambleImage(path)
        if scrambleFileCache is not None:
            os.remove(str(scrambleFileCache))
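
The descramble pass depends on the filename convention set in netUtils: a scrambled download is saved as "scramble=<blocks>_<NNN>.<ext>". scrambleImage splits that file into <blocks> horizontal strips, re-composes them in reverse order, and writes the clean <NNN>.<ext> beside it. A sketch of one pass over a chapter directory, mirroring the loop in main.py (paths are examples):

    import os
    chapter_dir = os.path.join("COMICOUT", "demo-comic", "ch1")
    for name in os.listdir(chapter_dir):
        if name.startswith("scramble="):
            # e.g. "scramble=10_001.jpg": 10 strips, target file 001.jpg
            imageUtils.getScrambleImage(os.path.join(chapter_dir, name))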

86
utils/NetUtils.py Normal file

@@ -0,0 +1,86 @@
import os.path, shutil
import requests
from concurrent.futures import ThreadPoolExecutor
import time
import imghdr
from utils.comic.PathStr import pathStr
from utils.ImageUtils import imageUtils

class netUtils:
    # shared pool so downloads are limited to three concurrent workers
    executor = ThreadPoolExecutor(max_workers=3)

    # download url to path, retrying up to 5 times on non-200 responses
    @classmethod
    def download(cls, url, path, fileType=None):
        if os.path.exists(path):
            if imghdr.what(path):
                msg = "file already exists, skipping: " + path
                print(msg)
                return msg
            else:
                print("file corrupted, re-downloading: " + path)
        tmp_file = path + ".downloads"
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
            print("removed stale temp file:", tmp_file)
        repair_count = 1
        res = requests.get(url, stream=True)
        while res.status_code != 200 and repair_count <= 5:
            res = requests.get(url, stream=True)
            print(f'retry {repair_count}: {url}')
            repair_count += 1
        # verify the response really is an image when requested
        if fileType == "image":
            if 'image' not in res.headers.get("content-type", ""):
                print(f"url= {url} Error: URL does not appear to be an image")
        basedir = os.path.dirname(path)
        if not os.path.exists(basedir):
            os.makedirs(basedir)
        # write to a temp file first, then move it into place when complete
        with open(tmp_file, 'wb') as f:
            for ch in res:
                f.write(ch)
        shutil.move(tmp_file, path)
        print(f"url={url} saved to: {path}")
        return path

    @classmethod
    def threadDownload(cls, url, path, fileType=None):
        cls.executor.submit(cls.download, url, path, fileType)

    @classmethod
    def downloadComicChapterImages(cls, comic_name, chapter_name, imgs, scrambles=None):
        file_path = os.path.join(pathStr.base_comic_out, comic_name, chapter_name)
        print("files=", file_path)
        count_img = 1
        for img in imgs:
            count = "{:0>3d}".format(count_img)
            file_name = count + os.path.splitext(img)[-1]
            save_file_path = os.path.join(file_path, file_name)
            if scrambles and scrambles[count_img - 1]:
                # the scramble block count is derived from the image's base name;
                # replacing the extension with "==" restores base64 padding
                su = "." + str(img).split(".")[-1]
                de_str = str(img).split("/")[-1].replace(su, "==")
                blockInt = imageUtils.encodeImage(de_str)
                save_file_path = os.path.join(file_path, "scramble=" + str(blockInt) + "_" + file_name)
            cls.threadDownload(img, save_file_path, fileType="image")
            time.sleep(0.1)
            count_img += 1
        return os.path.dirname(save_file_path)

    @classmethod
    def downloadComicIcon(cls, comic_name, chapter, img):
        file_su = os.path.splitext(img)[-1]
        icon_name = "cover" + file_su
        # pathStr paths replace the previously undefined class attributes
        save_file_path = os.path.join(pathStr.base_comic_img, comic_name, icon_name)
        if os.path.exists(save_file_path):
            print("cover already exists, skipping download")
            return None
        else:
            cls.download(img, save_file_path, fileType="image")
            target_dir = os.path.join(pathStr.base_CBZ, comic_name)
            target_file = os.path.join(target_dir, chapter + file_su)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            shutil.copy(save_file_path, target_file)
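
The scramble block count is derived from the image filename itself: swapping the extension for "==" restores base64 padding, and encodeImage maps the last MD5 byte of the decoded bytes into 5..14. A standalone sketch of that derivation (the URL is made up; real names must decode as valid base64):

    import base64, hashlib
    img = "https://rm01.xyz/imgs/aGVsbG.jpg"             # hypothetical image URL
    de_str = img.split("/")[-1].replace(".jpg", "==")    # "aGVsbG=="
    digest = hashlib.md5(base64.b64decode(de_str)).digest()
    blocks = digest[-1] % 10 + 5                         # always in 5..14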

37
utils/comic/ComicInfo.py Normal file

@@ -0,0 +1,37 @@
from xml.dom.minidom import Document

document = Document()

class comicInfoXmlNode():
    # ComicInfo.xml tag names
    chapter = "Title"
    comic_name = "Series"
    dep = "Summary"
    author = "Writer"
    tags = "Genre"
    cbs = "Publisher"
    lang = "LanguageISO"

    # create a <node>value</node> element and return it
    @classmethod
    def setNodeAndValue(cls, node, value):
        node = document.createElement(node)
        node_text = document.createTextNode(value)
        node.appendChild(node_text)
        return node

    @classmethod
    def setChapter(cls, value):
        return cls.setNodeAndValue(cls.chapter, value)

    @classmethod
    def setComicName(cls, value):
        return cls.setNodeAndValue(cls.comic_name, value)

    @classmethod
    def getComicInfoXML(cls, value):
        # incomplete: only the Title node is built so far
        return cls.setChapter(value)

class comicInfo():
    # write ComicInfo.xml (stub, not yet implemented)
    @classmethod
    def writeComicInfoXML(cls, c_title, chapter, dep, author, tags="韩漫", c_publisher="韩漫", language="zh"):
        # expected output path: CBZ/<comic_name>/<chapter>
        print()
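
writeComicInfoXML is still a stub; a minimal sketch of what a complete generator could look like with xml.dom.minidom, assuming the CBZ/<series>/<chapter> layout referenced above (the function name and out_dir are hypothetical):

    import os
    from xml.dom.minidom import Document

    def write_comic_info_xml(title, chapter, summary, writer, genre="韩漫",
                             publisher="韩漫", lang="zh", out_dir="COMICOUT/CBZ"):
        doc = Document()
        root = doc.createElement("ComicInfo")
        doc.appendChild(root)
        # map values onto the tag names defined in comicInfoXmlNode
        fields = {"Title": chapter, "Series": title, "Summary": summary,
                  "Writer": writer, "Genre": genre, "Publisher": publisher,
                  "LanguageISO": lang}
        for tag, value in fields.items():
            node = doc.createElement(tag)
            node.appendChild(doc.createTextNode(value))
            root.appendChild(node)
        target = os.path.join(out_dir, title, chapter)
        os.makedirs(target, exist_ok=True)
        with open(os.path.join(target, "ComicInfo.xml"), "w", encoding="utf-8") as f:
            f.write(doc.toprettyxml(indent="  "))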

28
utils/comic/ComicStr.py Normal file

@@ -0,0 +1,28 @@
class comicStr:
url = "url"
xpath = "xpath"
title = "title"
icon = "icon"
author = "author"
dep = "dep"
chapters = "chapters"
homepage = "homepage"
chapter_href = "chapter_href"
list_img = "list_img"
img = "img"
last_update = "last_update"
alias = "alias"
tags = "tags"
action = "action"
chapters_href = "chapters_href"
base_url = ""
data = "data"
result = "result"
@classmethod
def setBaseUrl(cls,url):
cls.base_url = url
@classmethod
def getBaseUrl(cls):
return cls.base_url

6
utils/comic/PathStr.py Normal file

@@ -0,0 +1,6 @@
import os
class pathStr:
base_comic_out = "COMICOUT"
base_CBZ = os.path.join(base_comic_out,"CBZ")
base_comic_img = os.path.join(base_comic_out,"outputComic")

63
utils/entity/RouMan.py Normal file

@@ -0,0 +1,63 @@
from utils.comic.ComicStr import comicStr
from utils.ComicUtils import comicUtils
from utils.HtmlUtils import htmlUtils

class comicEntityRM:
    @classmethod
    def oneComic(cls, c_url):
        # comic title
        title = htmlUtils.xpathData(comicStr.title,
            '//div[@class="col"]/h5/text()', url=c_url, num=0)
        # alias
        alias = htmlUtils.xpathData(comicStr.alias,
            '//span[contains(@class,"bookid_alias")]/text()', num=1)
        icon = htmlUtils.xpathData(comicStr.icon,
            '//img[@class="img-thumbnail"]/@src')
        author = htmlUtils.xpathData(comicStr.author,
            '//div[contains(@class,"bookid_bookInfo")]/p[1]/text()', num=1)
        tags = htmlUtils.xpathData(comicStr.tags,
            '//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()')
        action = htmlUtils.xpathData(comicStr.action,
            '//div[contains(@class,"bookid_bookInfo")]/p[2]/text()', num=1)
        dep = htmlUtils.xpathData(comicStr.dep,
            '//div[contains(@class,"bookid_bookInfo")]/p[4]/text()', num=1)
        update_date = htmlUtils.xpathData(comicStr.last_update,
            '//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()', num=1)
        chapters = htmlUtils.xpathData(comicStr.chapters,
            '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
        chapter_href = htmlUtils.xpathData(comicStr.chapter_href,
            '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href')
        # note: setComic/getComic are expected on comicUtils but are not part of this commit
        comicUtils.setComic(title, alias, icon, author, tags, action, dep, update_date, chapters, chapter_href)
        # e.g. {'title': ['社區重建協會']}
        # homepage
        # homepage = {comicStr.homepage: [c_url]}
        # images
        # comicUtils.setComic(titles, homepage, icons, authors, deps, chapters, chapter_hrefs, last_update)
        comicData = comicUtils.getComic()
        print(comicData)
        wait = input("pause to inspect the data, continue? y/n")
        if wait != "y":
            exit()
        return comicData

    '''
    Fetch all image URLs under a chapter.
    '''
    @classmethod
    def comicChapter(cls, c_url, chapter):
        xpath_str = '//img[contains(@class,"id_comicImage")]/@src'
        not_eq = "/loading.jpg"
        # all image links in the chapter, skipping the lazy-load placeholder
        list_img = htmlUtils.xpathData(comicStr.list_img,
            xpath_str, url=c_url, type=chapter, not_eq=not_eq)
        print(list_img)
        wait = input("pause to inspect the data, continue? y/n")
        if wait != "y":
            exit()
        return list_img
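
A hedged usage sketch for this entity class (the URLs are placeholders, and both methods pause at an interactive y/n prompt):

    from utils.entity.RouMan import comicEntityRM
    # scrape book metadata and the chapter list
    comicEntityRM.oneComic("https://rm01.xyz/books/<book-id>")
    # fetch one chapter's image list, keyed by the chapter name
    imgs = comicEntityRM.comicChapter("https://rm01.xyz/books/<book-id>/1", "第1話")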