This commit is contained in:
caiwx86 2023-01-23 09:40:53 +08:00
parent 4f0023881e
commit 7aa548214d
15 changed files with 245 additions and 518 deletions

View File

@ -2,7 +2,7 @@ import json
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.entity.BaseComicEntity import baseComic
from utils.base.BaseComicEntity import baseComic
class comicEntity:
@classmethod
@ -33,21 +33,20 @@ class comicEntity:
@classmethod
def oneComic(cls,c_url,sleep=None):
#漫画名
title = htmlUtils.xpathData('//div[@class="gb-inside-container"]/h1[contains(@class,"gb-headline-text")]/text()',url=c_url,update=True)
title = htmlUtils.xpathData('//div[@class="gb-inside-container"]/h1[contains(@class,"gb-headline-text")]/text()',url=c_url,update=True,num=0)
#别名
#alias = htmlUtils.xpathData('//span[contains(@class,"bookid_alias")]/text()',num=1)
#icon = htmlUtils.xpathData('//div[contains(@class,"gb-container")]/div[@class="gb-inside-container"]/img[contains(@class,"dynamic-featured-image")]/@src')
icon = htmlUtils.xpathData('//noscript/img[contains(@class,"dynamic-featured-image")]/@src')
author = htmlUtils.xpathData('//div[@class="gb-inside-container"]/div[@class="author-content"]/a/text()')
icon = htmlUtils.xpathData('//noscript/img[contains(@class,"dynamic-featured-image")]/@src',num=0)
author = htmlUtils.xpathData('//div[@class="gb-inside-container"]/div[@class="author-content"]/a/text()', num=0)
tags = htmlUtils.xpathData('//div[@class="gb-inside-container"]/div[@class="genres-content"]/a/text()')
dep = htmlUtils.xpathData('//div[@class="gb-inside-container"]/div[@class="descrip_manga_info"]//p/text()')
update_date = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapter"]//span[@class="chapter-release-date"]/i/text()')
c_url = str(c_url).replace("manga","chapterlist")+"#"
chapters = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapter"]/text()',not_eq=' ',url=c_url)
chapter_href = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapter"]/@href',url=c_url)
c_url = c_url+"#"
chapters = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapter"]/text()',not_eq=' ',url=c_url)
chapter_href = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapter"]/@href',url=c_url)
dep = htmlUtils.xpathData('//div[@class="gb-inside-container"]/div/p/text()')
dep = "".join(dep)
#update_date = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapterlist"]//span[@class="chapter-release-date"]/i/text()')
chapters_url = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="c-chapter-readmore"]/@href',url=c_url,num=0)
chapters = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapterlist"]/text()',not_eq=' ',url=chapters_url)
chapter_href = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapterlist"]/@href',url=chapters_url)
#chapter_href = htmlUtils.xpathData('//div[@class="chapter-content-listing"]//a[@class="wp-manga-chapter"]/@href',url=c_url,next_xpath='//div[@class="chapter-content-listing"]//a[@class="c-chapter-readmore"]/@href')
baseComic.oneComic(url=c_url,title=title,author=author,
icon=icon,tags=tags,dep=dep,chapters=chapters,chapter_href=chapter_href,

View File

@ -3,7 +3,7 @@ import re
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.entity.BaseComicEntity import baseComic
from utils.base.BaseComicEntity import baseComic
class comicEntity:
@classmethod

View File

@ -2,7 +2,7 @@ import json
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.entity.BaseComicEntity import baseComic
from utils.base.BaseComicEntity import baseComic
class comicEntity:
@classmethod

26
entity/down/BaoZi.py Normal file
View File

@ -0,0 +1,26 @@
import hashlib
import json
import os
from utils.HtmlUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.Ntfy import ntfy
class comicCommon:
    """Chapter downloader for the BaoZi site (baozimh.org)."""

    @classmethod
    def comicChapterDownload(cls, chapter_url):
        """Download one chapter: scrape its image URLs, derive ordered
        page/file names, write ComicInfo.xml, then hand the URL/file-name
        lists to comicInfo's downloader.

        :param chapter_url: URL of a single chapter page.
        """
        # The site lazy-loads pages, so the real image URL lives in
        # @data-src (not @src); update=True forces a fresh fetch.
        imgs_url = htmlUtils.xpathData(
            "//div[@class='gb-inside-container']/img/@data-src",
            url=chapter_url, update=True)
        list_img = []        # zero-padded page indices: "001", "002", ...
        list_file_name = []  # target file names keeping each source extension
        # enumerate replaces the original manual `count` counter.
        for count, img_url in enumerate(imgs_url, start=1):
            img_su = str(img_url).split('.')[-1]  # file extension from the URL
            count_image = "{:0>3d}".format(count)
            list_file_name.append(count_image + "." + img_su)
            list_img.append(count_image)
        comicInfo.setPages(list_img)
        # NOTE(review): assumes comicInfo.str_chapter was set by the caller
        # before this runs — confirm against the entity workflow.
        comicInfo.writeComicInfoXML(comicInfo.str_chapter)
        comicInfo.comicChapterDownload(imgs_url, list_file_name)

32
main.py
View File

@ -1,19 +1,35 @@
import os
from utils.entity.RouMan import comicEntity
from utils.comic.PathStr import pathStr
#from utils.entity.JMTI import comicEntity
from utils.comic.ComicInfo import comicInfo
from entity.BaoZi import comicEntity as baoziEntity
from entity.RouMan import comicEntity as roumanEntity
def comics():
def roumanskip():
# comicInfo.setComicNameSkips("虐美人 1-117話")
# comicInfo.setComicNameSkips("夢遊")
# comicInfo.setComicNameSkips("療癒女孩")
comicInfo.setComicNameSkips("深度交流會")
comicInfo.setComicNameSkips("心機女教授")
comicInfo.setComicNameSkips("天降惡魔 Devil Drop デビルドロップ")
comicInfo.setComicNameSkips("穿越異世界之後救了我的人是個少年殺人犯少年暗殺者×倒黴催的姐姐順水推舟在異世界做起了愛第2話.zip")
comicInfo.setComicNameSkips("幫人家畫嘛 第二季 Cartoonists-NSFW Season2")
# comicInfo.setComicNameSkips("霸道主管要我IN")
# comicInfo.setComicNameSkips("正妹小主管")
comicInfo.setComicNameSkips("反烏托邦遊戲")
def rouman():
pathStr.setComicMainAndPath(pathStr.comic_rm)
roumanskip()
for x in range(0,52):
comicEntity.downladsComcis("https://rm01.xyz/books?&page="+str(x))
roumanEntity.downladsComcis("https://rm01.xyz/books?&page="+str(x))
def skip():
comicInfo.setComicNameSkips("")
def baozi():
pathStr.setComicMainAndPath(pathStr.comic_bz)
baoziEntity.oneComic("https://baozimh.org/manga/biaoren-xinmanhua/")
if __name__ == '__main__':
# pathStr.setComicMainPath("JM")
comics()
rouman()
# baozi()
# os.environ["http_proxy"] = "http://127.0.0.1:7890"
# os.environ["https_proxy"] = "http://127.0.0.1:7890"
# comicEntity.oneComic("https://18comic.vip/album/358870/")

View File

@ -10,12 +10,10 @@ from utils.FileUtils import fileUtils
class CBZUtils:
@classmethod
def getCBZ_Dir(cls):
return comicInfo.getNewCBZComicChapter()
def getCBZ_Dir(cls): return comicInfo.getNewCBZComicChapter()
@classmethod
def getCBZ_Path(cls):
return cls.getCBZ_Dir()+".CBZ"
def getCBZ_Path(cls): return cls.getCBZ_Dir()+".CBZ"
@classmethod
def readDirsOrFiles(cls,dir,type):

View File

@ -6,9 +6,9 @@ from PIL import Image
class imageUtils:
@classmethod
def encodeImage(cls,enStr):
print("en",enStr)
enc = base64.b64decode(enStr)
def encodeImage(cls,str_en):
print("en",str_en)
enc = base64.b64decode(str_en)
print("解密:",enc)
m = hashlib.md5()
m.update(enc)
@ -30,13 +30,13 @@ class imageUtils:
return None
file_str = str(file_path).split("=")
#10_29.jpg
baseDir = file_str[0].replace("scramble","")
baseName = file_str[-1]
baseFN = baseName.split("_")
save_name = baseFN[1]
base_dir = file_str[0].replace("scramble","")
base_name = file_str[-1]
base_fn = base_name.split("_")
save_name = base_fn[1]
save_name_delesu = save_name.split(".")[0]
blocks = int(baseFN[0])
save_file_path = os.path.join(baseDir,save_name)
blocks = int(base_fn[0])
save_file_path = os.path.join(base_dir,save_name)
print("sva",save_file_path)
if os.path.exists(save_file_path):
print("图片已解密,已跳过:", save_file_path)
@ -50,16 +50,16 @@ class imageUtils:
height = img.height
#blocks = cls.encodeImage(enStr)
print("blocks=",blocks)
blockHeight = int(height / blocks)
blockWidth = int(width / blocks)
print("blockHeight=",blockHeight)
su = str(file_path).split(".")[-1]
split_path = os.path.join(baseDir,save_name_delesu+"split")
block_height = int(height / blocks)
block_width = int(width / blocks)
print("blockHeight=",block_height)
prefix = str(file_path).split(".")[-1]
split_path = os.path.join(base_dir,save_name_delesu+"split")
if image_su == "downloads":
return None
is_split = cls.splitimage(file_path,blocks,1,split_path)
if not is_split == None:
cls.image_compose(split_path,blocks,1,save_file_path,blockHeight,width)
if is_split != None:
cls.image_compose(split_path,blocks,1,save_file_path,block_height,width)
else:
if os.path.exists(split_path):
shutil.rmtree(split_path)
@ -141,10 +141,8 @@ class imageUtils:
@classmethod
def getScrambleImage(cls,path):
scrambleFileCache = cls.scrambleImage(path)
if not scrambleFileCache == None:
if os.path.exists(scrambleFileCache):
os.remove(scrambleFileCache)
scramble_file_cache = cls.scrambleImage(path)
if scramble_file_cache != None and os.path.exists(scramble_file_cache): os.remove(scramble_file_cache)
@classmethod
def encode_scramble_image(cls,imgpath):

View File

@ -20,7 +20,7 @@ class htmlUtils:
file_url = re.sub(rstr, "", url)
except:
file_url = "error_cache"
file_path = os.path.join(pathStr.base_html_cache,file_url)
file_path = os.path.join(pathStr.base_html_cache(),file_url)
if type == "new":
return file_path
if os.path.exists(file_path):

View File

@ -7,7 +7,10 @@ from utils.downloader import download_images
from utils.downloader import download_comic_icon
from utils.Ntfy import ntfy
from utils.CBZUtils import verUtils
from utils.entity.down.RouMan import comicCommon
from entity.down.RouMan import comicCommon as RouManComicCommon
from entity.down.JM import comicCommon as JMComicCommon
from entity.down.BaoZi import comicCommon as BaoZiComicCommon
from utils.comic.PathStr import pathStr
class baseComic:
count_chapter = 0
@ -155,7 +158,16 @@ class baseComic:
@classmethod
def comicChapterDownload(cls,url):
comicCommon.comicChapterDownload(url)
comic_main = pathStr.getComicMain()
if comic_main == pathStr.comic_jm:
JMComicCommon.comicChapterDownload(url)
if comic_main == pathStr.comic_bz:
BaoZiComicCommon.comicChapterDownload(url)
if comic_main == pathStr.comic_rm:
RouManComicCommon.comicChapterDownload(url)
if comic_main == None:
print("comic_main为空退出中...")
exit()
list_img = comicInfo.getChapterListImg()
files_name = comicInfo.getChapterFilesName()
#netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
@ -199,7 +211,7 @@ class baseComic:
is_next = verUtils.verNextCBZ(list_img)
repeat = 1
while not is_next and repeat <= 2:
download_images(list_img,comicInfo.getDirComicChapter(), filesName=files_name,timeout=20)
download_images(list_img,comicInfo.getDirComicChapter(), files_name=files_name,timeout=20)
file_imgs = os.listdir(comicInfo.getDirComicChapter())
count_jpg = ",".join(file_imgs).split(".jpg")
is_next = len(count_jpg)-1 == len(list_img)

View File

@ -6,7 +6,7 @@ import json,shutil
class comicInfo():
document = Document()
pathComicInfo = None
path_comic_info = None
root = "ComicInfo"
chapter = "Title"
@ -25,13 +25,13 @@ class comicInfo():
pages = "Pages"
web = "Web"
str_comicName = None
str_comic_name = None
str_chapter = None
str_icon = None
str_homePage = None
str_listChapter = None
str_homepage = None
str_listchapter = None
str_chapter_imgs = None
str_updateAt = None
str_update_at = None
str_date_year = None
str_date_month = None
str_date_day = None
@ -44,13 +44,13 @@ class comicInfo():
list_skip = []
chapter_node = None
comicName_node = None
comic_name_node = None
dep_node = None
author_node = None
genre_node = None
cbs_node = None
lang_node = None
comicNames_node = None
comic_names_node = None
tags_node = None
date_year_node = None
date_month_node = None
@ -69,8 +69,7 @@ class comicInfo():
return None
@classmethod
def Root(cls):
return cls.document.createElement(cls.root)
def root_node(cls): return cls.document.createElement(cls.root)
@classmethod
def setChapterName(cls,value):
@ -78,86 +77,52 @@ class comicInfo():
cls.chapter_node = cls.setNodeAndValue(cls.chapter,value)
@classmethod
def setListChapter(cls, value):
cls.str_listChapter = value
def setListChapter(cls, value): cls.str_list_chapter = value
@classmethod
def setChapterImgs(cls, value):
cls.str_chapter_imgs = value
def setChapterImgs(cls, value): cls.str_chapter_imgs = value
@classmethod
def getChapterImgs(cls):
return cls.str_chapter_imgs
def getChapterImgs(cls): return cls.str_chapter_imgs
@classmethod
def getLenChapters(cls):
return len(cls.str_listChapter)
def getLenChapters(cls): return len(cls.str_list_chapter)
@classmethod
def setComicName(cls,value):
cls.str_comicName = cls.fixFileName(value)
cls.comicName_node = cls.setNodeAndValue(cls.comic_name, value)
cls.str_comic_name = cls.fixFileName(value)
cls.comic_name_node = cls.setNodeAndValue(cls.comic_name, value)
@classmethod
def setComicNames(cls,value):
cls.comicNames_node = cls.setNodeAndValue(cls.comic_names,value)
def setComicNames(cls,value): cls.comic_names_node = cls.setNodeAndValue(cls.comic_names,value)
@classmethod
def setDep(cls,value):
cls.dep_node = cls.setNodeAndValue(cls.dep, value)
def setDep(cls,value): cls.dep_node = cls.setNodeAndValue(cls.dep, value)
@classmethod
def setAuthor(cls,value):
cls.author_node = cls.setNodeAndValue(cls.author,cls.getListToString(value))
def setAuthor(cls,value): cls.author_node = cls.setNodeAndValue(cls.author,cls.getListToString(value))
@classmethod
def setLang(cls,value):
cls.lang_node = cls.setNodeAndValue(cls.lang, value)
def setLang(cls,value): cls.lang_node = cls.setNodeAndValue(cls.lang, value)
@classmethod
def setGenre(cls,value):
cls.genre_node = cls.setNodeAndValue(cls.genre, cls.getListToString(value))
def setGenre(cls,value): cls.genre_node = cls.setNodeAndValue(cls.genre, cls.getListToString(value))
@classmethod
def setTags(cls,value):
cls.tags_node = cls.setNodeAndValue(cls.tags,cls.getListToString(value))
def setTags(cls,value): cls.tags_node = cls.setNodeAndValue(cls.tags,cls.getListToString(value))
@classmethod
def setCBS(cls,value):
cls.cbs_node = cls.setNodeAndValue(cls.cbs,value)
def setCBS(cls,value): cls.cbs_node = cls.setNodeAndValue(cls.cbs,value)
@classmethod
def setWeb(cls,value):
cls.str_web = value
cls.web_node = cls.setNodeAndValue(cls.web,value)
@classmethod
def setChapterListImg(cls,value):
cls.str_list_img=value
def setChapterListImg(cls,value): cls.str_list_img=value
@classmethod
def setValue1(cls,value):
cls.str_value1 = value
def setValue1(cls,value): cls.str_value1 = value
@classmethod
def getValue1(cls):
return cls.str_value1
def getValue1(cls): return cls.str_value1
@classmethod
def getChapterListImg(cls):
return cls.str_list_img
def getChapterListImg(cls): return cls.str_list_img
@classmethod
def setChapterFilesName(cls,value):
cls.str_files_img=value
def setChapterFilesName(cls,value): cls.str_files_img=value
@classmethod
def getChapterFilesName(cls):
return cls.str_files_img
def getChapterFilesName(cls): return cls.str_files_img
@classmethod
def getWeb(cls):
return cls.str_web
def getWeb(cls): return cls.str_web
@classmethod
def setPageCount(cls,value):
@ -196,21 +161,13 @@ class comicInfo():
return cls.str_icon
@classmethod
def setHomePage(cls, value):
cls.str_homePage = value
def setHomePage(cls, value): cls.str_homepage = value
@classmethod
def getHomePage(cls):
return cls.str_homePage
def getHomePage(cls): return cls.str_homepage
@classmethod
def setUpdateAt(cls, value):
cls.str_updateAt = value
def setUpdateAt(cls, value): cls.str_update_at = value
@classmethod
def getUpdateAt(cls):
return cls.str_updateAt
def getUpdateAt(cls): return cls.str_update_at
@classmethod
def getListToString(cls,to_list):
value = to_list
@ -219,28 +176,18 @@ class comicInfo():
return value
@classmethod
def setChapterIndex(cls,value):
cls.str_chapter_index = value
def setChapterIndex(cls,value): cls.str_chapter_index = value
@classmethod
def getChapterIndex(cls):
return cls.str_chapter_index
def getChapterIndex(cls): return cls.str_chapter_index
@classmethod
def setComicNameSkips(cls,value):
return cls.list_skip.append(value)
def setComicNameSkips(cls,value): return cls.list_skip.append(value)
@classmethod
def getIsComicNameSkips(cls,value):
return value in ",".join(cls.list_skip)
def getIsComicNameSkips(cls,value): return value in ",".join(cls.list_skip)
'''
获取网站主页
'''
@classmethod
def getBaseUrl(cls,url=None):
if url == None:
url = cls.str_homePage
url = cls.str_homepage
num = 3
index = 0
for x in range(0, num):
@ -248,65 +195,65 @@ class comicInfo():
return url[0:index-1]
@classmethod
def getIcon(cls):
return cls.str_icon
def getIcon(cls): return cls.str_icon
@classmethod
def getComicName(cls):
return cls.str_comicName
def getComicName(cls): return cls.str_comic_name
@classmethod
def getChapter(cls):
return cls.str_chapter
def getChapter(cls): return cls.str_chapter
@classmethod
def fixFileName(cls,filename,replace=None):
intab = r'[?*/\|.:><]'
str_replace = ""
if not replace == None:
if replace != None:
str_replace = replace
filename = re.sub(intab, str_replace, filename)
count = 1
while True:
str_file = filename[0-count]
if str_file == " ":
count += 1
else:
filename = filename[0:len(filename)+1-count]
break
return filename
@classmethod
def getDirConfComic(cls):
if not cls.str_comicName == None:
return os.path.join(pathStr.base_conf_path, cls.str_comicName)
if cls.str_comic_name != None:
return os.path.join(pathStr.base_conf_path(), cls.str_comic_name)
else:
print("comicName不存在退出中")
exit()
@classmethod
def getDirCBZComic(cls):
if not cls.str_comicName == None:
return os.path.join(pathStr.base_cbz, cls.str_comicName)
if cls.str_comic_name != None:
return os.path.join(pathStr.base_cbz(), cls.str_comic_name)
else:
print("comicName不存在退出中 getDirCBZComic")
exit()
@classmethod
def getDirCBZComicChapter(cls):
if cls.str_comicName != None and cls.str_chapter != None:
return os.path.join(pathStr.base_cbz,cls.str_comicName,cls.str_chapter)
if cls.str_comic_name != None and cls.str_chapter != None:
return os.path.join(pathStr.base_cbz(),cls.str_comic_name,cls.str_chapter)
else:
print("comicName不存在退出中 getDirCBZComicChapter")
exit()
@classmethod
def getSortDirCBZComicChapter(cls):
if cls.str_comicName != None and cls.str_chapter != None and cls.str_chapter_index != None:
return os.path.join(pathStr.base_cbz,cls.str_comicName,str(cls.str_chapter_index)+" "+cls.str_chapter)
if cls.str_comic_name != None and cls.str_chapter != None and cls.str_chapter_index != None:
return os.path.join(pathStr.base_cbz(),cls.str_comic_name,str(cls.str_chapter_index)+" "+cls.str_chapter)
else:
print("comicName不存在退出中 getSortDirCBZComicChapter")
return None
@classmethod
def getNewCBZComicChapter(cls,type="dir"):
return cls.getNewToComicChapter(".CBZ", type)
def getNewCBZComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".CBZ", type)
@classmethod
def getNewIconComicChapter(cls,type="dir"):
return cls.getNewToComicChapter(".jpg", type)
def getNewIconComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".jpg", type)
@classmethod
def getNewToComicChapter(cls,su,type="dir"):
@ -323,43 +270,40 @@ class comicInfo():
@classmethod
def getDirComic(cls):
if cls.str_comicName != None:
return os.path.join(pathStr.base_comic_img, cls.str_comicName)
if cls.str_comic_name != None:
return os.path.join(pathStr.base_comic_img(), cls.str_comic_name)
else:
print("comicName不存在退出中")
exit()
@classmethod
def getDirComicChapter(cls):
if cls.str_comicName != None and cls.str_chapter != None:
return os.path.join(pathStr.base_comic_img,cls.str_comicName,cls.str_chapter)
if cls.str_comic_name != None and cls.str_chapter != None:
return os.path.join(pathStr.base_comic_img(),cls.str_comic_name,cls.str_chapter)
else:
print("comicName与chapter 不存在,退出中")
exit()
@classmethod
def getPathComicInfoXML(cls):
try:
cls.pathComicInfo = os.path.join(pathStr.base_comic_img,cls.str_comicName,cls.str_chapter,"ComicInfo.xml")
cls.path_comic_info = os.path.join(pathStr.base_comic_img(),cls.str_comic_name,cls.str_chapter,"ComicInfo.xml")
except:
return None
return cls.pathComicInfo
return cls.path_comic_info
@classmethod
def writeComicInfoXML(cls,chapter,path=None):
if cls.chapter == cls.fixFileName(chapter):
print(f"cls.chapter {cls.chapter} 与 chapter {chapter} 不相等,已自动跳过")
root = cls.Root()
newDocument = Document()
newDocument.appendChild(root)
root = cls.root_node()
new_document = Document()
new_document.appendChild(root)
if cls.chapter_node != None: root.appendChild(cls.chapter_node)
if cls.comicName_node != None: root.appendChild(cls.comicName_node)
if cls.comic_name_node != None: root.appendChild(cls.comic_name_node)
if cls.dep_node != None: root.appendChild(cls.dep_node)
if cls.author_node != None: root.appendChild(cls.author_node)
if cls.genre_node != None: root.appendChild(cls.genre_node)
if cls.cbs_node != None: root.appendChild(cls.cbs_node)
if cls.lang_node != None: root.appendChild(cls.lang_node)
if cls.comicNames_node != None: root.appendChild(cls.comicNames_node)
if cls.comic_names_node != None: root.appendChild(cls.comic_names_node)
if cls.tags_node != None: root.appendChild(cls.tags_node)
if cls.date_year_node != None: root.appendChild(cls.date_year_node)
if cls.date_month_node != None: root.appendChild(cls.date_month_node)
@ -367,18 +311,16 @@ class comicInfo():
if cls.page_count_node != None: root.appendChild(cls.page_count_node)
if cls.pages_node != None: root.appendChild(cls.pages_node)
cls.getPathComicInfoXML()
if path != None:
cls.pathComicInfo = os.path.join(path,"ComicInfo.xml")
base_dir = os.path.dirname(cls.pathComicInfo)
if not os.path.exists(base_dir):
os.makedirs(base_dir)
if os.path.exists(cls.pathComicInfo):
if path != None: cls.path_comic_info = os.path.join(path,"ComicInfo.xml")
base_dir = os.path.dirname(cls.path_comic_info)
if not os.path.exists(base_dir): os.makedirs(base_dir)
if os.path.exists(cls.path_comic_info):
print("ComicInfo.xml 已存在")
return None
with open(cls.pathComicInfo , "w", encoding="utf-8") as fo:
newDocument.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
with open(cls.path_comic_info , "w", encoding="utf-8") as fo:
new_document.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
fo.close()
print("ComicInfo.xml 已生成 pathd=", cls.pathComicInfo)
print("ComicInfo.xml 已生成 pathd=", cls.path_comic_info)
#文件保存
@classmethod
@ -409,13 +351,13 @@ class comicInfo():
@classmethod
def nextSavePath(cls,next,data=None):
save_path = cls.getDirConfComic()+"/"+next+cls.str_chapter
if not data == None:
if data != None:
cls.file_save(save_path, data)
return save_path
@classmethod
def nextSaveInfoChapter(cls,chapter,data):
if not cls.str_chapter == chapter:
if cls.str_chapter != chapter:
print(f"chapter {cls.str_chapter}{chapter} 不一致,已自动跳过")
cls.nextSavePath("info_",data)
@ -444,8 +386,7 @@ class comicInfo():
shutil.move(cbz,done)
@classmethod
def nextDoneSave(cls,data):
cls.nextSavePath("done_",data)
def nextDoneSave(cls,data): cls.nextSavePath("done_",data)
@classmethod
def nextExistsGetPath(cls,msg,remove=False):
@ -454,31 +395,26 @@ class comicInfo():
if remove and is_exists:
os.remove(path)
return is_exists
@classmethod
def saveConfComicData(cls,fileName,data,comicName=None):
if not comicName == None:
cls.setComicName(comicName)
dirConfComic = cls.getDirConfComic()
save_path = os.path.join(dirConfComic,fileName)
cls.file_save(save_path, data)
def setConfDirComicPath(cls,file_name,comic_name=None):
if comic_name != None:
cls.setComicName(comic_name)
return os.path.join(cls.getDirConfComic(),file_name)
@classmethod
def getPathInitConfComicData(cls,fileName,comicName=None):
if not comicName == None:
cls.setComicName(comicName)
dirConfComic = cls.getDirConfComic()
save_path = os.path.join(dirConfComic,fileName)
return save_path
def saveConfComicData(cls,file_name,data,comic_name=None): cls.file_save(cls.setConfDirComicPath(file_name,comic_name), data)
@classmethod
def getPathInitConfComicData(cls,file_name,comic_name=None): return cls.setConfDirComicPath(file_name,comic_name)
@classmethod
def updateComicDate(cls,date=None):
data = {}
update_at = cls.str_updateAt
update_at = cls.str_update_at
if date != None:
update_at = date
comic_name = cls.str_comicName
update_path = pathStr.base_comic_update
comic_name = cls.str_comic_name
update_path = pathStr.base_comic_update()
update_dir = os.path.dirname(update_path)
if not os.path.exists(update_dir):
os.makedirs(update_dir)
@ -495,9 +431,9 @@ class comicInfo():
def isUpdateComic(cls):
is_update = True
data = {}
c_update_at = cls.str_updateAt
comic_name = cls.str_comicName
update_path = pathStr.base_comic_update
c_update_at = cls.str_update_at
comic_name = cls.str_comic_name
update_path = pathStr.base_comic_update()
update_dir = os.path.dirname(update_path)
if not os.path.exists(update_dir):
os.makedirs(update_dir)

View File

@ -1,22 +1,45 @@
import os,datetime
from time import strftime
class pathStr:
comic_name = None
comic_jm="JM"
comic_bz="BZ"
comic_rm="RM"
comic_url_main = None
#base_comic_out = "COMICOUT"
#base_comic_out = os.path.join("/mnt", "bigTComics","JM")
if comic_url_main != None:
base_comic_out = os.path.join("/mnt", "bigTComics", comic_url_main)
base_comic_out = os.path.join("/mnt", "bigTComics")
base_cbz = os.path.join(base_comic_out,"CBZ")
base_comic_img = os.path.join(base_comic_out,"outputComic")
base_conf_path = os.path.join(base_comic_out,".conf")
base_html_cache = os.path.join(base_comic_out,"html_cache")
base_html_chapter = os.path.join(base_comic_out,"html_updated")
base_comic_update = os.path.join(base_conf_path,"comic_update")
@classmethod
def base_cbz(cls): return cls.getBaseComicPath("CBZ")
@classmethod
def base_comic_img(cls): return cls.getBaseComicPath("outputComic")
@classmethod
def base_conf_path(cls): return cls.getBaseComicPath(".conf")
@classmethod
def base_html_cache(cls): return cls.getBaseComicPath("html_cache")
@classmethod
def base_html_chapter(cls): return cls.getBaseComicPath("html_updated")
@classmethod
def base_comic_update(cls): return cls.getBaseComicPath("comic_update")
@classmethod
def getBaseComicPath(cls,join_path): return os.path.join(cls.base_comic_out,join_path)
@classmethod
def setComicMainAndPath(cls,value):
cls.setComicMain(value)
cls.setComicMainPath(value)
@classmethod
def setComicMain(cls,value): cls.comic_name = value
@classmethod
def getComicMain(cls): return cls.comic_name
@classmethod
def setComicMainPath(cls,value):
cls.comic_url_main = value
if value != cls.comic_rm: cls.base_comic_out = os.path.join(cls.base_comic_out, value)
@classmethod
def base_html_week(cls):

View File

@ -68,7 +68,7 @@ def download_image(image_url, dst_dir, file_name, timeout=20, proxy_type=None, p
print("## Fail: {} {}".format(image_url, e.args))
def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, timeout=20, proxy_type=None, proxy=None,filesName=None):
def download_images(image_urls, dst_dir,timeout=20, proxy_type=None, proxy=None,files_name=None):
"""
Download image according to given urls and automatically rename them in order.
:param timeout:
@ -87,7 +87,7 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, time
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
for image_url in image_urls:
file_name = filesName[count]
file_name = files_name[count]
future_list.append(executor.submit(
download_image, image_url, dst_dir, file_name, timeout, proxy_type, proxy))
count += 1
@ -99,22 +99,22 @@ def download_comic_icon():
print("icon 不存在,已跳过")
return None
save_name = "cover"
icon_su = "."+str(icon_url).split(".")[-1]
icon_su = icon_su.split("?")[0]
icon_prefix = "."+str(icon_url).split(".")[-1]
icon_prefix = icon_prefix.split("?")[0]
#判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过
pathComicIcon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_su)
if not os.path.exists(pathComicIcon):
download(icon_url, pathComicIcon)
pathCBZComic = comicInfo.getDirCBZComic()
if not os.path.exists(pathCBZComic):
os.makedirs(pathCBZComic)
save_path = os.path.join(pathCBZComic,comicInfo.getChapter()+icon_su)
shutil.copy(pathComicIcon, save_path)
print(f"{pathComicIcon} 已复制至: {save_path}")
path_comic_icon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_prefix)
if not os.path.exists(path_comic_icon):
download(icon_url, path_comic_icon)
path_cbz_comic = comicInfo.getDirCBZComic()
if not os.path.exists(path_cbz_comic):
os.makedirs(path_cbz_comic)
save_path = os.path.join(path_cbz_comic,comicInfo.getChapter()+icon_prefix)
shutil.copy(path_comic_icon, save_path)
print(f"{path_comic_icon} 已复制至: {save_path}")
comicInfo.nextDownloadToCBZChapter()
# 定义下载函数
def download(url,path,fileType=None):
def download(url,path,file_type=None):
if os.path.exists(path):
if imghdr.what(path):
msg = "已存在同路径文件,已跳过:"+path
@ -134,7 +134,7 @@ def download(url,path,fileType=None):
print(f'重试:第{repair_count}{url}')
repair_count += 1
#判断是否为图片
if fileType == "image":
if file_type == "image":
if 'image' not in res.headers.get("content-type",""):
print(f"url= {url} Error: URL doesnot appear to be an image")
basedir= os.path.dirname(path)

View File

@ -1,281 +0,0 @@
import hashlib
import json,os,time,random,shutil
import re,math
from utils.HtmlUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.CBZUtils import CBZUtils
from utils.CBZUtils import verUtils
from utils.downloader import download_images
from utils.Ntfy import ntfy
from PIL import Image
class comicEntity:
count_chapter = 0
aid = None
repeat = 0
@classmethod
# Rewrite the host portion of an album URL to a mirror domain.
# NOTE(review): newurl_list is hard-coded to "" (falsy), so the rewrite
# branch below never executes and the URL is always returned unchanged —
# this looks like a disabled mirror-list placeholder; confirm before use.
def baseReUrl(cls,url):
newurl_list=""
if newurl_list:
# Only rewrite when the current host is not already one of the mirrors.
if re.findall(r'https://(.*?)/\w+/\d+/',url)[0] not in newurl_list:
for newurl in newurl_list:
url = re.sub(re.findall(r'https://(.*?)/\w+/\d+/', url)[0], newurl, url)
return url
@classmethod
# Crawl a listing page and download every comic linked from it.
# For each album: set the comic name, then sleep a random 5-20s before a
# fresh download (rate limiting) or note it already exists, and delegate
# the per-comic work to cls.oneComic.
def downladsComcis(cls,url):
# comic titles
comic_href_list = htmlUtils.xpathData("//div[@class='thumb-overlay-albums']/a/@href",url,update=True)
comics_name = htmlUtils.xpathData("//span[@class='video-title title-truncate m-t-5']/text()")
len_books = len(comic_href_list)
base_url = comicInfo.getBaseUrl(url)
# NOTE(review): assumes comics_name and comic_href_list are parallel
# lists of equal length — an IndexError follows if the page differs.
for x in range(0, len_books):
book_name = comics_name[x]
comicInfo.setComicName(book_name)
comic_href = base_url+comic_href_list[x]
random_int = random.randint(5,20)
dir_conf_comic = comicInfo.getDirConfComic()
# A missing conf dir means this comic was never downloaded before.
if not os.path.exists(dir_conf_comic):
ntfy.sendMsg(f"{random_int}秒后开始下载 漫画:{book_name}")
time.sleep(random_int)
else:
ntfy.sendMsg(f"已存在 漫画:{book_name}")
cls.oneComic(comic_href, random.uniform(0,10))
#print(books)
#for comicHref in comicsHref:
# cls.oneComic(comicHref,random.uniform(10,20))
@classmethod
# Scrape one album page: collect metadata (name, tags, author, icon,
# description), push it into comicInfo, then iterate the chapter list and
# download each chapter that has no "done_" marker yet. Finally removes
# the comic's working image directory and optionally sleeps.
def oneComic(cls,c_url,sleep=None):
# Chapter anchor elements from the toolbar; update=True refreshes cache.
nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a",url=c_url,update=True)
book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()",num=0)
# Strip characters that are illegal in file/dir names.
book_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>]', '', book_name)
tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
# NOTE(review): assumes book_msg[0] is the site's album id and
# book_msg[1] the description line — confirm against page layout.
jmid = book_msg[0]
dep = str(book_msg[1]).replace("叙述:","")
icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src",num=0)
comicInfo.setIcon(icon)
comicInfo.setHomePage(c_url)
comicInfo.setComicName(book_name)
comicInfo.setAuthor(author)
comicInfo.setDep(dep)
comicInfo.setTags(tags)
comicInfo.setCBS("韩漫")
comicInfo.setLang("zh")
albumid = re.search(r'/album/(\d+)', c_url).group(1)
referer = re.search(r'(https://\w+\.\w+)/', c_url).group(1)
if nums:
list_chapter_name = []
list_chapter_href = []
cls.count_chapter = 0
for i in nums:
photo_name_list = i.xpath("li/text()")[0].split()
photo_date = i.xpath("li/span/text()")[0].split()
#print(re.findall(r'[\u4E00-\u9FA5]+.*?', i.xpath("li/text()")[0]))
# Keep a CJK subtitle token when present; fall back to the first token.
try:
if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]):
photo_name=re.sub(r'\s','',photo_name_list[0])+' '+photo_name_list[2]
else:photo_name=re.sub(r'\s','',photo_name_list[0])
except Exception as e:
photo_name = re.sub(r'\s', '', photo_name_list[0])
photo_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>\-]', '',photo_name)
#print(photo_name)
photoid=i.attrib['data-album']
cls.aid = photoid
comicInfo.setChapterName(photo_name)
comicInfo.setDate(photo_date[0],split='-')
comicInfo.setWeb(referer+i.attrib['href'])
is_scramble = False
# NOTE(review): ids above 220980 presumably use scrambled images —
# magic threshold, verify against the site's obfuscation rollout.
if int(photoid) > 220980:
is_scramble = True
# Skip chapters already marked done with a "done_" file.
if not comicInfo.nextExistsGetPath("done_"):
comicEntity.comicChapter(referer+i.attrib['href'],scramble=is_scramble,sleep=random.randint(5,15))
# If the done-marker exists, verify the CBZ archive is complete.
if comicInfo.nextExistsGetPath("done_"):
verUtils.verCBZ()
cls.count_chapter += 1
# After the whole comic: wait, then clear the working image folder.
path_dir_comic = comicInfo.getDirComic()
if os.path.exists(path_dir_comic):
shutil.rmtree(path_dir_comic)
if sleep != None:
time.sleep(sleep)
'''
读取某章节下所有图片
'''
@classmethod
def comicChapter(cls,chapter_url,scramble=None,sleep=None):
is_next = True
# try:
is_next = cls.Onechapter(chapter_url,scramble)
#进入下个阶段
if comicInfo.nextExistsGetPath("down_"):
#章节图片全部下载后,调用下载封面
netUtils.downloadComicIcon()
#下个阶段
if comicInfo.nextExistsGetPath("cbz_"):
time.sleep(0.1)
#下载后自动打包
is_next = CBZUtils.packAutoComicChapterCBZ()
#完成删除原文件
remove_path = comicInfo.getDirComicChapter()
shutil.rmtree(remove_path)
print(f"文件已删除: {remove_path}")
# except:
# ntfy.sendMsg(f"{comicInfo.getComicName()} 下载出错了")
# is_next = False
#ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / "+ str(comicInfo.getLenChapters()))
if sleep != None and is_next == True:
ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节")
time.sleep(sleep)
@classmethod
def Onechapter(cls,chapter_url,scramble=None):
if not str(chapter_url).startswith("http"):
chapter_url = comicInfo.getBaseUrl() + chapter_url
try:
is_next = cls.comicChapterDownload(chapter_url,scramble)
except:
htmlUtils.remove_HtmlCache(chapter_url)
cls.repeat = 0
is_next = cls.comicChapterDownload(chapter_url,scramble)
comicInfo.nextInfoToImgChapter()
#下载完成后, 开始解密图片
if scramble:
#获取章节图片路径
chapter_dir = comicInfo.getDirComicChapter()
dirs = os.listdir(chapter_dir)
for img in dirs:
is_scramble = str(img).startswith("scramble=")
if is_scramble:
c_path = os.path.join(chapter_dir, img)
#imageUtils.getScrambleImage(c_path)
cls.encode_scramble_image(c_path)
#进入下一阶段
comicInfo.nextImgToDownloadChapter()
return is_next
    @classmethod
    def comicChapterDownload(cls,chapter_url,c_scramble):
        """Fetch the chapter page, record its metadata, and download all page
        images, looping (and rotating "?shunt=" CDN mirrors after repeated
        failures) until the number of .jpg files on disk matches the page list.

        :param chapter_url: full URL of the chapter reader page.
        :param c_scramble: truthy when the album is scrambled; files are then
            saved as "scramble=<blocks>_<name>.jpg" for later decoding.
        :return: True when the download loop decides the chapter is complete.
        """
        # Lazy-loaded page images carry their real URL in @data-original.
        img_list =htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center')]/img/@data-original",url=chapter_url,update=True)
        pages_imgs =htmlUtils.xpathData("//div[@class='center scramble-page']/@id",url=chapter_url)
        comicInfo.setPages(pages_imgs)
        comicInfo.writeComicInfoXML(comicInfo.str_chapter)
        #print("img_list:",len(img_list))
        list_img = []
        list_file_name = []
        for i in img_list:
            img_url= i
            img_name = os.path.basename(img_url).split('.')[0]
            if c_scramble:
                # Prefix the slice count so encode_scramble_image can decode it.
                img_name = "scramble="+str(cls.get_scramble_num(cls.aid,img_name))+"_"+img_name
            #path_img = "%s\\%s.jpg" % (cls.aid, img_name)
            path_img = "%s.jpg" % (img_name)
            list_img.append(img_url)
            list_file_name.append(path_img)
        comicInfo.setChapterImgs(list_img)
        # Persist the image list / stage info before downloading.
        comicInfo.nextSaveInfoChapter(comicInfo.str_chapter, list_img)
        # Skip the download loop entirely when a complete CBZ already exists.
        is_next = verUtils.verNextCBZ(list_img)
        # CDN mirror suffixes rotated through when downloads come up short.
        list_shunt = ["?shunt=2","?shunt=1","?shunt=3",""]
        while not is_next:
            time.sleep(10)
            download_images(list_img,comicInfo.getDirComicChapter(), filesName=list_file_name,timeout=1500)
            # Count the .jpg files on disk and compare with the expected total.
            file_imgs = os.listdir(comicInfo.getDirComicChapter())
            count_jpg = ",".join(file_imgs).split(".jpg")
            is_next = len(count_jpg)-1 == len(list_img)
            # NOTE(review): cls.repeat is (re)initialised by the caller's retry
            # path in Onechapter — presumably also a class attribute; verify.
            cls.repeat += 1
            if cls.repeat > 3:
                # Too many failures: retry through another mirror.  The
                # recursive call's return value is discarded; the while loop
                # re-checks the on-disk file count on its next iteration.
                url = list_shunt[cls.repeat % len(list_shunt)]
                print("分流中=",url)
                cls.comicChapterDownload(str(chapter_url).split("?")[0]+url,c_scramble)
        return True
@classmethod
def get_md5(cls,num):
result1 = hashlib.md5(num.encode()).hexdigest()
print('get_md5-', result1)
return result1
@classmethod
def get_scramble_num(cls,e, t):
#print(type(e),e, type(t),t)
a = 10
try:
num_dict = {}
for i in range(10):
num_dict[i] = i * 2 + 2
if (int(e) >= 268850):
n = str(e) + t;
# switch(n=(n = (n = md5(n)).substr(-1)), n %= 10) {
#print("n=",n)
tmp = ord(cls.get_md5(n)[-1])
result = num_dict[tmp % 10]
a = result
return a
except Exception as e:
print(e.__traceback__.tb_lineno,e)
return False
@classmethod
def encode_scramble_image(cls,imgpath):
image = Image.open(imgpath)
w, h = image.size
#image.show()
file_str = str(imgpath).split("=")
#10_29.jpg
base_dir = file_str[0].replace("scramble","")
base_name = file_str[-1]
base_fn = base_name.split("_")
save_name = base_fn[1]
save_name_delesu = save_name.split(".")[0]
blocks = int(base_fn[0])
img_type = os.path.basename(imgpath).split('.')[-1]
save_path = os.path.join(os.path.dirname(imgpath),save_name_delesu+"."+img_type)
# print(type(aid),type(img_name))
if blocks:
s = blocks # 随机值
# print(s)
l = h % s # 切割最后多余的值
box_list = []
hz = 0
for i in range(s):
c = math.floor(h / s)
g = i * c
hz += c
h2 = h - c * (i + 1) - l
if i == 0:
c += l;hz += l
else:
g += l
box_list.append((0, h2, w, h - g))
# print(box_list,len(box_list))
item_width = w
# box_list.reverse() #还原切图可以倒序列表
# print(box_list, len(box_list))
newh = 0
image_list = [image.crop(box) for box in box_list]
# print(box_list)
newimage = Image.new("RGB", (w, h))
for image in image_list:
# image.show()
b_w, b_h = image.size
newimage.paste(image, (0, newh))
newh += b_h
newimage.save(save_path)
if os.path.exists(imgpath):
os.remove(imgpath)