update version

This commit is contained in:
caiwx86 2023-04-04 06:29:49 +08:00
parent 57a0ae6841
commit c62151bc71
25 changed files with 1271 additions and 1408 deletions

3
.gitignore vendored
View File

@ -1,3 +1,4 @@
COMICOUT/ COMICOUT/
.conf/ .conf/
**/__pycache__/** **/__pycache__/**
.vscode

View File

@ -1,16 +1,16 @@
import json,os,time,random,shutil import json,os,time,random,shutil
from utils.HtmlUtils import htmlUtils from utils.NetUtils import htmlUtils
from utils.FileUtils import imageUtils from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo as ci from utils.ComicUtils import CBZUtils
from utils.CBZUtils import CBZUtils from utils.NetUtils import downloadUtils
from utils.downloader import download_images from utils.ComicUtils import ntfy
from utils.downloader import download_comic_icon
from utils.Ntfy import ntfy
from entity.down.RouMan import comicCommon as RouManComicCommon
from entity.down.JM import comicCommon as JMComicCommon
from entity.down.BaoZi import comicCommon as BaoZiComicCommon
from utils.comic.PathStr import pathStr
from utils.FileUtils import fileUtils as fu from utils.FileUtils import fileUtils as fu
from domain.Domains import domains
from common.ComicInfo import ComicInfoUtils as ciUtils
from common.ComicInfo import ComicInfo as ci
from common.Comic import Comic
from common.Comic import ListComic
from common.Constant import ComicPath
class baseComic: class baseComic:
count_chapter = 0 count_chapter = 0
@ -18,28 +18,37 @@ class baseComic:
#校验该漫画是否为最新 #校验该漫画是否为最新
# Y/跳过 N/下载 返回下载链接 # Y/跳过 N/下载 返回下载链接
@classmethod @classmethod
def updateComics(cls): def updateComics(cls,chapters_xpath):
(book_name,comic_href,updated) = [ci.getComicName(),ci.getCurrentChapterImg(),ci.getUpdateAt()] comics = ListComic.getListComicsLinksUpdateAt()
try:
(book_name,comic_href,updated) = [comics[0],comics[1],comics[2]]
except:
return False
cls.updateOneComic(book_name,comic_href,updated,chapters_xpath)
return True
@classmethod
def updateOneComic(cls,book_name,comic_href,update_at,chapters_xpath):
#白名单跳过 #白名单跳过
if ci.getIsComicNameSkips(book_name): return None if ciUtils.getIsComicNameSkips(book_name): return None
if not ci.isUpdateComic(): if not ciUtils.isUpdateComic():
ntfy.sendMsg(f"开始下载 漫画:{book_name}") ntfy.sendMsg(f"开始下载 漫画:{book_name}")
return comic_href Comic.setCurrentDownLink(comic_href)
else: else:
ntfy.sendMsg(f"{book_name} 已是最新") ntfy.sendMsg(f"{book_name} 已是最新")
chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()',url=comic_href,update=False) chapters = htmlUtils.xpathData(chapters_xpath,url=comic_href,update=False)
chapter_index = 1 chapter_index = 1
for chapter in chapters: for chapter in chapters:
ci.setChapterIndex(chapter_index) Comic.setNumber(chapter_index)
ci.setChapterName(chapter) Comic.setChapter(chapter)
cbz_path = ci.getNewCBZComicChapter("file") cbz_path = ComicPath.getNewCBZComicChapter("file")
icon_path = ci.getNewIconComicChapter("file") icon_path = ComicPath.getNewIconComicChapter("file")
CBZUtils.replaceZip(cbz_path) CBZUtils.replaceZip(cbz_path)
#判断漫画是否完成 #判断漫画是否完成
if ci.isProgress(ci.PROGRESS_DONE) and not os.path.exists(cbz_path): ci.isProgress(ci.PROGRESS_DONE,remove=True) if ciUtils.isProgress(ciUtils.PROGRESS_DONE) and not os.path.exists(cbz_path): ciUtils.isProgress(ciUtils.PROGRESS_DONE,remove=True)
if not os.path.exists(cbz_path): if not os.path.exists(cbz_path):
ci.updateComicDate("0") ciUtils.updateLastDate("0")
return comic_href Comic.setCurrentDownLink(comic_href)
chapter_index = chapter_index + 1 chapter_index = chapter_index + 1
return None return None
@ -48,17 +57,21 @@ class baseComic:
ci.setComicInfo(homepage=url,comicname=title,author=author,icon=icon,tags=tags,dep=dep,genre=genre,lang=lang,age_rating=age_rating,chapters=chapters) ci.setComicInfo(homepage=url,comicname=title,author=author,icon=icon,tags=tags,dep=dep,genre=genre,lang=lang,age_rating=age_rating,chapters=chapters)
cls.count_chapter = 0 cls.count_chapter = 0
for href in chapter_href: for href in chapter_href:
ci.setChapterName(chapters[cls.count_chapter]) Comic.setChapterName(chapters[cls.count_chapter])
ci.setChapterIndex(cls.count_chapter+1) Comic.setNumber(cls.count_chapter+1)
#存在完成配置文件 但文件不存在 将清空完成配置文件 #存在完成配置文件 但文件不存在 将清空完成配置文件
if ci.isProgress(ci.PROGRESS_DONE) and not fu.exists(ci.getNewCBZComicChapter("file")): ci.isProgress(ci.PROGRESS_DONE,remove=True) if ciUtils.isProgress(ciUtils.PROGRESS_DONE) and not fu.exists(ComicPath.getNewCBZComicChapter("file")):
ciUtils.isProgress(ciUtils.PROGRESS_DONE,remove=True)
#不存在完成配置文件 则允许下载 #不存在完成配置文件 则允许下载
if not ci.isProgress(ci.PROGRESS_DONE): cls.comicChapters(href,scramble=True,sleep=random.randint(1,5)) if not ciUtils.isProgress(ciUtils.PROGRESS_DONE):
cls.comicChapters(href,scramble=True,sleep=random.randint(1,5))
cls.count_chapter += 1 cls.count_chapter += 1
#一本漫画下载后等待 #一本漫画下载后等待
#清空文件夹 #清空文件夹
if os.path.exists(ci.getDirComic()): shutil.rmtree(ci.getDirComic()) if os.path.exists(ComicPath.getDirComic()): shutil.rmtree(ComicPath.getDirComic())
if sleep != None: time.sleep(sleep) if sleep != None: time.sleep(sleep)
#完成 更新最近一次时间
ciUtils.updateLastDate()
''' '''
@ -66,16 +79,17 @@ class baseComic:
''' '''
@classmethod @classmethod
def comicChapters(cls,chapter_url,scramble=None,sleep=None): def comicChapters(cls,chapter_url,scramble=None,sleep=None):
is_next = False
#try: #try:
cls.Onechapter(chapter_url,scramble) cls.Onechapter(chapter_url,scramble)
#进入下个阶段 #进入下个阶段
#章节图片全部下载后,调用下载封面 #章节图片全部下载后,调用下载封面
if ci.isProgress(ci.PROGRESS_DOWN): download_comic_icon() if ciUtils.isProgress(ciUtils.PROGRESS_DOWN): downloadUtils.download_comic_icon()
#下个阶段 #下个阶段
if ci.isProgress(ci.PROGRESS_CBZ): is_next = CBZUtils.packAutoComicChapterCBZ() if ciUtils.isProgress(ciUtils.PROGRESS_CBZ): is_next = CBZUtils.packAutoComicChapterCBZ()
#except Exception as e: is_next = ntfy.sendMsg(f"{ci.getComicName()} 下载出错了",error=e) #except Exception as e: is_next = ntfy.sendMsg(f"{ci.getComicName()} 下载出错了",error=e)
ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / "+ str(ci.getLenChapters())) ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / "+ str(Comic.getLenChapters()))
ci.setChapterIndex(cls.count_chapter + 1) Comic.setNumber(cls.count_chapter + 1)
if sleep != None and is_next: ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节",sleep=sleep) if sleep != None and is_next: ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节",sleep=sleep)
#根据章节地址下载全部图片并将文件名scramble开头的图片进行解密 #根据章节地址下载全部图片并将文件名scramble开头的图片进行解密
@ -85,49 +99,46 @@ class baseComic:
if not str(chapter_url).startswith("http"): chapter_url = ci.getBaseUrl() + chapter_url if not str(chapter_url).startswith("http"): chapter_url = ci.getBaseUrl() + chapter_url
#下载图片 #下载图片
is_next = cls.comicChapterDownload(chapter_url) is_next = cls.comicChapterDownload(chapter_url)
ci.nextInfoToImgChapter() ciUtils.nextInfoToImgChapter()
#下载完成后, 开始解密图片 #下载完成后, 开始解密图片
chapter_dir = ci.getDirComicChapter() chapter_dir = ComicPath.getDirComicChapter()
if scramble and os.path.exists(chapter_dir): #获取章节图片路径 if scramble and os.path.exists(chapter_dir): #获取章节图片路径
dirs = os.listdir(chapter_dir) dirs = os.listdir(chapter_dir)
for img in dirs: for img in dirs:
if img.startswith("scramble="): if img.startswith("scramble="):
imageUtils.encode_scramble_image(os.path.join(chapter_dir,img)) imageUtils.encode_scramble_image(os.path.join(chapter_dir,img))
#进入下一阶段 #进入下一阶段
ci.nextImgToDownloadChapter() ciUtils.nextImgToDownloadChapter()
return is_next return is_next
@classmethod @classmethod
def comicChapterDownload(cls,url,is_next=True): def comicChapterDownload(cls,url,is_next=True):
#获取本次工程的HOME目录 #获取本次工程的HOME目录
comic_main = pathStr.getComicMain()
try: try:
if comic_main == pathStr.comic_jm: JMComicCommon.comicChapterDownload(url) domains.setdomain(url)
if comic_main == pathStr.comic_bz: BaoZiComicCommon.comicChapterDownload(url)
if comic_main == pathStr.comic_rm: RouManComicCommon.comicChapterDownload(url)
except: except:
htmlUtils.remove_HtmlCache(url) htmlUtils.remove_HtmlCache(url)
cls.comicChapterDownload(url,is_next) cls.comicChapterDownload(url,is_next)
if comic_main == None: print("comic_main为空退出中...") & exit()
(list_img,files_name,chapter_name,book_name) = [ci.getChapterListImg(),ci.getChapterFilesName(),ci.getChapter(),ci.getComicName()] (list_img,files_name,chapter_name,book_name) = [Comic.chapter_imgs,Comic.file_chapter_imgs,
Comic.chapter,Comic.comic_name]
#保存信息 #保存信息
ci.nextSaveInfoChapter(chapter_name,list_img) ciUtils.nextSaveInfoChapter(chapter_name,list_img)
#验证数据是已存在且是否完整 #验证数据是已存在且是否完整
cbz_file = ci.getNewFileCBZComicChapter() cbz_file = ComicPath.getNewFileCBZComicChapter()
#更新Icon #更新Icon
ci.getNewIconComicChapter() ComicPath.getNewIconComicChapter()
#检验CBZ文件 #检验CBZ文件
CBZUtils.verCBZComic(cbz_file) CBZUtils.verCBZComic(cbz_file)
is_next = CBZUtils.nextCBZ() is_next = CBZUtils.nextCBZ()
is_old=CBZUtils.updateOldCBZ(files_name) is_old=CBZUtils.updateOldCBZ(files_name)
#不存在ComicInfo.xml则生成 #不存在ComicInfo.xml则生成
if is_next and fu.notExists(ci.getPathComicInfoXML()): ci.writeComicInfoXML(chapter_name) if is_next and fu.notExists(ci.getPathComicInfoXML()): ci.writeComicInfoXML(chapter_name)
if is_next and not is_old: if is_next:
# ntfy.sendMsg(f"{book_name} {chapter_name} 下载中") # ntfy.sendMsg(f"{book_name} {chapter_name} 下载中")
download_images(list_img,ci.getDirComicChapter(), files_name=files_name,concurrency=None,timeout=8) downloadUtils.download_images(list_img,ComicPath.getDirComicChapter(),files_name=files_name,concurrency=None,timeout=8)
# ntfy.sendMsg("等待数据检验中...",sleep=0.5) # ntfy.sendMsg("等待数据检验中...",sleep=0.5)
is_next = fu.equImages(ci.getDirComicChapter(),list_img) is_next = fu.equImages(ComicPath.getDirComicChapter(),list_img)
# if not is_next: ntfy.sendMsg(msg=f"下载数据(不完整,{int(repeat*2)}秒钟后尝试第{repeat}次",sleep=int(repeat*2)) # if not is_next: ntfy.sendMsg(msg=f"下载数据(不完整,{int(repeat*2)}秒钟后尝试第{repeat}次",sleep=int(repeat*2))
# repeat += 1 # repeat += 1
return is_next return is_next

206
common/Comic.py Normal file
View File

@ -0,0 +1,206 @@
import json,re
from opencc import OpenCC
from queue import Queue
from utils.OldUtils import OldUtils
class Comic:
    """Class-level holder for the comic currently being processed.

    No instances are created: the download pipeline writes fields here as it
    scrapes a comic and the ComicInfo.xml generation reads them back out.
    """
    # chapter name / comic title / chapter number / summary / author
    (chapter, comic_name, number, dep, author) = [None, None, None, None, None]
    # genre / language / age rating / tags / total page count
    (genre, language, agerating, tags, page_count) = [None, None, None, None, None]
    # <Pages> XML node / publisher / year / month / day
    (pages, cbs, year, month, day) = [None, None, None, None, None]
    # homepage URL / alias names  (duplicate second `homepage` declaration removed)
    (homepage, comic_names) = [None, None]
    # cover URL / chapter title list / chapter image URLs /
    # update timestamp / current chapter cover image / image file names
    (icon, list_chapter, chapter_imgs,
     update_at, current_chapter_img, file_chapter_imgs) = [None, None, None, None, None, None]
    # link of the comic currently queued for download
    CURRENT_DOWN_LINK = None

    @classmethod
    def ChineseConvert(cls, text, convert='t2s'):
        """Convert text between Chinese variants (default: traditional -> simplified)."""
        return OpenCC(convert).convert(str(text))

    @classmethod
    def fixFileName(cls, filename, replace=None):
        """Strip characters that are illegal in file names, plus trailing spaces.

        Non-string input is returned untouched. `replace` substitutes the
        stripped characters instead of deleting them.
        """
        if not isinstance(filename, str):
            return filename
        # BUG FIX: the original pattern r'[?*/\|.:><]' escaped '|' instead of
        # matching a literal backslash, so '\' slipped through into file names.
        intab = r'[?*/\\|.:><]'
        str_replace = replace if replace is not None else ""
        filename = re.sub(intab, str_replace, filename)
        # BUG FIX: the original manual trailing-space scan raised IndexError on
        # empty or all-space names; rstrip(" ") is the safe equivalent.
        return filename.rstrip(" ")

    @classmethod
    def setValue(cls, value):
        # every stored text value is normalised to simplified Chinese
        # (note: this also stringifies non-str values such as ints)
        if value != None: value = cls.ChineseConvert(value)
        return value

    @classmethod
    def getValue(cls, dict, exec=None):
        # optionally resolve a dotted key path before returning
        if exec != None: return cls.parseExec(dict, exec=exec)
        return dict

    @classmethod
    def parseExec(cls, data, exec, item=True):
        """Resolve a dotted key path (e.g. "props.pageProps") inside dict or JSON text."""
        if data != None and exec != None:
            dots = str(exec).split(".")
            if not isinstance(data, dict): data = json.loads(data)
            for dot in dots:
                data = data.get(dot)
        return data

    @classmethod
    def getChapter(cls): return cls.chapter

    # chapter name (also remembered pre-conversion via OldUtils)
    @classmethod
    def setChapterName(cls, value, exec=None):
        value = cls.fixFileName(cls.parseExec(value, exec=exec))
        OldUtils.setOldChapter(value)
        cls.chapter = cls.setValue(value)

    @classmethod
    def getChapterName(cls): return cls.getValue(cls.chapter)

    # comic title (the earlier duplicate getComicName/getNumber definitions,
    # shadowed by the ones below, have been removed)
    @classmethod
    def setComicName(cls, value, exec=None):
        value = cls.fixFileName(cls.parseExec(value, exec=exec))
        OldUtils.setOldComicName(value)
        cls.comic_name = cls.setValue(value)

    @classmethod
    def getComicName(cls): return cls.getValue(cls.comic_name)

    # chapter number (stored as str — see setValue)
    @classmethod
    def setNumber(cls, value): cls.number = cls.setValue(value)
    @classmethod
    def getNumber(cls): return cls.getValue(cls.number)

    # summary
    @classmethod
    def setDep(cls, value, exec=None):
        cls.dep = cls.setValue(cls.parseExec(value, exec=exec))
    @classmethod
    def getDep(cls): return cls.getValue(cls.dep)

    # author
    @classmethod
    def setAuthor(cls, value): cls.author = cls.setValue(value)
    @classmethod
    def getAuthor(cls): return cls.getValue(cls.author)

    # genre
    @classmethod
    def setGenre(cls, value): cls.genre = cls.setValue(value)
    @classmethod
    def getGenre(cls): return cls.getValue(cls.genre)

    # language
    @classmethod
    def setLanguage(cls, value): cls.language = cls.setValue(value)
    @classmethod
    def getLanguage(cls): return cls.getValue(cls.language)

    # age rating
    @classmethod
    def setAgeRating(cls, value): cls.agerating = cls.setValue(value)
    @classmethod
    def getAgeRating(cls): return cls.getValue(cls.agerating)

    # tags
    @classmethod
    def setTags(cls, value): cls.tags = cls.setValue(value)
    @classmethod
    def getTags(cls): return cls.getValue(cls.tags)

    # total page count
    @classmethod
    def setPageCount(cls, value): cls.page_count = cls.setValue(value)
    @classmethod
    def getPageCount(cls): return cls.getValue(cls.page_count)

    # plain (non-converted) fields below
    @classmethod
    def setHomePage(cls, value): cls.homepage = value
    @classmethod
    def getHomePage(cls): return cls.homepage
    @classmethod
    def setIcon(cls, value): cls.icon = value
    @classmethod
    def getIcon(cls): return cls.icon
    @classmethod
    def setListChapter(cls, value): cls.list_chapter = value
    @classmethod
    def getListChapter(cls): return cls.list_chapter
    @classmethod
    def getLenChapters(cls): return len(cls.list_chapter)
    @classmethod
    def setChapterImgs(cls, value, exec=None, item=None):
        cls.chapter_imgs = cls.parseExec(value, exec=exec, item=item)
    @classmethod
    def getChapterImgs(cls): return cls.chapter_imgs
    @classmethod
    def setUpdateAt(cls, value): cls.update_at = value
    @classmethod
    def getUpdateAt(cls): return cls.update_at
    @classmethod
    def setCurrentChapterImg(cls, value): cls.current_chapter_img = value
    @classmethod
    def getCurrentChapterImg(cls): return cls.current_chapter_img
    @classmethod
    def setChapterFilesName(cls, value): cls.file_chapter_imgs = value
    @classmethod
    def getChapterFilesName(cls): return cls.file_chapter_imgs
    @classmethod
    def setCurrentDownLink(cls, value): cls.CURRENT_DOWN_LINK = value
    @classmethod
    def getCurrentDownLink(cls): return cls.CURRENT_DOWN_LINK
class ListComic:
    """Staging area for a batch of scraped comics awaiting download.

    Listing pages push parallel lists (names / links / update timestamps);
    the download loop pops [name, link, update_at] triples off the queue.
    """
    LIST_COMIC_QUEUE = Queue()
    (LIST_COMIC_NAME, LIST_COMIC_LINK, LIST_COMIC_UPDATEAT) = [None, None, None]

    @classmethod
    def setListComicsLinksUpdateAt(cls, names, links, update_at):
        """Enqueue one [name, link, update_at] triple per listed comic."""
        if isinstance(names, list) and isinstance(links, list) and isinstance(update_at, list):
            for x in range(len(names)):
                cls.LIST_COMIC_QUEUE.put([names[x], links[x], update_at[x]])

    @classmethod
    def getListComicsLinksUpdateAt(cls):
        """Pop the next triple; flushes any staged lists into the queue first.

        Raises queue.Empty when nothing is pending (callers catch this).
        """
        if cls.LIST_COMIC_NAME is not None and cls.LIST_COMIC_LINK is not None:
            cls.setListComicsLinksUpdateAt(cls.LIST_COMIC_NAME, cls.LIST_COMIC_LINK, cls.LIST_COMIC_UPDATEAT)
            (cls.LIST_COMIC_NAME, cls.LIST_COMIC_LINK, cls.LIST_COMIC_UPDATEAT) = [None, None, None]
        return cls.LIST_COMIC_QUEUE.get(False)

    @classmethod
    def addListComicChapterLink(cls, name, link, update_at):
        """Enqueue a single comic triple."""
        if name is not None and link is not None:
            # BUG FIX: the original called Queue.put(name, link, update_at),
            # passing link/update_at as the block/timeout arguments instead of
            # enqueuing a triple like setListComicsLinksUpdateAt does.
            cls.LIST_COMIC_QUEUE.put([name, link, update_at])

    @classmethod
    def getListValue(cls, result, type, start_add=None, result_type="list"):
        """Project key `type` out of a list of dicts, optionally prefixing each value."""
        if result is None: return None
        if type is None: return result
        if result_type == "list":
            data = []
            for x in range(len(result)):
                if start_add is not None:
                    data.append(start_add + result[x].get(type))
                else:
                    data.append(result[x].get(type))
            return data
        return result

    @classmethod
    def setListComicName(cls, value, type=None): cls.LIST_COMIC_NAME = cls.getListValue(value, type)
    @classmethod
    def getListComicName(cls): return cls.LIST_COMIC_NAME
    @classmethod
    def setListComicChapterLink(cls, value, type=None, start_add=None): cls.LIST_COMIC_LINK = cls.getListValue(value, type, start_add)
    @classmethod
    def setListComicUpdateAt(cls, value, type=None): cls.LIST_COMIC_UPDATEAT = cls.getListValue(value, type)
    @classmethod
    def getListComicUpdateAt(cls): return cls.LIST_COMIC_UPDATEAT

    # NOTE: an earlier duplicate getListComicChapterLink returning
    # LIST_COMIC_LINK was shadowed by this definition and has been removed.
    @classmethod
    def getListComicChapterLink(cls): return cls.LIST_COMIC_QUEUE.get(False)
#domain end....

254
common/ComicInfo.py Normal file
View File

@ -0,0 +1,254 @@
import json,os
from xml.dom.minidom import Document
from common.Constant import pathStr
from utils.FileUtils import dbUtils
from utils.FileUtils import fileUtils
from common.Comic import Comic
from common.Constant import ComicPath
class ComicInfoEntity:
    """Maps the current Comic state onto ComicInfo.xml element names."""
    @classmethod
    def getNodes(cls):
        """Return [value, tag] pairs, in ComicInfo.xml output order.

        The homepage entry ([Comic.homepage, "Web"]) is intentionally omitted.
        """
        return [
            [Comic.chapter, "Title"],            # chapter name
            [Comic.comic_name, "Series"],        # comic title
            [Comic.number, "Number"],            # chapter number
            [Comic.comic_names, "SeriesGroup"],  # alias names
            [Comic.dep, "Summary"],              # description
            [Comic.year, "Year"],
            [Comic.month, "Month"],
            [Comic.day, "Day"],
            [Comic.author, "Writer"],
            [Comic.cbs, "Publisher"],
            [Comic.genre, "Genre"],
            [Comic.tags, "Tags"],
            [Comic.page_count, "PageCount"],
            [Comic.language, "LanguageISO"],
            [Comic.agerating, "AgeRating"],
            [Comic.pages, "Pages"],              # pre-built <Pages> node
        ]
class ComicInfo:
    """Assembles and writes the ComicInfo.xml metadata file from Comic state."""
    COMIC_ICON_NAME = "000"           # image index used for an injected cover page
    COMIC_INFO_XML = "ComicInfo.xml"
    IS_NEW_ICON = False               # when True, a FrontCover page is prepended
    document = Document()             # shared DOM used purely as a node factory
    path_comic_info = None            # last computed ComicInfo.xml path

    @classmethod
    def parseExec(cls, data, exec, start_add=None, item=True):
        """Resolve a dotted key path inside dict/JSON data, optionally prefixing the result."""
        if data is not None and exec is not None:
            dots = str(exec).split(".")
            if not isinstance(data, dict): data = json.loads(data)
            for dot in dots:
                data = data.get(dot)
            if start_add is not None and data is not None:
                data = start_add + data
        return data

    @classmethod
    def setNodeAndValue(cls, node, value):
        """Wrap a str value in element <node>; pass pre-built nodes through; None -> None."""
        if value is not None:
            if isinstance(value, str):
                c_node = cls.document.createElement(node)
                child_node = cls.document.createTextNode(value)
                c_node.appendChild(child_node)
                return c_node
            else: return value
        return None

    @classmethod
    def setPages(cls, values):
        """Build the <Pages> node from image file names and record the page count."""
        if values is not None and isinstance(values, list):
            # drop the shared file suffix to obtain bare page ids
            suffix = "." + str(values[0]).split(".")[-1]
            join_list = ",".join(values).replace(suffix, "")
            values = join_list.split(",")
            Comic.setPageCount(len(values) + 1 if cls.IS_NEW_ICON else len(values))
            root_node = cls.document.createElement("Pages")
            if cls.IS_NEW_ICON:
                # prepend the cover page
                icon_node = cls.document.createElement("Page")
                icon_node.setAttribute("Image", cls.COMIC_ICON_NAME)
                icon_node.setAttribute("Type", "FrontCover")
                root_node.appendChild(icon_node)
            for page in values:
                c_node = cls.document.createElement("Page")
                page = page.split("_")[-1]
                c_node.setAttribute("Image", page)
                root_node.appendChild(c_node)
            Comic.pages = root_node

    @classmethod
    def getBaseUrl(cls, url=None):
        """Return scheme+host (text before the third '/'); defaults to Comic.homepage."""
        if url is None:
            url = Comic.homepage
        (num, index) = [3, 0]
        for x in range(0, num):
            index = str(url).find("/", index) + 1
        return url[0:index - 1]

    @classmethod
    def getPathComicInfoXML(cls):
        """Compute <img dir>/<comic>/<chapter>/ComicInfo.xml; None if state is incomplete."""
        try:
            cls.path_comic_info = os.path.join(pathStr.base_comic_img(),
                Comic.comic_name, Comic.chapter, cls.COMIC_INFO_XML)
        # BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt
        except Exception:
            return None
        return cls.path_comic_info

    # XML root element factory
    @classmethod
    def root_node(cls, root_value): return cls.document.createElement(root_value)

    @classmethod
    def add_nodes(cls, root, list_value):
        """Append one child per [value, tag] pair whose value is set."""
        if len(list_value) == 0: return list_value
        for value in list_value:
            if value[0] is not None: root.appendChild(cls.setNodeAndValue(value[1], value[0]))

    @classmethod
    def writeComicInfoXML(cls, chapter=None, path=None, overlay=False):
        """Write ComicInfo.xml to the chapter dir (or `path`); overlay=True overwrites."""
        root = cls.root_node("ComicInfo")
        new_document = Document()
        new_document.appendChild(root)
        cls.add_nodes(root, ComicInfoEntity.getNodes())
        cls.getPathComicInfoXML()
        if path is not None: cls.path_comic_info = os.path.join(path, cls.COMIC_INFO_XML)
        base_dir = os.path.dirname(cls.path_comic_info)
        if not os.path.exists(base_dir): os.makedirs(base_dir)
        if os.path.exists(cls.path_comic_info) and not overlay:
            print(f"{cls.COMIC_INFO_XML} 已存在")
            return None
        # BUG FIX: redundant fo.close() inside `with` removed; "pathd=" typo fixed
        with open(cls.path_comic_info, "w", encoding="utf-8") as fo:
            new_document.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
        print(f"{cls.COMIC_INFO_XML} 已生成 path=", cls.path_comic_info)

    @classmethod
    def setComicInfo(cls, comicname=None, homepage=None, alias=None, author=None, icon=None, tags=None,
                     dep=None, genre=None, lang=None, age_rating=None, chapters=None, update_at=None, current_chapter_img=None):
        """Push one comic's scraped metadata into the Comic state holder."""
        # normalise author separators and dedupe; sorted() keeps output deterministic
        # (the original joined an unsorted set, so ordering varied between runs)
        author = ",".join(sorted(set(str(str(author).replace("&", ",").replace(" ", ",")).split(","))))
        Comic.setHomePage(homepage)
        Comic.setIcon(icon)
        Comic.setListChapter(chapters)
        Comic.setUpdateAt(update_at)
        Comic.setComicName(str(comicname))
        #if alias != None: comicInfo.setComicNames(alias)
        Comic.setAuthor(author)
        Comic.setTags(tags)
        Comic.setDep(dep)
        #comicInfo.setCBS("韩漫")
        if genre is not None: Comic.setGenre(genre)
        Comic.setLanguage(lang)
        Comic.setAgeRating(age_rating)
        Comic.setCurrentChapterImg(current_chapter_img)
class ComicInfoUtils:
    """Progress bookkeeping for the download pipeline, persisted via dbUtils."""
    # pipeline stages, stored per chapter in the db
    PROGRESS_INFO = "info"
    PROGRESS_DOWN = "download"
    # NOTE(review): PROGRESS_IMG shares PROGRESS_DOWN's value ("download") —
    # looks intentional given the stage flow in BaseComicEntity, but confirm.
    PROGRESS_IMG = "download"
    PROGRESS_CBZ = "cbz"
    PROGRESS_DONE = "done"
    PROGRESS_NONE = "none"
    IS_NEW_ICON = False
    # comic names to skip entirely (whitelist)
    list_skip = []

    @classmethod
    def getListToString(cls, to_list):
        """Join a list with commas; any other value passes through unchanged."""
        value = to_list
        if isinstance(to_list, list):
            value = ",".join(to_list)
        return value

    @classmethod
    def setComicNameSkips(cls, value): return cls.list_skip.append(value)

    @classmethod
    def getIsComicNameSkips(cls, value):
        # NOTE(review): substring match against the joined list, so partial
        # names also match — looks like an intentional fuzzy skip, but verify.
        return value in ",".join(cls.list_skip)

    @classmethod
    def nextSavePath(cls, next, data=None):
        """Build (and, if data is given, write) a per-chapter progress file path."""
        save_path = ComicPath.getDirConfComic() + "/" + next + Comic.getChapterName()
        if data is not None: fileUtils.file_save(save_path, data)
        return save_path

    @classmethod
    def nextSaveInfoChapter(cls, chapter, data=None):
        """Persist the chapter's image list and mark the 'info' stage."""
        if data is None: data = Comic.getChapterImgs()
        if Comic.getChapterName() != chapter:
            # BUG FIX: the two names were printed fused together with no separator
            print(f"chapter {Comic.getChapterName()} / {chapter} 不一致,已自动跳过")
        cls.setProgress(cls.PROGRESS_INFO)
        cls.nextSavePath("info_", data)

    # stage transitions
    @classmethod
    def nextInfoToImgChapter(cls): cls.setProgress(cls.PROGRESS_IMG)
    @classmethod
    def nextImgToDownloadChapter(cls): cls.setProgress(cls.PROGRESS_DOWN)
    @classmethod
    def nextDownloadToCBZChapter(cls): cls.setProgress(cls.PROGRESS_CBZ)
    @classmethod
    def nextCBZToDoneChapter(cls): cls.setProgress(cls.PROGRESS_DONE)
    @classmethod
    def nextDoneSave(cls, data): cls.nextSavePath("done_", data)

    @classmethod
    def setProgress(cls, progress):
        """Record the current chapter's stage in the db."""
        dbUtils.setComic(Comic.getChapterName(), progress, Comic.getComicName())

    @classmethod
    def isProgress(cls, progress, remove=None):
        """True if the chapter is at `progress`; remove=True resets the stage first."""
        # BUG FIX: the reset wrote the literal "None" instead of PROGRESS_NONE;
        # either value fails every stage comparison, but now it is consistent.
        if remove: cls.setProgress(cls.PROGRESS_NONE)
        return dbUtils.query(Comic.getChapter(), progress, Comic.getComicName())

    @classmethod
    def iconDB(cls): dbUtils.setComic(Comic.getComicName(), Comic.getIcon(), "icons")
    @classmethod
    def equIcon(cls): return dbUtils.query(Comic.getComicName(), Comic.getIcon(), "icons")

    @classmethod
    def setConfDirComicPath(cls, file_name, comic_name=None):
        """Path of file_name inside the comic's config dir; optionally switch comic first."""
        if comic_name is not None: Comic.setComicName(comic_name)
        return os.path.join(ComicPath.getDirConfComic(), file_name)

    @classmethod
    def saveConfComicData(cls, file_name, data, comic_name=None):
        fileUtils.file_save(cls.setConfDirComicPath(file_name, comic_name), data)

    @classmethod
    def getPathInitConfComicData(cls, file_name, comic_name=None):
        return cls.setConfDirComicPath(file_name, comic_name)

    @classmethod
    def updateLastDate(cls, date=None):
        """Store the comic's last-update timestamp (or an explicit override such as "0")."""
        update_at = Comic.getUpdateAt()
        if date is not None: update_at = date
        dbUtils.setComic(Comic.getComicName(), update_at, "update")

    @classmethod
    def isUpdateComic(cls):
        """True if the stored timestamp already matches the scraped one (nothing new)."""
        return dbUtils.query(Comic.getComicName(), Comic.getUpdateAt(), "update")

    @classmethod
    def comicChapterDownload(cls, imgs, names):
        """Register a chapter's image URLs and file names, and build the Pages node."""
        Comic.setChapterImgs(imgs)
        ComicInfo.setPages(names)
        Comic.setChapterFilesName(names)

133
common/Constant.py Normal file
View File

@ -0,0 +1,133 @@
import os,datetime,shutil
from time import strftime
from common.Comic import Comic
class pathStr:
    """Active-site selection plus the root output directories for every artefact type."""
    comic_name = None
    # site identifiers
    comic_jm = "JM"
    comic_bz = "BZ"
    comic_rm = "RM"
    comic_url_main = None
    # root output dir; setComicMainPath appends the active site's name
    base_comic_out = os.path.join("/mnt", "Comics")
    old_cbz_path = os.path.join("/mnt", "OldComics")

    @classmethod
    def base_cbz(cls): return cls.getBaseComicPath("CBZ")
    @classmethod
    def base_comic_img(cls): return cls.getBaseComicPath("outputComic")
    @classmethod
    def base_conf_path(cls): return cls.getBaseComicPath(".conf")
    @classmethod
    def base_html_cache(cls): return cls.getBaseComicPath("html_cache")
    @classmethod
    def base_html_chapter(cls): return cls.getBaseComicPath("html_updated")
    @classmethod
    def base_comic_update(cls): return cls.getBaseComicPath("comic_update")
    @classmethod
    def base_db(cls): return cls.getBaseComicPath("db")

    @classmethod
    def getBaseUrl(cls, url=None):
        """Scheme+host of url (everything before the third '/'); defaults to Comic.homepage."""
        if url is None:
            url = Comic.homepage
        pos = 0
        for _ in range(3):
            pos = str(url).find("/", pos) + 1
        return url[:pos - 1]

    @classmethod
    def getBaseComicPath(cls, join_path):
        """Join a sub-directory onto the root output dir."""
        return os.path.join(cls.base_comic_out, join_path)

    @classmethod
    def setComicMainAndPath(cls, value):
        """Select the active site and point the output root at its sub-directory."""
        cls.setComicMain(value)
        cls.setComicMainPath(value)

    @classmethod
    def setComicMain(cls, value): cls.comic_name = value
    @classmethod
    def getComicMain(cls): return cls.comic_name

    @classmethod
    def setComicMainPath(cls, value):
        # note: appends on every call, so invoke once per run
        cls.base_comic_out = os.path.join(cls.base_comic_out, value)

    @classmethod
    def base_html_week(cls):
        """Weekly HTML cache dir, keyed by year + month + week-of-month."""
        return os.path.join(cls.base_comic_out, "html_" + str(cls.getDatePath()))

    @classmethod
    def getDatePath(cls):
        """Return e.g. '2023402': year, month (unpadded) and week-of-month."""
        today = datetime.datetime.now()
        week = cls.get_week_of_month(today.year, today.month, today.day)
        return f"{today.year}{today.month}{week}"

    @classmethod
    def get_week_of_month(cls, year, month, day):
        """Zero-padded week-of-month (starting at '01') for the given date."""
        first_week = int(datetime.date(year, month, 1).strftime("%W"))
        this_week = int(datetime.date(year, month, day).strftime("%W"))
        return f"{this_week - first_week + 1:02d}"
class ComicPath:
    """Derives concrete config / CBZ / image paths from the current Comic state."""

    @classmethod
    def setJoinPathDir(cls, path, dir="", prefix=None):
        """Join path (a str, or a list/dict of segments) under dir; append '.prefix' if given."""
        result = dir
        if isinstance(path, (dict, list)):
            for x in path:
                result = os.path.join(result, x)
        else:
            result = os.path.join(result, path)
        if prefix is not None: result += "." + prefix
        return result

    @classmethod
    def setDirConf(cls, path, prefix=None): return cls.setJoinPathDir(path, pathStr.base_conf_path(), prefix=prefix)
    @classmethod
    def setDirCBZ(cls, path, prefix=None): return cls.setJoinPathDir(path, pathStr.base_cbz(), prefix=prefix)
    @classmethod
    def setDirImg(cls, path, prefix=None): return cls.setJoinPathDir(path, pathStr.base_comic_img(), prefix=prefix)

    # config dir for the current comic
    @classmethod
    def getDirConfComic(cls): return cls.setDirConf(Comic.comic_name)
    # CBZ dir for the current comic
    @classmethod
    def getDirCBZComic(cls): return cls.setDirCBZ(Comic.comic_name)
    # CBZ dir for the current chapter
    @classmethod
    def getDirCBZComicChapter(cls): return cls.setDirCBZ([Comic.comic_name, Comic.chapter])
    # sort-ordered variant ("<number> <chapter>" passed via the prefix slot)
    @classmethod
    def getSortDirCBZComicChapter(cls): return cls.setDirCBZ([Comic.comic_name], str(Comic.number) + " " + Comic.chapter)

    @classmethod
    def getNewCBZComicChapter(cls, type="dir"): return cls.getNewToComicChapter(".CBZ", type)
    @classmethod
    def getNewIconComicChapter(cls, type="dir"): return cls.getNewToComicChapter(".jpg", type)
    @classmethod
    def getNewFileCBZComicChapter(cls, type="file"): return cls.getNewToComicChapter(".CBZ", type)
    @classmethod
    def getNewFileIconComicChapter(cls, type="file"): return cls.getNewToComicChapter(".jpg", type)

    @classmethod
    def getNewToComicChapter(cls, su, type="dir"):
        """Return the chapter path with suffix su, first migrating any sort-named file.

        type="file" returns the suffixed path; anything else returns the bare dir.
        """
        c_dir = cls.getDirCBZComicChapter()
        c_path = c_dir + su
        s_path = cls.getSortDirCBZComicChapter() + su
        # s_path is always a str (string concatenation); the original's trailing
        # `and s_path != None` guard was dead code and has been removed.
        if os.path.exists(s_path):
            shutil.move(s_path, c_path)
            print("文件已移动至:", c_path)
        if type == "file":
            return c_path
        return c_dir

    @classmethod
    def getDirComic(cls): return cls.setDirImg(Comic.comic_name)
    @classmethod
    def getDirComicChapter(cls): return cls.setJoinPathDir(Comic.chapter, cls.getDirComic())

View File

@ -1,8 +1,7 @@
import json import json
from utils.HtmlUtils import htmlUtils from utils.NetUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo from common.ComicInfo import ComicInfoUtils as ciUtils
from utils.downloader import download_images from common.BaseComicEntity import baseComic
from utils.base.BaseComicEntity import baseComic
class comicEntity: class comicEntity:
@classmethod @classmethod
@ -19,7 +18,7 @@ class comicEntity:
x = cls.baseComicData(url) x = cls.baseComicData(url)
books = x.get("books") books = x.get("books")
len_books = len(books) len_books = len(books)
base_url = comicInfo.getBaseUrl(url) base_url = ciUtils.getBaseUrl(url)
for x in range(0, len_books): for x in range(0, len_books):
book = books[x] book = books[x]
book_id = book.get("id") book_id = book.get("id")
@ -28,7 +27,7 @@ class comicEntity:
comic_href = base_url+"/books/"+book_id comic_href = base_url+"/books/"+book_id
href = baseComic.downladsComcis(book_name=book_name,comic_href=comic_href,updated=updated) href = baseComic.downladsComcis(book_name=book_name,comic_href=comic_href,updated=updated)
cls.oneComic(href) cls.oneComic(href)
comicInfo.updateComicDate() ciUtils.updateComicDate()
@classmethod @classmethod
def oneComic(cls,c_url,sleep=None): def oneComic(cls,c_url,sleep=None):

11
domain/Domains.py Normal file
View File

@ -0,0 +1,11 @@
from domain.down.Baozi import DomainDown as baozi
from domain.down.RouMan import DomainDown as rouman
from common.Constant import pathStr
class domains:
    """Routes a chapter download to the downloader of the currently selected site."""
    @classmethod
    def setdomain(cls, url):
        """Dispatch url to the active site's downloader; abort if no site is selected."""
        comic_main = pathStr.getComicMain()
        if comic_main is None:
            # BUG FIX: the original used `print(...) & exit()`, which only worked
            # because exit() raised before `&` was applied to print()'s None.
            print("comic_main为空退出中...")
            exit()
        if comic_main == pathStr.comic_bz: baozi.comicChapterDownload(url)
        if comic_main == pathStr.comic_rm: rouman.comicChapterDownload(url)

47
domain/RouMan.py Normal file
View File

@ -0,0 +1,47 @@
from common.Constant import pathStr
from common.Comic import ListComic
from common.Comic import Comic
from common.BaseComicEntity import baseComic
from utils.NetUtils import htmlUtils
class comicEntity:
@classmethod
def downladsComcis(cls,url):
str_xpath='//script[@id="__NEXT_DATA__"]/text()'
str_exec="props.pageProps.books"
books = htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec)
#comic_names = htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec,result_type="list",type="name")
#chapter_links = htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec,result_type="list",type="id"
# ,start_add=pathStr.getBaseUrl(url)+"/books/")
#update_at= htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec,result_type="list",type="updateAt")
#ciUtils.setListComicsLinksUpdateAt(comic_names,chapter_links,update_at)
ListComic.setListComicName(books,"name")
ListComic.setListComicChapterLink(books,"id",start_add=pathStr.getBaseUrl(url)+"/books/")
ListComic.setListComicUpdateAt(books,"updateAt")
return baseComic.updateComics(chapters_xpath='//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
@classmethod
def oneComic(cls,sleep=None):
c_url = Comic.getCurrentDownLink()
if c_url == None: return None
title = htmlUtils.getXpathData('//div[@class="col"]/h5/text()',url=c_url,num=0,update=True)
#别名
#alias = htmlUtils.xpathData('//span[contains(@class,"bookid_alias")]/text()',num=1)
icon = htmlUtils.getXpathData('//img[@class="img-thumbnail"]/@src',num=0)
author = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1)
tags = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()',num=0)
action = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1)
dep = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1)
update_date = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1)
chapters = htmlUtils.getXpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
chapter_href = htmlUtils.getXpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href')
baseComic.oneComic(url=c_url,title=title,author=author,
icon=icon,tags=tags,dep=dep,chapters=chapters,chapter_href=chapter_href,
genre="韩漫",age_rating="R18+")
@classmethod
def start(cls,url):
    """Entry point for this site: select the RouMan storage root, scan the
    list page at ``url``, then download the comic flagged as outdated."""
    pathStr.setComicMainAndPath(pathStr.comic_rm)
    cls.downladsComcis(url)
    cls.oneComic()

View File

@ -1,12 +1,9 @@
import hashlib from common.ComicInfo import ComicInfoUtils as ciUtils
import json from common.ComicInfo import ComicInfo as ci
import os from common.Comic import Comic
from utils.HtmlUtils import htmlUtils from utils.NetUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.Ntfy import ntfy
class comicCommon: class DomainDown:
@classmethod @classmethod
def comicChapterDownload(cls,chapter_url): def comicChapterDownload(cls,chapter_url):
imgs_url =htmlUtils.xpathData("//div[@class='gb-inside-container']/img/@data-src",url=chapter_url,update=True) imgs_url =htmlUtils.xpathData("//div[@class='gb-inside-container']/img/@data-src",url=chapter_url,update=True)
@ -21,6 +18,6 @@ class comicCommon:
list_file_name.append(count_image+"."+img_su) list_file_name.append(count_image+"."+img_su)
list_img.append(count_image) list_img.append(count_image)
count += 1 count += 1
comicInfo.setPages(list_img) ci.setPages(list_img)
comicInfo.writeComicInfoXML(comicInfo.str_chapter) ci.writeComicInfoXML(Comic.getChapterName())
comicInfo.comicChapterDownload(imgs_url,list_file_name) ciUtils.comicChapterDownload(imgs_url,list_file_name)

44
domain/down/RouMan.py Normal file
View File

@ -0,0 +1,44 @@
from common.ComicInfo import ComicInfo as ci
from common.ComicInfo import Comic
from common.ComicInfo import ComicInfoUtils as ciUtils
from common.Constant import pathStr
from utils.FileUtils import imageUtils
from utils.NetUtils import htmlUtils
from utils.ComicUtils import ntfy
class DomainDown:
    """Downloads one RouMan chapter: parses the Next.js ``__NEXT_DATA__``
    payload, prefers the dedicated chapter API when the page exposes one,
    and queues the page images for download."""

    @classmethod
    def comicChapterDownload(cls,chapter_url):
        """Resolve the image list for ``chapter_url`` and hand
        (urls, target file names) to ciUtils.comicChapterDownload."""
        # The page embeds all of its data as JSON in the __NEXT_DATA__ script tag.
        str_xpath='//script[@id="__NEXT_DATA__"]/text()'
        str_exec="props.pageProps"
        book = htmlUtils.setXpathData(chapter_url,xpath=str_xpath,num=0,exec=str_exec)
        Comic.setComicName(book,"bookName")
        Comic.setChapterName(book,"chapterName")
        #alias = x.get("alias")
        Comic.setDep(book,"description")
        images = Comic.getValue(book,"images")
        # Some chapters expose a dedicated API endpoint carrying the real image list.
        chapter_api_url = ci.parseExec(book,"chapterAPIPath",start_add=pathStr.getBaseUrl(chapter_url))
        if chapter_api_url != None:
            ntfy.sendMsg(f"chapterApiUrl= {chapter_api_url}",alert=False)
            data = htmlUtils.getJSON(chapter_api_url,update=True)
            if data != None:
                Comic.setChapterName(data,"chapter.name")
                images = Comic.getValue(data,"chapter.images")
        # FIX: images may be None (missing key or failed API call); the previous
        # `len(images) == 0` raised TypeError before the warning could fire.
        # Normalize to a list so the empty case still warns and falls through.
        if not images:
            ntfy.sendMsg(f"未获取到章节图像 comic_name={Comic.getComicName()} chapter={Comic.getChapterName()}")
            images = []
        count = 1
        list_img,list_file_name = [[],[]]
        for image in images:
            (image_src,scramble) = [image.get("src"),image.get("scramble")]
            # Zero-padded sequence number keeps files sorted (001.jpg, 002.jpg, ...).
            count_image = "{:0>3d}".format(count)
            list_img.append(image_src)
            image_src_prefix = "."+str(image_src).split(".")[-1]
            if scramble:
                # Scrambled pages encode their block count in the file name; the
                # "scramble=<n>_" prefix tells the unpacker how to de-scramble.
                de_str = str(image_src).split("/")[-1].replace(image_src_prefix,"==")
                blocks_num = imageUtils.encodeImage(de_str)
                count_image = "scramble="+str(blocks_num)+"_"+count_image
            list_file_name.append(count_image+image_src_prefix)
            count+=1
        ciUtils.comicChapterDownload(list_img,list_file_name)

View File

@ -1,74 +0,0 @@
import json
import re
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.base.BaseComicEntity import baseComic
class comicEntity:
    """JM site scraper (legacy): reads the list page's ``__NEXT_DATA__`` JSON
    and each album's detail page, then delegates to baseComic."""

    @classmethod
    def baseComicData(cls,url,update=False):
        """Return the ``props.pageProps`` dict embedded in the page JSON."""
        data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update)
        data = json.loads(data[0])
        data = data.get("props")
        x = data.get("pageProps")
        return x

    @classmethod
    def downladsComcis(cls,url):
        """Walk the ``books`` array of the list page and download each comic."""
        # comic names
        x = cls.baseComicData(url)
        books = x.get("books")
        len_books = len(books)
        base_url = comicInfo.getBaseUrl(url)
        for x in range(0, len_books):
            book = books[x]
            book_id = book.get("id")
            book_name = book.get("name")
            updated = book.get("updatedAt")
            comic_href = base_url+"/books/"+book_id
            # baseComic decides (by update date) whether a download is needed
            # and returns the link to fetch, or a skip marker.
            href = baseComic.downladsComcis(book_name=book_name,comic_href=comic_href,updated=updated)
            cls.oneComic(href)
        comicInfo.updateComicDate()

    @classmethod
    def oneComic(cls,c_url,sleep=None):
        """Scrape one album page: title, tags, author, chapter list.

        NOTE(review): ``sleep`` is unused; when ``nums`` is empty the final
        baseComic.oneComic call has no chapter lists to forward.
        """
        nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a",url=c_url,update=True)
        book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()",num=0)
        # Strip characters that are illegal in file/directory names.
        title = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>]', '', book_name)
        tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()")
        author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()")
        book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()")
        jmid = book_msg[0]
        dep = str(book_msg[1]).replace("叙述:","")
        icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src",num=0)
        # Scheme+host of the album URL, used to absolutize chapter links.
        referer = re.search(r'(https://\w+\.\w+)/', c_url).group(1)
        if nums:
            list_chapter_name = []
            list_chapter_href = []
            list_chapter_update = []
            cls.count_chapter = 0
            for i in nums:
                photo_name_list = i.xpath("li/text()")[0].split()
                photo_date = i.xpath("li/span/text()")[0].split()
                #print(re.findall(r'[\u4E00-\u9FA5]+.*?', i.xpath("li/text()")[0]))
                # Keep the CJK part of the chapter label when present; the
                # IndexError path covers labels with fewer than three tokens.
                try:
                    if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]):
                        photo_name=re.sub(r'\s','',photo_name_list[0])+' '+photo_name_list[2]
                    else:photo_name=re.sub(r'\s','',photo_name_list[0])
                except Exception as e:
                    photo_name = re.sub(r'\s', '', photo_name_list[0])
                photo_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\\-\*\<\>\-]', '',photo_name)
                #print(photo_name)
                # data-album is the album id; also stashed globally for the
                # scramble-number computation in the chapter downloader.
                photoid=i.attrib['data-album']
                cls.aid = photoid
                comicInfo.setValue1(cls.aid)
                list_chapter_name.append(photo_name)
                list_chapter_href.append(referer+i.attrib['href'])
                list_chapter_update.append(photo_date[0])
            baseComic.oneComic(url=c_url,title=title,author=author,
                icon=icon,tags=tags,dep=dep,chapters=list_chapter_name,chapter_href=list_chapter_href,
                alias=None)

View File

@ -1,42 +0,0 @@
import json
from utils.HtmlUtils import htmlUtils
from utils.comic.ComicInfo import comicInfo
from utils.downloader import download_images
from utils.base.BaseComicEntity import baseComic
class comicEntity:
    """Legacy list/detail scraper: reads the ``books`` array from the page's
    ``__NEXT_DATA__`` JSON and scrapes each comic's detail page."""

    @classmethod
    def booksJson(cls,url,update=False):
        """Return the ``books`` array embedded in the list page JSON."""
        data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update)
        return json.loads(data[0]).get("props").get("pageProps").get("books")

    @classmethod
    def downladsComcis(cls,url):
        """Register every book on the list page, then download whichever one
        baseComic.updateComics() reports as outdated."""
        books = cls.booksJson(url,update=True)
        for x in range(0, len(books)):
            comicInfo.setComicInfo(comicname=books[x].get("name"),
                current_chapter_img=comicInfo.getBaseUrl(url)+"/books/"+books[x].get("id"),
                update_at=books[x].get("updatedAt"))
            cls.oneComic(baseComic.updateComics())

    @classmethod
    def oneComic(cls,c_url,sleep=None,date=None):
        """Scrape one comic detail page and forward metadata to baseComic.

        FIX: the default for ``date`` used to be ``comicInfo.getUpdateAt()``,
        which Python evaluates once when the class is defined — freezing a
        stale value and touching comicInfo state at import time. Use a None
        sentinel and resolve the current value per call instead.
        ``sleep`` is accepted for interface symmetry but unused.
        """
        if date is None:
            date = comicInfo.getUpdateAt()
        if c_url == None: return None
        # update=True forces a fresh fetch; later calls reuse the cached page.
        title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0,update=True)
        # alias (alternate title) — currently not scraped:
        #alias = htmlUtils.xpathData('//span[contains(@class,"bookid_alias")]/text()',num=1)
        icon = htmlUtils.xpathData('//img[@class="img-thumbnail"]/@src',num=0)
        author = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1)
        tags = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()',num=0)
        # action and update_date are scraped but not forwarded below.
        action = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1)
        dep = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1)
        update_date = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1)
        chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()')
        chapter_href = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href')
        baseComic.oneComic(url=c_url,title=title,author=author,
            icon=icon,tags=tags,dep=dep,chapters=chapters,chapter_href=chapter_href,
            genre="韩漫",age_rating="R18+")
        comicInfo.updateComicDate(date=date)

View File

@ -1,108 +0,0 @@
import hashlib
import json
import os
from utils.HtmlUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.Ntfy import ntfy
class comicCommon:
    """JM chapter downloader (legacy): collects page image URLs and computes
    the scramble block count needed to de-scramble newer albums."""

    @classmethod
    def comicChapterDownload(cls,chapter_url):
        """Collect image URLs for the chapter and queue them for download."""
        # ?shunt=2 selects an alternate image CDN line.
        chapter_url = chapter_url+"?shunt=2"
        img_list =htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center scramble-page')]/img/@data-original",url=chapter_url,update=True)
        pages_imgs =htmlUtils.xpathData("//div[@class='center scramble-page']/@id",url=chapter_url)
        comicInfo.setPages(pages_imgs)
        comicInfo.writeComicInfoXML(comicInfo.str_chapter)
        #print("img_list:",len(img_list))
        list_img = []
        list_file_name = []
        for i in img_list:
            img_url= i
            img_name = os.path.basename(img_url).split('.')[0]
            aid = int(comicInfo.getValue1())
            # Albums with id > 220980 are scrambled; encode the block count into
            # the file name ("scramble=<n>_...") so the unpacker can restore them.
            if aid > 220980:
                #if is_scramble:
                img_name = "scramble="+str(cls.get_scramble_num(aid,img_name))+"_"+img_name
            #path_img = "%s\\%s.jpg" % (cls.aid, img_name)
            path_img = "%s.jpg" % (img_name)
            list_img.append(img_url)
            list_file_name.append(path_img)
        comicInfo.comicChapterDownload(list_img,list_file_name)

    @classmethod
    def get_md5(cls,num):
        """Return the hex MD5 digest of the string ``num``."""
        result1 = hashlib.md5(num.encode()).hexdigest()
        print('get_md5-', result1)
        return result1

    @classmethod
    def get_scramble_num(cls,e, t):
        """Return the scramble block count for album id ``e`` / image name ``t``.

        Mirrors the site's JS: for ids >= 268850 the count is derived from the
        last hex digit of md5(id + name) mapped through {0..9 -> 2,4,...,20};
        older scrambled albums always use 10. Returns False on failure.
        """
        a = 10
        try:
            num_dict = {}
            for i in range(10):
                num_dict[i] = i * 2 + 2
            if (int(e) >= 268850):
                n = str(e) + t
                # switch(n=(n = (n = md5(n)).substr(-1)), n %= 10) {
                tmp = ord(cls.get_md5(n)[-1])
                result = num_dict[tmp % 10]
                a = result
            return a
        except Exception as exc:
            # FIX: the handler used to rebind ``e`` (the album id parameter) as
            # the exception object, shadowing it; use a distinct name.
            print(exc.__traceback__.tb_lineno,exc)
            return False

    @classmethod
    def encode_scramble_image(cls,imgpath):
        """Re-assemble a scrambled image whose block count is encoded in its
        file name ("scramble=<blocks>_<name>.<ext>"), save the restored image
        next to it, and delete the scrambled original.

        NOTE(review): this method also needs ``from PIL import Image`` at
        module level — the original module never imported it, so this path
        raised NameError as written. Flagged rather than silently adding a
        new third-party dependency.
        """
        import math  # FIX: math was never imported at module level (NameError below)
        image = Image.open(imgpath)
        w, h = image.size
        #image.show()
        # File name layout: "<dir...>scramble=<blocks>_<origname>.<ext>"
        file_str = str(imgpath).split("=")
        #10_29.jpg
        base_dir = file_str[0].replace("scramble","")
        base_name = file_str[-1]
        base_fn = base_name.split("_")
        save_name = base_fn[1]
        save_name_delesu = save_name.split(".")[0]
        blocks = int(base_fn[0])
        img_type = os.path.basename(imgpath).split('.')[-1]
        save_path = os.path.join(os.path.dirname(imgpath),save_name_delesu+"."+img_type)
        # print(type(aid),type(img_name))
        if blocks:
            s = blocks # number of horizontal slices
            # print(s)
            l = h % s # leftover rows that do not divide evenly
            box_list = []
            hz = 0
            # Build crop boxes bottom-up; the first slice absorbs the leftover rows.
            for i in range(s):
                c = math.floor(h / s)
                g = i * c
                hz += c
                h2 = h - c * (i + 1) - l
                if i == 0:
                    c += l;hz += l
                else:
                    g += l
                box_list.append((0, h2, w, h - g))
            # print(box_list,len(box_list))
            item_width = w
            # box_list.reverse() # reversing the list would undo the slicing order
            # print(box_list, len(box_list))
            newh = 0
            image_list = [image.crop(box) for box in box_list]
            # print(box_list)
            # Paste the slices top-down into a fresh canvas to restore the page.
            newimage = Image.new("RGB", (w, h))
            for image in image_list:
                # image.show()
                b_w, b_h = image.size
                newimage.paste(image, (0, newh))
                newh += b_h
            newimage.save(save_path)
        if os.path.exists(imgpath):
            os.remove(imgpath)

View File

@ -1,58 +0,0 @@
import json
from utils.HtmlUtils import htmlUtils
from utils.FileUtils import imageUtils
from utils.comic.ComicInfo import comicInfo
from utils.Ntfy import ntfy
class comicCommon:
    """RouMan chapter downloader (legacy): parses the ``__NEXT_DATA__`` page
    payload, prefers the chapter API when present, and queues image downloads."""

    @classmethod
    def baseComicData(cls,url,update=False):
        """Return the ``props.pageProps`` dict embedded in the page JSON."""
        data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update)
        data = json.loads(data[0])
        data = data.get("props")
        x = data.get("pageProps")
        return x

    @classmethod
    def comicChapterDownload(cls,chapter_url):
        """Resolve the chapter's image list and hand (urls, file names) to
        comicInfo.comicChapterDownload.

        NOTE(review): when ``images`` is None only a warning is sent — the
        following ``for image in images`` still raises TypeError.
        """
        x = cls.baseComicData(chapter_url,update=True)
        book_name = x.get("bookName")
        chapter_name = x.get("chapterName")
        alias = x.get("alias")
        description = x.get("description")
        images = x.get("images")
        chapter_api_path = x.get("chapterAPIPath")
        comicInfo.setComicName(book_name)
        comicInfo.setChapterName(chapter_name)
        comicInfo.setDep(description)
        if chapter_api_path != None:
            # The API path arrives with escaped characters (e.g. \u002F);
            # unicode_escape turns it back into a usable URL path.
            chapter_api_path = str(chapter_api_path).encode('utf-8').decode('unicode_escape')
            chapter_api_url = comicInfo.getBaseUrl(chapter_url)+chapter_api_path
            ntfy.sendMsg(f"chapterApiUrl= {chapter_api_url}",alert=False)
            data = htmlUtils.getJSON(chapter_api_url,update=True)
            if data != None:
                data = data.get("chapter")
                (chapter_name,images) = [data.get("name"),data.get("images")]
        if images == None:
            ntfy.sendMsg(f"未获取到章节图像 comic_name={book_name} chapter={chapter_name}")
        count = 1
        list_img = []
        list_file_name = []
        for image in images:
            image_src = image.get("src")
            scramble = image.get("scramble")
            # Zero-padded sequence number keeps files sorted (001, 002, ...).
            count_image = "{:0>3d}".format(count)
            list_img.append(image_src)
            image_src_prefix = "."+str(image_src).split(".")[-1]
            if scramble:
                # Encode the scramble block count into the file name so the
                # unpacker can de-scramble the page later.
                su = "."+str(image_src).split(".")[-1]
                de_str = str(image_src).split("/")[-1].replace(su,"==")
                blocks = imageUtils.encodeImage(de_str)
                count_image = "scramble="+str(blocks)+"_"+count_image
            list_file_name.append(count_image+image_src_prefix)
            count+=1
        #print("count_all_img=", count)
        #netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble)
        comicInfo.comicChapterDownload(list_img,list_file_name)

17
main.py
View File

@ -1,20 +1,21 @@
import os,skip import os,skip
from utils.comic.PathStr import pathStr from common.Constant import pathStr
from entity.BaoZi import comicEntity as baoziEntity from domain.BaoZi import comicEntity as baoziEntity
from entity.RouMan import comicEntity as roumanEntity from domain.RouMan import comicEntity as roumanEntity
def rouman(): def rouman():
pathStr.setComicMainAndPath(pathStr.comic_rm)
skip.roumanskip() skip.roumanskip()
for x in range(0,52): for x in range(0,52):
roumanEntity.downladsComcis("https://rm01.xyz/books?&page="+str(x)) roumanEntity.start("https://rm01.xyz/books?&page="+str(x))
def baozi(): def baozi():
pathStr.setComicMainAndPath(pathStr.comic_bz) pathStr.setComicMainAndPath(pathStr.comic_bz)
baoziEntity.oneComic("https://baozimh.org/manga/biaoren-xinmanhua/") baoziEntity.oneComic("https://baozimh.org/manga/biaoren-xinmanhua/")
def proxy():
os.environ["http_proxy"] = "http://127.0.0.1:7890"
os.environ["https_proxy"] = "http://127.0.0.1:7890"
if __name__ == '__main__': if __name__ == '__main__':
rouman() rouman()
# baozi() # baozi()
# os.environ["http_proxy"] = "http://127.0.0.1:7890"
# os.environ["https_proxy"] = "http://127.0.0.1:7890"

14
skip.py
View File

@ -1,14 +1,14 @@
from utils.comic.ComicInfo import comicInfo from common.ComicInfo import ComicInfoUtils as ciUtils
def roumanskip(): def roumanskip():
# comicInfo.setComicNameSkips("虐美人 1-117話") # comicInfo.setComicNameSkips("虐美人 1-117話")
# comicInfo.setComicNameSkips("夢遊") # comicInfo.setComicNameSkips("夢遊")
# comicInfo.setComicNameSkips("療癒女孩") # comicInfo.setComicNameSkips("療癒女孩")
comicInfo.setComicNameSkips("深度交流會") ciUtils.setComicNameSkips("深度交流會")
comicInfo.setComicNameSkips("心機女教授") ciUtils.setComicNameSkips("心機女教授")
comicInfo.setComicNameSkips("天降惡魔 Devil Drop デビルドロップ") ciUtils.setComicNameSkips("天降惡魔 Devil Drop デビルドロップ")
comicInfo.setComicNameSkips("穿越異世界之後救了我的人是個少年殺人犯少年暗殺者×倒黴催的姐姐順水推舟在異世界做起了愛第2話.zip") ciUtils.setComicNameSkips("穿越異世界之後救了我的人是個少年殺人犯少年暗殺者×倒黴催的姐姐順水推舟在異世界做起了愛第2話.zip")
comicInfo.setComicNameSkips("幫人家畫嘛 第二季 Cartoonists-NSFW Season2") ciUtils.setComicNameSkips("幫人家畫嘛 第二季 Cartoonists-NSFW Season2")
# comicInfo.setComicNameSkips("霸道主管要我IN") # comicInfo.setComicNameSkips("霸道主管要我IN")
# comicInfo.setComicNameSkips("正妹小主管") # comicInfo.setComicNameSkips("正妹小主管")
comicInfo.setComicNameSkips("反烏托邦遊戲") ciUtils.setComicNameSkips("反烏托邦遊戲")

View File

@ -1,162 +0,0 @@
import json
import os,shutil,time
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile
from utils.comic.ComicInfo import comicInfo
from utils.Ntfy import ntfy
from utils.FileUtils import fileUtils as fu
from utils.comic.PathStr import pathStr
from utils.OldUtils import OldUtils
class CBZUtils:
    """Packs downloaded chapter directories into .CBZ archives and verifies
    them (legacy version bound to comicInfo)."""

    @classmethod
    def getCBZ_Dir(cls): return comicInfo.getNewCBZComicChapter()

    @classmethod
    def getCBZ_Path(cls): return comicInfo.getNewFileCBZComicChapter()

    @classmethod
    def readDirsOrFiles(cls,dir,type):
        """Return full paths of entries in ``dir``; ``type`` is "files" or "dirs"."""
        data = []
        files = os.listdir(dir)
        for file in files:
            path = os.path.join(dir,file)
            if type == "files" and os.path.isfile(path):
                data.append(path)
            if type == "dirs" and os.path.isdir(path):
                data.append(path)
        return data

    @classmethod
    def zip_compression(cls,source_dir=None, target_file=None, remove=True):
        """Zip ``source_dir`` into ``target_file`` (skipped when the target
        already exists), notify, then verify the archive.

        NOTE(review): ``remove`` is unused; ``zf.close()`` is redundant inside
        the ``with`` block.
        """
        target_dir = os.path.dirname(target_file)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        if not os.path.exists(target_file) and source_dir != None:
            with ZipFile(target_file, mode='w') as zf:
                for path, dir_names, filenames in os.walk(source_dir):
                    path = Path(path)
                    arc_dir = path.relative_to(source_dir)
                    y = 0
                    for filename in filenames:
                        y = y + 1
                        print("打包中:" + str(y) + "/" + str(len(filenames)), os.path.join(source_dir, filename))
                        zf.write(path.joinpath(filename), arc_dir.joinpath(filename))
                zf.close()
        ntfy.sendMsg(f"打包完成:{target_file}")
        cls.verCBZComic(target_file)

    @classmethod
    def packAutoComicChapterCBZ(cls):
        """Drop leftover scrambled images, zip the chapter dir, delete it.

        NOTE(review): ``os.remove(file)`` gets a bare name from os.listdir and
        resolves against the CWD, not ``chapter_path`` — likely a bug.
        """
        chapter_path = comicInfo.getDirComicChapter()
        if os.path.exists(chapter_path):
            dirs = os.listdir(chapter_path)
            for file in dirs:
                if file.startswith("scramble="):
                    try:
                        os.remove(file)
                    except:
                        print(f"删除 {file} 发生错误,已跳过")
                        return False
            cls.zip_compression(comicInfo.getDirComicChapter(), cls.getCBZ_Path())
            time.sleep(0.1)
            fu.remove(comicInfo.getDirComicChapter())
        return True

    @classmethod
    def replaceZip(cls,filepath,unpack_dir=None):
        """Re-check archives older than the cutoff date: delete ones holding
        truncated images, refresh the timestamp of the rest."""
        if not cls.compareFileDate(filepath): return None
        if unpack_dir == None:
            unpack_dir = str(filepath).split(".")[0]
        fz = ZipFile(filepath, 'r')
        for file in fz.namelist():
            if file.endswith(".jpg"):
                data = fz.read(file)
                # Under ~500 bytes is assumed to be a broken/truncated image.
                if len(data) < 500 and os.path.exists(filepath):
                    os.remove(filepath)
                    print(f"数据不完整,已删除:{filepath}")
        if cls.compareFileDate(filepath):
            os.utime(filepath)
            print(f"已更新文件时间 {filepath}")
        if os.path.exists(unpack_dir):
            shutil.rmtree(unpack_dir)

    @classmethod
    def compareFileDate(cls,filepath):
        """True when ``filepath`` exists and its mtime (as YYYYMMDDHH) is older
        than the hard-coded cutoff 2023011603; False otherwise."""
        if os.path.exists(filepath):
            ctime = os.path.getmtime(filepath)
            str_ctime = datetime.fromtimestamp(int(ctime))
            file_ctime = str(str_ctime.year)+"{:0>2d}".format(str_ctime.month)+"{:0>2d}".format(str_ctime.day)+"{:0>2d}".format(str_ctime.hour)
            c_ctime = 2023011603
        else:
            return False
        if int(file_ctime) < c_ctime:
            return True
        return False

    @classmethod
    def zip_info(cls,path,filter=True):
        """Return the archive's name list (minus cover/ComicInfo.xml when
        ``filter``), or None when the archive cannot be read."""
        result = None
        try:
            with ZipFile(path, "r") as zip_file:
                result = zip_file.namelist()
                if filter:
                    filter_icon = comicInfo.COMIC_ICON_NAME+".jpg"
                    filter_info_xml = comicInfo.COMIC_INFO_XML
                    if filter_icon in result: result.remove(filter_icon)
                    if filter_info_xml in result: result.remove(filter_info_xml)
        except Exception as e:
            print(e)
        return result

    #CBZ检验是否完整
    @classmethod
    def verCBZComic(cls,path=None,list_img=None,min_size=300000):
        """Verify a CBZ: entry count must match the expected image list and the
        file must be at least ``min_size`` bytes; deletes failed archives."""
        #数据检验
        if path == None: path = cls.getCBZ_Path()
        #文件不存在 则返回
        if fu.notExists(path): return False
        if list_img == None: list_img = comicInfo.getChapterImgs()
        if fu.exists(path) and len(cls.zip_info(path)) == len(list_img):
            print(f"文件校验成功:{path}")
            comicInfo.setProgress(comicInfo.PROGRESS_DONE)
            return True
        else:
            try:
                if len(cls.zip_info(path)) < len(list_img) or os.path.getsize(path) < min_size:
                    fu.remove(path)
                    comicInfo.setProgress(comicInfo.PROGRESS_NONE)
            except Exception as e:
                print(e)
            return False

    @classmethod
    def updateOldCBZ(cls,filesname,result=False):
        """Reuse a previously downloaded CBZ: extract it into the chapter dir
        when its content count matches, verifying each image on the way."""
        old_zipfile_path = os.path.join(pathStr.old_cbz_path,OldUtils.getOldComicName(),OldUtils.getOldChapter()+".CBZ")
        #判断是否存在已下载CBZ文件
        if fu.exists(old_zipfile_path) and fu.notExists(CBZUtils.getCBZ_Path()):
            print(f"存在CBZ文件{old_zipfile_path},解压中...")
            zip_file = ZipFile(old_zipfile_path)
            #CBZ中文件数量剔除ComicInfo.xml
            if len(filesname) == len(zip_file.namelist())-1:
                unzip_path = comicInfo.getDirComicChapter()
                zip_file.extractall(unzip_path)
                zip_file.close()
                print(f"解压完成: CBZ文件{old_zipfile_path}")
                print("文件校验中...")
                for file in os.listdir(unzip_path):
                    #检验图片损坏则删除
                    if file.endswith(".jpg") and not fu.ver_file(os.path.join(unzip_path,file),type="image"):
                        fu.remove(unzip_path)
                        return False
                comicInfo.writeComicInfoXML(overlay=True)
                result = True
        return result

    @classmethod
    def nextCBZ(cls,list_img=None):
        """True when the chapter's CBZ still needs to be (re)built."""
        if list_img == None: list_img = comicInfo.getChapterImgs()
        return not cls.verCBZComic(list_img=list_img)

View File

@ -1,6 +1,181 @@
from opencc import OpenCC import os,shutil,time,requests
from datetime import datetime
class fontUtils: from pathlib import Path
from zipfile import ZipFile
from common.ComicInfo import ComicInfoUtils as ciUtils
from common.ComicInfo import ComicInfo as ci
from common.ComicInfo import Comic
from utils.FileUtils import fileUtils as fu
from common.Constant import pathStr
from common.Constant import ComicPath
from utils.OldUtils import OldUtils
class ntfy:
@classmethod @classmethod
def ChineseConvert(cls, text,convert='t2s'): def sendMsg(cls, msg,alert=True,sleep=None,error=None):
return OpenCC(convert).convert(text) # convert from Simplified Chinese to Traditional Chinese try:
print(f"#ntfy: {msg}")
if alert:
requests.post("https://ntfy.caiwenxiu.cn/PyComic",
data=msg.encode(encoding='utf-8'))
except:
print(f"#ntfy error: {msg}")
if sleep != None:
time.sleep(int(sleep))
if error != None:
print(f"#ntfy Error: {error}")
return False
else:
return True
class CBZUtils:
    """Packs downloaded chapter directories into .CBZ archives and verifies
    them (current version bound to ComicPath/ci/ciUtils)."""

    @classmethod
    def getCBZ_Dir(cls): return ComicPath.getNewCBZComicChapter()

    @classmethod
    def getCBZ_Path(cls): return ComicPath.getNewFileCBZComicChapter()

    @classmethod
    def readDirsOrFiles(cls,dir,type):
        """Return full paths of entries in ``dir``; ``type`` is "files" or "dirs"."""
        data = []
        files = os.listdir(dir)
        for file in files:
            path = os.path.join(dir,file)
            if type == "files" and os.path.isfile(path):
                data.append(path)
            if type == "dirs" and os.path.isdir(path):
                data.append(path)
        return data

    @classmethod
    def zip_compression(cls,source_dir=None, target_file=None, remove=True):
        """Zip ``source_dir`` into ``target_file`` (skipped when the target
        already exists), notify, then verify the archive.

        ``remove`` is kept for interface compatibility but unused.
        """
        target_dir = os.path.dirname(target_file)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        if not os.path.exists(target_file) and source_dir != None:
            with ZipFile(target_file, mode='w') as zf:
                for path, dir_names, filenames in os.walk(source_dir):
                    path = Path(path)
                    arc_dir = path.relative_to(source_dir)
                    y = 0
                    for filename in filenames:
                        y = y + 1
                        print("打包中:" + str(y) + "/" + str(len(filenames)), os.path.join(source_dir, filename))
                        zf.write(path.joinpath(filename), arc_dir.joinpath(filename))
            # FIX: dropped the redundant zf.close() — the with-statement closes it.
        ntfy.sendMsg(f"打包完成:{target_file}")
        cls.verCBZComic(target_file)

    @classmethod
    def packAutoComicChapterCBZ(cls):
        """Drop leftover scrambled images, zip the chapter dir, delete it.
        Returns False when a scrambled file could not be deleted."""
        chapter_path = ComicPath.getDirComicChapter()
        if os.path.exists(chapter_path):
            dirs = os.listdir(chapter_path)
            for file in dirs:
                if file.startswith("scramble="):
                    try:
                        # FIX: os.listdir yields bare names; os.remove(file)
                        # resolved against the CWD and could never delete the
                        # leftover — join with the chapter directory.
                        os.remove(os.path.join(chapter_path,file))
                    except:
                        print(f"删除 {file} 发生错误,已跳过")
                        return False
            cls.zip_compression(ComicPath.getDirComicChapter(), cls.getCBZ_Path())
            time.sleep(0.1)
            fu.remove(ComicPath.getDirComicChapter())
        return True

    @classmethod
    def replaceZip(cls,filepath,unpack_dir=None):
        """Re-check archives older than the cutoff date: delete ones holding
        truncated images, refresh the timestamp of the rest."""
        if not cls.compareFileDate(filepath): return None
        if unpack_dir == None:
            unpack_dir = str(filepath).split(".")[0]
        fz = ZipFile(filepath, 'r')
        for file in fz.namelist():
            if file.endswith(".jpg"):
                data = fz.read(file)
                # Under ~500 bytes is assumed to be a broken/truncated image.
                if len(data) < 500 and os.path.exists(filepath):
                    os.remove(filepath)
                    print(f"数据不完整,已删除:{filepath}")
        if cls.compareFileDate(filepath):
            os.utime(filepath)
            print(f"已更新文件时间 {filepath}")
        if os.path.exists(unpack_dir):
            shutil.rmtree(unpack_dir)

    @classmethod
    def compareFileDate(cls,filepath):
        """True when ``filepath`` exists and its mtime (as YYYYMMDDHH) is older
        than the hard-coded cutoff 2023011603; False otherwise."""
        if os.path.exists(filepath):
            ctime = os.path.getmtime(filepath)
            str_ctime = datetime.fromtimestamp(int(ctime))
            file_ctime = str(str_ctime.year)+"{:0>2d}".format(str_ctime.month)+"{:0>2d}".format(str_ctime.day)+"{:0>2d}".format(str_ctime.hour)
            c_ctime = 2023011603
        else:
            return False
        if int(file_ctime) < c_ctime:
            return True
        return False

    @classmethod
    def zip_info(cls,path,filter=True):
        """Return the archive's name list (minus cover/ComicInfo.xml when
        ``filter``), or None when the archive cannot be read."""
        result = None
        try:
            with ZipFile(path, "r") as zip_file:
                result = zip_file.namelist()
                if filter:
                    filter_icon = ci.COMIC_ICON_NAME+".jpg"
                    filter_info_xml = ci.COMIC_INFO_XML
                    if filter_icon in result: result.remove(filter_icon)
                    if filter_info_xml in result: result.remove(filter_info_xml)
        except Exception as e:
            print(e)
        return result

    #CBZ检验是否完整
    @classmethod
    def verCBZComic(cls,path=None,list_img=None,min_size=300000):
        """Verify a CBZ: entry count must match the expected image list and the
        file must be at least ``min_size`` bytes; deletes failed archives."""
        #数据检验
        if path == None: path = cls.getCBZ_Path()
        #文件不存在 则返回
        if fu.notExists(path): return False
        if list_img == None: list_img = Comic.getChapterImgs()
        # FIX: zip_info returns None for unreadable archives; guard before len()
        # so a corrupt file reports failure instead of raising TypeError.
        infos = cls.zip_info(path)
        if fu.exists(path) and infos != None and len(infos) == len(list_img):
            print(f"文件校验成功:{path}")
            ciUtils.setProgress(ciUtils.PROGRESS_DONE)
            return True
        else:
            try:
                if len(cls.zip_info(path)) < len(list_img) or os.path.getsize(path) < min_size:
                    fu.remove(path)
                    ciUtils.setProgress(ciUtils.PROGRESS_NONE)
            except Exception as e:
                print(e)
            return False

    @classmethod
    def updateOldCBZ(cls,filesname,result=False):
        """Reuse a previously downloaded CBZ: extract it into the chapter dir
        when its content count matches, verifying each image on the way."""
        old_zipfile_path = ComicPath.setJoinPathDir([OldUtils.getOldComicName(),OldUtils.getOldChapter()],
            pathStr.old_cbz_path,prefix="CBZ")
        #判断是否存在已下载CBZ文件
        if fu.exists(old_zipfile_path) and fu.notExists(CBZUtils.getCBZ_Path()):
            print(f"存在CBZ文件{old_zipfile_path},解压中...")
            zip_file = ZipFile(old_zipfile_path)
            #CBZ中文件数量剔除ComicInfo.xml
            if len(filesname) == len(zip_file.namelist())-1:
                unzip_path = ComicPath.getDirComicChapter()
                zip_file.extractall(unzip_path)
                zip_file.close()
                print(f"解压完成: CBZ文件{old_zipfile_path}")
                print("文件校验中...")
                for file in os.listdir(unzip_path):
                    #检验图片损坏则删除
                    if file.endswith(".jpg") and not fu.ver_file(os.path.join(unzip_path,file),type="image"):
                        fu.remove(unzip_path)
                        return False
                ci.writeComicInfoXML(overlay=True)
                result = True
        return result

    @classmethod
    def nextCBZ(cls,list_img=None):
        """True when the chapter's CBZ still needs to be (re)built."""
        if list_img == None: list_img = Comic.getChapterImgs()
        return not cls.verCBZComic(list_img=list_img)

View File

@ -1,9 +1,8 @@
import base64,hashlib,os,shutil import base64,hashlib,os,shutil
import math,time import math,time,json
import numpy as np
from PIL import Image from PIL import Image
from tinydb import TinyDB, Query from tinydb import TinyDB, Query
from utils.comic.PathStr import pathStr from common.Constant import pathStr
class imageUtils: class imageUtils:
@ -273,7 +272,34 @@ class fileUtils:
except: except:
print(f"删除错误:{path}") print(f"删除错误:{path}")
return False return False
# File-save helper
@classmethod
def file_save(cls,path,data,mode=None,print_msg=False):
    """Serialize ``data`` to JSON and write it to ``path``, creating parent
    directories and replacing any existing file.

    Returns a Chinese status message string ("write succeeded"/"write failed").

    NOTE(review): ``data`` is unconditionally json.dumps'ed, so plain strings
    are written quoted — confirm callers expect JSON output.
    """
    result = {}
    f = {}
    dir_name = os.path.dirname(path)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    save_path = os.path.join(path)
    if os.path.exists(save_path):
        os.remove(save_path)
    data = json.dumps(data)
    if mode == None:
        mode = "w+"
    try:
        f = open(save_path, mode, encoding="utf-8")
        f.write(data)
        f.close()
        if print_msg:
            print("data=",data)
        result = path + "文件写入成功"
    except:
        # NOTE(review): bare except also swallows KeyboardInterrupt; the file
        # handle is not closed on a failed write.
        result = path + "文件写入失败"
    print(result)
    return result
class dbUtils: class dbUtils:
@classmethod @classmethod
def base_path(cls,path): def base_path(cls,path):

View File

@ -1,123 +0,0 @@
from fake_useragent import UserAgent
import requests,os,json
from lxml import html
import traceback
import time,re
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from utils.Ntfy import ntfy
from utils.comic.PathStr import pathStr
class htmlUtils:
    """HTTP fetch helper (legacy): caches raw page text on disk keyed by a
    sanitized URL and evaluates XPath queries against the cached pages."""
    # One random User-Agent per process; shared by every request.
    headers = {'User-Agent': UserAgent().random}
    url_data = {}

    @classmethod
    def getPathSaveHtml(cls,url,type=None):
        """Map ``url`` to its cache-file path.

        type="new": path whether or not it exists; type="read": cached text
        (None if absent); otherwise the path only when the file exists.
        """
        rstr = r"[\/\\\:\*\?\"\<\>\|\.]" #  '/ \ : * ? " < > |'
        try:
            file_url = re.sub(rstr, "", url)
        except:
            file_url = "error_cache"
        file_path = os.path.join(pathStr.base_html_cache(),file_url)
        if type == "new":
            return file_path
        if os.path.exists(file_path):
            if type == "read":
                with open(file_path,"r",encoding="utf-8") as fs: return fs.read()
            return file_path
        else:
            return None

    @classmethod
    def saveHtml(cls,url,data,type=None):
        """Write ``data`` (JSON-encoded when type=="json") to the url's cache file."""
        file_path = cls.getPathSaveHtml(url,type="new")
        dir_name = os.path.dirname(file_path)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        with open(file_path,"w",encoding="utf-8") as fs:
            if type== "json": data = json.dumps(data)
            fs.write(str(data))

    @classmethod
    def remove_HtmlCache(cls,url):
        """Best-effort delete of the url's cache file."""
        file_path = cls.getPathSaveHtml(url,type="new")
        if os.path.exists(file_path):
            try:
                os.remove(file_path)
                print("已删除")
            except:
                print()

    @classmethod
    def getHTML(cls, curl,type=None,update=False):
        """Fetch ``curl`` with retries, preferring the on-disk cache.

        type=None -> lxml document; type="json" -> parsed JSON;
        type="bytes" -> raw Response. ``update`` forces a re-fetch.
        """
        url_text = None
        if update: cls.remove_HtmlCache(curl)
        retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[ 500, 502, 503, 504 ])
        s = requests.Session()
        s.mount('http://', HTTPAdapter(max_retries=retries))
        s.mount('https://', HTTPAdapter(max_retries=retries))
        # Serve from cache when present and no refresh was requested.
        try: url_text = cls.getPathSaveHtml(curl,"read")
        except: url_text = None
        if url_text != None and update == False: return html.fromstring(url_text)
        else: url_text = None
        repeat = 0
        # Up to six attempts on top of the adapter-level retries.
        while url_text == None and repeat <=5:
            try:
                print(f"请求地址:{curl}")
                res = s.get(curl,stream=True, headers=cls.headers, timeout=5,allow_redirects=True)
                if type == "bytes":
                    url_text = res
                if type == "json":
                    cls.saveHtml(curl,res.text,type="json")
                    return json.loads(res.text)
                if type == None:
                    url_text = html.fromstring(res.text)
                    cls.saveHtml(curl,res.text)
            except:
                repeat += 1
                ntfy.sendMsg(f"请求失败:{curl}",sleep=1)
        return url_text

    @classmethod
    def getBytes(cls, url):
        """Fetch ``url`` and return the raw Response object."""
        return cls.getHTML(url,type="bytes")

    @classmethod
    def getJSON(cls,url,update=False):
        """Fetch ``url`` and return its body parsed as JSON."""
        return cls.getHTML(url,type="json",update=update)

    @classmethod
    def xpathData(cls,c_xpath,url=None,num=None,not_eq=None,update=False):
        """Evaluate ``c_xpath`` against ``url`` (or the last url used when None).

        ``not_eq`` filters out matching values; ``num`` selects one element
        (None when out of range). Returns None when the page can't be fetched.

        NOTE(review): calling with url=None before any url-carrying call
        raises AttributeError — cls.temp_url is only set here.
        """
        if url == None:
            url = cls.temp_url
        else:
            cls.temp_url = url
        result = []
        if update:
            # Drop the cache file so getHTML below re-fetches the page.
            html_cache_path = cls.getPathSaveHtml(url,"new")
            if os.path.exists(html_cache_path):
                try:
                    os.remove(html_cache_path)
                    ntfy.sendMsg(f"html_cache更新成功 {html_cache_path}")
                except:
                    ntfy.sendMsg(f"html_cache更新失败 {html_cache_path}")
        # Fetch (or load from cache) the html document.
        et = cls.getHTML(url)
        if et == None:
            return None
        # Collect matches, skipping values equal to not_eq.
        count = 1
        xpaths = et.xpath(c_xpath)
        for x in xpaths:
            if x != not_eq:
                result.append(x)
                count +=1
        if num != None:
            try:
                result = result[num]
            except:
                result = None
        return result

278
utils/NetUtils.py Normal file
View File

@ -0,0 +1,278 @@
from __future__ import print_function
from queue import Queue
from fake_useragent import UserAgent
import shutil,imghdr,concurrent.futures
import requests,os,json,time,re
from lxml import html
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from common.Constant import pathStr
from common.ComicInfo import ComicInfoUtils as ciUtils
from common.ComicInfo import ComicInfo as ci
from common.ComicInfo import Comic
from common.Constant import ComicPath
from utils.FileUtils import fileUtils as fu
class htmlUtils:
    """HTTP fetch helper (current): caches raw page text on disk keyed by a
    sanitized URL, evaluates XPath queries, and extracts dotted-path values
    from embedded JSON payloads."""
    # One random User-Agent per process; shared by every request.
    headers = {'User-Agent': UserAgent().random}
    url_data = {}

    # dotted-path JSON extraction used by the domain scrapers
    @classmethod
    def parseExec(cls,data,exec):
        """Walk ``data`` (dict or JSON string) along the dot-separated path in
        ``exec`` (e.g. "props.pageProps") and return the value reached.

        NOTE(review): if an intermediate key is missing, .get on None raises
        AttributeError rather than returning None.
        """
        if data !=None and exec != None:
            dots = str(exec).split(".")
            if not isinstance(data,dict): data = json.loads(data)
            for dot in dots:
                data = data.get(dot)
        return data

    @classmethod
    def getXpathData(cls,c_xpath,url=None,num=None,not_eq=None,update=False):
        """Thin alias for xpathData (kept for the newer call-site naming)."""
        return htmlUtils.xpathData(c_xpath=c_xpath,url=url,num=num,not_eq=not_eq,update=update)

    @classmethod
    def setXpathData(cls,url,xpath,exec,num=None,result_type=None,type=None,start_add=None):
        """Fetch ``url``, evaluate ``xpath``, then walk the dotted path ``exec``.

        With result_type="list" and a ``type`` key, maps each element to
        element[type], optionally prefixed with ``start_add`` (e.g. base URL).
        """
        result = cls.parseExec(htmlUtils.xpathData(xpath,url=url,num=num),exec)
        if result == None: return None
        if result_type == "list" and type != None:
            data = []
            for x in range(0, len(result)):
                if start_add != None:
                    data.append(start_add+result[x].get(type))
                else:
                    data.append(result[x].get(type))
            return data
        return result

    @classmethod
    def getPathSaveHtml(cls,url,type=None):
        """Map ``url`` to its cache-file path.

        type="new": path whether or not it exists; type="read": cached text
        (None if absent); otherwise the path only when the file exists.
        """
        rstr = r"[\/\\\:\*\?\"\<\>\|\.]" #  '/ \ : * ? " < > |'
        try:
            file_url = re.sub(rstr, "", url)
        except:
            file_url = "error_cache"
        file_path = os.path.join(pathStr.base_html_cache(),file_url)
        if type == "new":
            return file_path
        if os.path.exists(file_path):
            if type == "read":
                with open(file_path,"r",encoding="utf-8") as fs: return fs.read()
            return file_path
        else:
            return None

    @classmethod
    def saveHtml(cls,url,data,type=None):
        """Write ``data`` (JSON-encoded when type=="json") to the url's cache file."""
        file_path = cls.getPathSaveHtml(url,type="new")
        dir_name = os.path.dirname(file_path)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        with open(file_path,"w",encoding="utf-8") as fs:
            if type== "json": data = json.dumps(data)
            fs.write(str(data))

    @classmethod
    def remove_HtmlCache(cls,url):
        """Best-effort delete of the url's cache file."""
        file_path = cls.getPathSaveHtml(url,type="new")
        if os.path.exists(file_path):
            try:
                os.remove(file_path)
                print("已删除")
            except:
                print()

    @classmethod
    def getHTML(cls, curl,type=None,update=False):
        """Fetch ``curl``, preferring the on-disk cache.

        type=None -> lxml document; type="json" -> parsed JSON;
        type="bytes" -> raw Response. ``update`` forces a re-fetch.
        """
        url_text = None
        if update: cls.remove_HtmlCache(curl)
        retries = Retry(total=1, backoff_factor=0.5, status_forcelist=[ 500, 502, 503, 504 ])
        s = requests.Session()
        s.keep_alive = False
        s.mount('http://', HTTPAdapter(max_retries=retries))
        s.mount('https://', HTTPAdapter(max_retries=retries))
        # Serve from cache when present and no refresh was requested.
        try: url_text = cls.getPathSaveHtml(curl,"read")
        except: url_text = None
        if url_text != None and update == False: return html.fromstring(url_text)
        else: url_text = None
        repeat = 0
        # Up to six attempts on top of the adapter-level retry.
        while url_text == None and repeat <=5:
            try:
                print(f"请求地址:{curl}")
                res = s.get(curl,stream=True, headers=cls.headers, timeout=10,allow_redirects=True)
                if type == "bytes":
                    url_text = res
                if type == "json":
                    cls.saveHtml(curl,res.text,type="json")
                    return json.loads(res.text)
                if type == None:
                    url_text = html.fromstring(res.text)
                    cls.saveHtml(curl,res.text)
                res.close()
            except Exception as e:
                repeat += 1
                print(f"请求失败Exception: {e} {curl}")
        return url_text

    @classmethod
    def getBytes(cls, url):
        """Fetch ``url`` and return the raw Response object."""
        return cls.getHTML(url,type="bytes")

    @classmethod
    def getJSON(cls,url,update=False):
        """Fetch ``url`` and return its body parsed as JSON."""
        return cls.getHTML(url,type="json",update=update)

    @classmethod
    def xpathData(cls,c_xpath,url=None,num=None,not_eq=None,update=False):
        """Evaluate ``c_xpath`` against ``url`` (or the last url used when None).

        ``not_eq`` filters out matching values; ``num`` selects one element
        (None when out of range). Returns None when the page can't be fetched.

        NOTE(review): calling with url=None before any url-carrying call
        raises AttributeError — cls.temp_url is only set here.
        """
        if url == None: url = cls.temp_url
        else: cls.temp_url = url
        result = []
        if update:
            # Drop the cache file so getHTML below re-fetches the page.
            html_cache_path = cls.getPathSaveHtml(url,"new")
            if os.path.exists(html_cache_path):
                try:
                    os.remove(html_cache_path)
                    print(f"html_cache更新成功 {html_cache_path}")
                except:
                    print(f"html_cache更新失败 {html_cache_path}")
        # Fetch (or load from cache) the html document.
        et = cls.getHTML(url)
        if et == None:
            return None
        # Collect matches, skipping values equal to not_eq.
        count = 1
        xpaths = et.xpath(c_xpath)
        for x in xpaths:
            if x != not_eq:
                result.append(x)
                count +=1
        if num != None:
            try:
                result = result[num]
            except:
                result = None
        return result
class downloadUtils:
    """Thread-pooled image downloader built around a shared retry queue.

    Jobs are ``[file_name, image_url, dst_dir]`` triples on ``down_queue``;
    each worker drains the queue, writes payloads to a ``*.downloads``
    temp file and only moves them into place once they verify as images.
    Corrupt or failed jobs are re-queued for a later pass.
    """
    # Browser-like headers so image hosts serve us normally.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Proxy-Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
        "Accept-Encoding": "gzip, deflate, sdch",
        # 'Connection': 'close',
    }
    # Shared queue of pending [file_name, image_url, dst_dir] jobs.
    down_queue = Queue()
    @classmethod
    def common_download(cls,file_name,image_url,dst_dir,timeout=10,proxy=None,proxy_type=None):
        """Download one image to ``dst_dir/file_name``.

        Skips work if the target already exists; retries the HTTP GET up
        to 5 times on a non-200 status; re-queues the job when the
        payload does not verify as an image.
        """
        proxies = None
        if proxy_type is not None:
            proxies = {
                "http": proxy_type + "://" + proxy,
                "https": proxy_type + "://" + proxy }
        file_path = os.path.join(dst_dir, file_name)
        if os.path.exists(file_path):
            print("download_image 文件已存在,已跳过=",file_path)
            return None
        temp_path = os.path.join(dst_dir, file_name+".downloads")
        repair_count = 1
        response = requests.get(
            image_url, headers=cls.headers, timeout=timeout, proxies=proxies)
        # FIX: the old retry loop called cls.download_image() with a
        # mismatched argument list and never refreshed `response`, so it
        # spun without ever actually retrying.  Re-issue the GET instead.
        while response.status_code != 200 and repair_count <= 5:
            time.sleep(0.7)
            response.close()
            response = requests.get(
                image_url, headers=cls.headers, timeout=timeout, proxies=proxies)
            print(f'重试:第{repair_count}次 {image_url}')
            repair_count += 1
        with open(temp_path, 'wb') as f:
            f.write(response.content)
        response.close()
        # Accept the file only if it verifies as an image.
        if fu.ver_file(temp_path,type="image"):
            shutil.move(temp_path, file_path)
            print("## OK: {} {}".format(file_path, image_url))
        else:
            print("## Fail: {} {}".format(image_url, "图像损坏"))
            cls.down_queue.put([file_name,image_url,dst_dir])
    @classmethod
    def download_image(cls,timeout=20, proxy_type=None, proxy=None,type="image"):
        """Worker loop: drain ``down_queue``, retrying failed jobs.

        Stops after 10 passes in one call so a permanently-broken job
        cannot spin forever.
        """
        from queue import Empty  # local import: no file-level queue import visible
        repeat = 0
        while not cls.down_queue.empty() and repeat <= 10:
            repeat += 1
            # FIX: another worker can empty the queue between empty() and
            # get(False); treat that as normal completion, not a crash.
            try:
                data = cls.down_queue.get(False)
            except Empty:
                break
            (file_name,image_url,dst_dir) = [data[0],data[1],data[2]]
            if repeat > 1:
                print(f"第{repeat}次下载数据中... file_name={file_name}")
            try:
                cls.common_download(file_name,image_url,dst_dir)
            except Exception:
                print(f"下载重试中 {file_name}={image_url}")
                cls.down_queue.put([file_name,image_url,dst_dir])
    @classmethod
    def download_images(cls,image_urls, dst_dir,concurrency=None,timeout=20,proxy_type=None, proxy=None,files_name=None):
        """
        Download image according to given urls and automatically rename them in order.
        :param timeout: per-request timeout passed to the workers
        :param proxy: proxy address
        :param proxy_type: proxy scheme ("http"/"socks5"/...)
        :param image_urls: list of image urls
        :param dst_dir: output the downloaded images to dst_dir (created if missing)
        :param files_name: destination file name for each url, same order
        :param concurrency: number of requests processed simultaneously
        :return: none
        """
        if concurrency == None:
            # FIX: max_workers must be >= 1 — an empty url list used to
            # raise ValueError from ThreadPoolExecutor.
            concurrency = max(1, len(image_urls))
        with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
            future_list = list()
            count = 0
            if not os.path.exists(dst_dir):
                os.makedirs(dst_dir)
            for image_url in image_urls:
                file_name = files_name[count]
                cls.down_queue.put([file_name,image_url,dst_dir])
                future_list.append(executor.submit(
                    cls.download_image,timeout, proxy_type, proxy))
                count += 1
            concurrent.futures.wait(future_list, timeout)
    @classmethod
    def download_comic_icon(cls,is_new=None):
        """Download the comic cover and copy it into the CBZ output tree,
        then advance the chapter pipeline to the CBZ stage.

        ``is_new`` defaults to ``ciUtils.IS_NEW_ICON``, resolved at call
        time (FIX: the old default froze the value at import time).
        """
        if is_new is None:
            is_new = ciUtils.IS_NEW_ICON
        icon_url = Comic.getIcon()
        if icon_url == None:
            print("icon 不存在,已跳过")
            return None
        save_name = ci.COMIC_ICON_NAME
        # File extension taken from the URL, with any query string stripped.
        icon_prefix = "."+str(icon_url).split(".")[-1]
        icon_prefix = icon_prefix.split("?")[0]
        # Drop the cached cover when the recorded icon fingerprint changed.
        path_comic_icon = os.path.join(ComicPath.getDirConfComic(),save_name+icon_prefix)
        if not ciUtils.equIcon() and fu.exists(path_comic_icon):
            os.remove(path_comic_icon)
        if fu.notExists(path_comic_icon):
            cls.download_images([icon_url],ComicPath.getDirConfComic(),files_name=[save_name+icon_prefix])
        save_path = os.path.join(ComicPath.getDirCBZComic(),Comic.getChapterName()+icon_prefix)
        if is_new:
            # New layout: cover lives inside the chapter directory; remove
            # any cover left over from the old per-comic layout.
            if os.path.exists(save_path):
                os.remove(save_path)
            if os.path.exists(path_comic_icon):
                base_dir = ComicPath.getDirComicChapter()
                if not os.path.exists(base_dir): os.makedirs(base_dir)
                shutil.copy(path_comic_icon,os.path.join(base_dir,save_name+icon_prefix))
        else:
            if fu.notExists(ComicPath.getDirCBZComic()): os.makedirs(ComicPath.getDirCBZComic())
            shutil.copy(path_comic_icon,save_path)
            print(f"{path_comic_icon} 已复制至: {save_path}")
        # Record the icon and advance pipeline progress to the CBZ stage.
        ciUtils.iconDB()
        ciUtils.nextDownloadToCBZChapter()
        ciUtils.setProgress(ciUtils.PROGRESS_CBZ)

View File

@ -1,19 +0,0 @@
import requests,time
class ntfy:
    """Tiny push-notification helper backed by an ntfy-style server."""
    @classmethod
    def sendMsg(cls, msg, alert=True, sleep=None, error=None):
        """Print *msg* locally and optionally POST it to the ntfy topic.

        :param alert: when True, push the message to the remote server.
        :param sleep: optional seconds to pause after sending.
        :param error: when given, log it and report failure.
        :return: False when *error* is supplied, True otherwise.
        """
        try:
            print(f"#ntfy: {msg}")
            if alert:
                requests.post("https://ntfy.caiwenxiu.cn/PyComic",
                    data=msg.encode(encoding='utf-8'))
        # FIX: was a bare `except:` which also swallowed KeyboardInterrupt /
        # SystemExit; a notification failure must not crash a run, but it
        # should not eat shutdown signals either.
        except Exception:
            print(f"#ntfy error: {msg}")
        if sleep != None:
            time.sleep(int(sleep))
        if error != None:
            print(f"#ntfy Error: {error}")
            return False
        else:
            return True

View File

@ -1,494 +0,0 @@
from xml.dom.minidom import Document
import os,re
from utils.comic.PathStr import pathStr
import json,shutil
from utils.FileUtils import dbUtils
from utils.ComicUtils import fontUtils
from utils.OldUtils import OldUtils
class comicInfo():
    """Global, class-level registry of the comic/chapter currently being
    processed, plus a serialiser to ComicInfo.xml (the comic-archive
    metadata format read by CBZ viewers).

    The class is used as a process-wide singleton: all ``set*`` helpers
    mutate class attributes, and the pipeline reads them back through
    the ``get*`` helpers and the progress/DB methods.
    """
    # ---- constants ----
    COMIC_ICON_NAME = "000"
    COMIC_INFO_XML = "ComicInfo.xml"
    # Pipeline stage markers persisted per chapter via dbUtils.
    PROGRESS_INFO = "info"
    PROGRESS_DOWN = "download"
    # NOTE(review): PROGRESS_IMG duplicates PROGRESS_DOWN ("download") —
    # confirm whether a distinct "img" stage value was intended.
    PROGRESS_IMG = "download"
    PROGRESS_CBZ = "cbz"
    PROGRESS_DONE = "done"
    PROGRESS_NONE = "none"
    IS_NEW_ICON = False
    document = Document()
    path_comic_info = None
    # ---- ComicInfo.xml element names ----
    root = "ComicInfo"
    chapter = "Title"
    comic_name = "Series"
    number = "Number"
    dep = "Summary"
    author = "Writer"
    genre = "Genre"
    cbs = "Publisher"
    lang = "LanguageISO"
    comic_names = "SeriesGroup"
    tags = "Tags"
    date_year = "Year"
    date_month = "Month"
    date_day = "Day"
    page_count = "PageCount"
    pages = "Pages"
    web = "Web"
    age_rating = "AgeRating"
    # ---- raw string state set by the set* helpers ----
    str_comic_name = None
    str_chapter = None
    str_number = None
    str_icon = None
    str_homepage = None
    # NOTE(review): declared as str_listchapter but setListChapter assigns
    # cls.str_list_chapter — getLenChapters only works after that setter runs.
    str_listchapter = None
    str_chapter_imgs = None
    str_update_at = None
    str_date_year = None
    str_date_month = None
    str_date_day = None
    str_page_count = None
    str_web = None
    str_list_img = None
    str_files_img = None
    str_chapter_index= None
    str_value1 = None
    str_current_chapter_img = None
    list_skip = []
    # ---- prepared DOM nodes, assembled by writeComicInfoXML ----
    chapter_node = None
    comic_name_node = None
    number_node = None
    dep_node = None
    author_node = None
    genre_node = None
    cbs_node = None
    lang_node = None
    comic_names_node = None
    tags_node = None
    date_year_node = None
    date_month_node = None
    date_day_node = None
    page_count_node = None
    pages_node = None
    web_node = None
    age_rating_node = None
    @classmethod
    def setNodeAndValue(cls,node,value):
        """Build a <node>value</node> DOM element (Chinese text converted
        via fontUtils, newlines stripped); None when value is None."""
        if value != None:
            c_node = cls.document.createElement(node)
            node_text = cls.document.createTextNode(fontUtils.ChineseConvert(str(value).replace("\n","")))
            c_node.appendChild(node_text)
            return c_node
        return None
    @classmethod
    def root_node(cls): return cls.document.createElement(cls.root)
    @classmethod
    def setChapterName(cls,value):
        # Sanitised chapter name is kept both here and in OldUtils.
        cls.str_chapter = fontUtils.ChineseConvert(cls.fixFileName(value))
        OldUtils.setOldChapter(cls.fixFileName(value))
        cls.chapter_node = cls.setNodeAndValue(cls.chapter,value)
    @classmethod
    def setComicValue(cls,value):
        # NOTE(review): `value != None or value != ""` is always True, so
        # this passes every value through (including "" and None is the
        # only falsy filtered... in fact nothing is filtered) — confirm
        # whether `and` was intended.
        result = None
        if value != None or value != "": result = value
        return result
    @classmethod
    def setListChapter(cls, value): cls.str_list_chapter = cls.setComicValue(value)
    @classmethod
    def setChapterImgs(cls, value): cls.str_chapter_imgs = cls.setComicValue(value)
    @classmethod
    def setCurrentChapterImg(cls, value): cls.str_current_chapter_img = cls.setComicValue(value)
    @classmethod
    def getCurrentChapterImg(cls): return cls.str_current_chapter_img
    @classmethod
    def getChapterImgs(cls): return cls.str_chapter_imgs
    @classmethod
    def getLenChapters(cls): return len(cls.str_list_chapter)
    @classmethod
    def setComicName(cls,value):
        # Sanitised comic name mirrors into OldUtils for legacy paths.
        cls.str_comic_name = fontUtils.ChineseConvert(cls.fixFileName(value))
        OldUtils.setOldComicName(cls.fixFileName(value))
        cls.comic_name_node = cls.setNodeAndValue(cls.comic_name, value)
    @classmethod
    def setComicNames(cls,value):
        # De-duplicate comma-separated aliases (order is not preserved).
        value = ",".join(set(str(fontUtils.ChineseConvert(value)).split(",")))
        cls.comic_names_node = cls.setNodeAndValue(cls.comic_names,value)
    @classmethod
    def setNumber(cls,value):
        cls.str_number = value
        cls.number_node = cls.setNodeAndValue(cls.number, value)
    @classmethod
    def getNumber(cls): return cls.str_number
    @classmethod
    def setDep(cls,value): cls.dep_node = cls.setNodeAndValue(cls.dep, value)
    @classmethod
    def setAuthor(cls,value): cls.author_node = cls.setNodeAndValue(cls.author,cls.getListToString(value))
    @classmethod
    def setLang(cls,value): cls.lang_node = cls.setNodeAndValue(cls.lang, value)
    @classmethod
    def setAgeRating(cls,value): cls.age_rating_node = cls.setNodeAndValue(cls.age_rating, value)
    @classmethod
    def setGenre(cls,value): cls.genre_node = cls.setNodeAndValue(cls.genre, cls.getListToString(value))
    @classmethod
    def setTags(cls,value): cls.tags_node = cls.setNodeAndValue(cls.tags,cls.getListToString(value))
    @classmethod
    def setCBS(cls,value): cls.cbs_node = cls.setNodeAndValue(cls.cbs,value)
    @classmethod
    def setWeb(cls,value):
        cls.str_web = cls.setComicValue(value)
        cls.web_node = cls.setNodeAndValue(cls.web,cls.setComicValue(value))
    @classmethod
    def setChapterListImg(cls,value): cls.str_list_img=cls.setComicValue(value)
    @classmethod
    def setValue1(cls,value): cls.str_value1 = value
    @classmethod
    def getValue1(cls): return cls.str_value1
    @classmethod
    def getChapterListImg(cls): return cls.str_list_img
    @classmethod
    def setChapterFilesName(cls,value): cls.str_files_img=cls.setComicValue(value)
    @classmethod
    def getChapterFilesName(cls): return cls.str_files_img
    @classmethod
    def getWeb(cls): return cls.str_web
    @classmethod
    def setPageCount(cls,value):
        cls.str_page_count = cls.setComicValue(int(value))
        cls.page_count_node = cls.setNodeAndValue(cls.page_count,cls.str_page_count)
    @classmethod
    def setPages(cls,value):
        """Build the <Pages> element from the chapter's image file names.

        File extensions are stripped; when IS_NEW_ICON is set an extra
        FrontCover page ("000") is prepended and counted.
        """
        if value != None:
            su = "."+str(value[0]).split(".")[-1]
            join_list=",".join(value).replace(su,"")
            value = join_list.split(",")
            cls.setPageCount(len(value)+1 if cls.IS_NEW_ICON else len(value))
            root_node = cls.document.createElement(cls.pages)
            if cls.IS_NEW_ICON:
                # Prepend the cover page entry.
                icon_node = cls.document.createElement("Page")
                icon_node.setAttribute("Image","000")
                icon_node.setAttribute("Type","FrontCover")
                root_node.appendChild(icon_node)
            for page in value:
                c_node = cls.document.createElement("Page")
                # Keep only the numeric suffix after the last underscore.
                page = page.split("_")[-1]
                c_node.setAttribute("Image",page)
                root_node.appendChild(c_node)
            cls.pages_node = root_node
    @classmethod
    def setDate(cls,value,split):
        """Split a date string on *split* into Year/Month/Day nodes."""
        values = str(value).split(split)
        cls.str_date_year = values[0]
        cls.str_date_month = values[1]
        cls.str_date_day = values[2]
        cls.date_year_node = cls.setNodeAndValue(cls.date_year,values[0])
        cls.date_month_node = cls.setNodeAndValue(cls.date_month,values[1])
        cls.date_day_node = cls.setNodeAndValue(cls.date_day,values[2])
    @classmethod
    def setIcon(cls,value):
        cls.str_icon = cls.setComicValue(value)
        return cls.str_icon
    @classmethod
    def setHomePage(cls, value): cls.str_homepage = cls.setComicValue(value)
    @classmethod
    def getHomePage(cls): return cls.str_homepage
    @classmethod
    def setUpdateAt(cls, value): cls.str_update_at = cls.setComicValue(value)
    @classmethod
    def getUpdateAt(cls): return cls.str_update_at
    @classmethod
    def getListToString(cls,to_list):
        """Join list values with commas; pass scalars through unchanged."""
        value = to_list
        if isinstance(to_list,list):
            value = ",".join(to_list)
        return value
    @classmethod
    def setChapterIndex(cls,value):
        cls.setNumber(value)
        cls.str_chapter_index = cls.setComicValue(value)
    @classmethod
    def getChapterIndex(cls): return cls.str_chapter_index
    @classmethod
    def setComicNameSkips(cls,value): return cls.list_skip.append(value)
    @classmethod
    def getIsComicNameSkips(cls,value):
        # NOTE(review): substring match against the joined skip list, so
        # partial names also match — confirm this is intended.
        return value in ",".join(cls.list_skip)
    @classmethod
    def getBaseUrl(cls,url=None):
        """Return the scheme+host part of *url* (text before the 3rd '/')."""
        if url == None:
            url = cls.str_homepage
        num = 3
        index = 0
        for x in range(0, num):
            index = str(url).find("/",index)+1
        return url[0:index-1]
    @classmethod
    def getIcon(cls): return cls.str_icon
    @classmethod
    def getComicName(cls): return cls.str_comic_name
    @classmethod
    def getChapter(cls): return cls.str_chapter
    @classmethod
    def fixFileName(cls,filename,replace=None):
        """Strip filesystem-unsafe characters and trailing spaces from a
        file name; unsafe characters are replaced with *replace* ("")."""
        intab = r'[?*/\|.:><]'
        str_replace = ""
        if replace != None:
            str_replace = replace
        filename = re.sub(intab, str_replace, filename)
        # Trim trailing spaces by scanning backwards from the end.
        count = 1
        while True:
            str_file = filename[0-count]
            if str_file == " ":
                count += 1
            else:
                filename = filename[0:len(filename)+1-count]
                break
        return filename
    @classmethod
    def getDirConfComic(cls):
        # Exits the process when no comic is selected — callers rely on
        # a comic having been set first.
        if cls.str_comic_name != None:
            return os.path.join(pathStr.base_conf_path(), cls.str_comic_name)
        else:
            print("comicName不存在退出中")
            exit()
    @classmethod
    def getDirCBZComic(cls):
        if cls.str_comic_name != None:
            return os.path.join(pathStr.base_cbz(), cls.str_comic_name)
        else:
            print("comicName不存在退出中 getDirCBZComic")
            exit()
    @classmethod
    def getDirCBZComicChapter(cls):
        if cls.str_comic_name != None and cls.str_chapter != None:
            return os.path.join(pathStr.base_cbz(),cls.str_comic_name,cls.str_chapter)
        else:
            print("comicName不存在退出中 getDirCBZComicChapter")
            exit()
    @classmethod
    def getSortDirCBZComicChapter(cls):
        """Chapter dir name prefixed with its sort index, or None."""
        if cls.str_comic_name != None and cls.str_chapter != None and cls.str_chapter_index != None:
            return os.path.join(pathStr.base_cbz(),cls.str_comic_name,str(cls.str_chapter_index)+" "+cls.str_chapter)
        else:
            print("comicName不存在退出中 getSortDirCBZComicChapter")
            return None
    @classmethod
    def getNewCBZComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".CBZ", type)
    @classmethod
    def getNewIconComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".jpg", type)
    @classmethod
    def getNewFileCBZComicChapter(cls,type="file"): return cls.getNewToComicChapter(".CBZ", type)
    @classmethod
    def getNewFileIconComicChapter(cls,type="file"): return cls.getNewToComicChapter(".jpg", type)
    @classmethod
    def getNewToComicChapter(cls,su,type="dir"):
        """Migrate a sort-prefixed chapter file ("<idx> <name><su>") back
        to the plain chapter path, returning the dir or file path."""
        c_dir = cls.getDirCBZComicChapter()
        s_dir = cls.getSortDirCBZComicChapter()
        c_path = cls.getDirCBZComicChapter()+su
        s_path = cls.getSortDirCBZComicChapter()+su
        if os.path.exists(s_path) and s_path != None:
            shutil.move(s_path, c_path)
            print("文件已移动至:", c_path)
        if type == "file":
            return c_path
        return c_dir
    @classmethod
    def getDirComic(cls):
        if cls.str_comic_name != None:
            return os.path.join(pathStr.base_comic_img(), cls.str_comic_name)
        else:
            print("comicName不存在退出中")
            exit()
    @classmethod
    def getDirComicChapter(cls):
        if cls.str_comic_name != None and cls.str_chapter != None:
            return os.path.join(pathStr.base_comic_img(),cls.str_comic_name,cls.str_chapter)
        else:
            print("comicName与chapter 不存在退出中")
            exit()
    @classmethod
    def getPathComicInfoXML(cls):
        """Path of this chapter's ComicInfo.xml, or None if names unset."""
        try:
            cls.path_comic_info = os.path.join(pathStr.base_comic_img(),cls.str_comic_name,cls.str_chapter, cls.COMIC_INFO_XML)
        except:
            return None
        return cls.path_comic_info
    @classmethod
    def writeComicInfoXML(cls,chapter=None,path=None,overlay=False):
        """Assemble all prepared nodes into a fresh document and write
        ComicInfo.xml; existing files are kept unless *overlay* is set."""
        root = cls.root_node()
        new_document = Document()
        new_document.appendChild(root)
        if cls.chapter_node != None: root.appendChild(cls.chapter_node)
        if cls.comic_name_node != None: root.appendChild(cls.comic_name_node)
        if cls.number_node != None: root.appendChild(cls.number_node)
        if cls.dep_node != None: root.appendChild(cls.dep_node)
        if cls.author_node != None: root.appendChild(cls.author_node)
        if cls.genre_node != None: root.appendChild(cls.genre_node)
        if cls.cbs_node != None: root.appendChild(cls.cbs_node)
        if cls.lang_node != None: root.appendChild(cls.lang_node)
        if cls.age_rating_node != None: root.appendChild(cls.age_rating_node)
        if cls.comic_names_node != None: root.appendChild(cls.comic_names_node)
        if cls.tags_node != None: root.appendChild(cls.tags_node)
        if cls.date_year_node != None: root.appendChild(cls.date_year_node)
        if cls.date_month_node != None: root.appendChild(cls.date_month_node)
        if cls.date_day_node != None: root.appendChild(cls.date_day_node)
        if cls.page_count_node != None: root.appendChild(cls.page_count_node)
        if cls.pages_node != None: root.appendChild(cls.pages_node)
        cls.getPathComicInfoXML()
        if path != None: cls.path_comic_info = os.path.join(path,cls.COMIC_INFO_XML)
        base_dir = os.path.dirname(cls.path_comic_info)
        if not os.path.exists(base_dir): os.makedirs(base_dir)
        if os.path.exists(cls.path_comic_info) and not overlay:
            print(f"{cls.COMIC_INFO_XML} 已存在")
            return None
        with open(cls.path_comic_info , "w", encoding="utf-8") as fo:
            new_document.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8")
            fo.close()
        print(f"{cls.COMIC_INFO_XML} 已生成 pathd=", cls.path_comic_info)
    # file save helper (data is always json-dumped before writing)
    @classmethod
    def file_save(cls,path,data,mode=None,print_msg=False):
        """Serialise *data* as JSON and write it to *path*, replacing any
        existing file.  Returns a human-readable status string."""
        result = {}
        f = {}
        dir_name = os.path.dirname(path)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        save_path = os.path.join(path)
        if os.path.exists(save_path):
            os.remove(save_path)
        data = json.dumps(data)
        if mode == None:
            mode = "w+"
        try:
            f = open(save_path, mode, encoding="utf-8")
            f.write(data)
            f.close()
            if print_msg:
                print("data=",data)
            result = path + "文件写入成功"
        except:
            result = path + "文件写入失败"
        print(result)
        return result
    @classmethod
    def nextSavePath(cls,next,data=None):
        """Compute (and optionally write) the per-chapter state file
        "<conf dir>/<next><chapter>"."""
        save_path = cls.getDirConfComic()+"/"+next+cls.str_chapter
        if data != None:
            cls.file_save(save_path, data)
        return save_path
    @classmethod
    def nextSaveInfoChapter(cls,chapter,data=None):
        """Persist the chapter image list and mark the 'info' stage."""
        if data == None: data = cls.getChapterImgs()
        if cls.str_chapter != chapter:
            print(f"chapter {cls.str_chapter}{chapter} 不一致,已自动跳过")
        cls.setProgress(cls.PROGRESS_INFO)
        cls.nextSavePath("info_",data)
    @classmethod
    def nextInfoToImgChapter(cls): cls.setProgress(cls.PROGRESS_IMG)
    @classmethod
    def nextImgToDownloadChapter(cls): cls.setProgress(cls.PROGRESS_DOWN)
    @classmethod
    def nextDownloadToCBZChapter(cls): cls.setProgress(cls.PROGRESS_CBZ)
    @classmethod
    def nextCBZToDoneChapter(cls): cls.setProgress(cls.PROGRESS_DONE)
    @classmethod
    def nextDoneSave(cls,data): cls.nextSavePath("done_",data)
    @classmethod
    def setProgress(cls,progress):
        # Persist the chapter's pipeline stage keyed by comic name.
        dbUtils.setComic(cls.str_chapter,progress,cls.str_comic_name)
    @classmethod
    def isProgress(cls,progress,remove=None):
        """True when the chapter is at *progress*; *remove* resets first."""
        if remove: cls.setProgress("None")
        return dbUtils.query(cls.str_chapter,progress,cls.str_comic_name)
    @classmethod
    def iconDB(cls): dbUtils.setComic(cls.str_comic_name,cls.str_icon,"icons")
    @classmethod
    def equIcon(cls): return dbUtils.query(cls.str_comic_name,cls.str_icon,"icons")
    @classmethod
    def setConfDirComicPath(cls,file_name,comic_name=None):
        if comic_name != None: cls.setComicName(comic_name)
        return os.path.join(cls.getDirConfComic(),file_name)
    @classmethod
    def saveConfComicData(cls,file_name,data,comic_name=None): cls.file_save(cls.setConfDirComicPath(file_name,comic_name), data)
    @classmethod
    def getPathInitConfComicData(cls,file_name,comic_name=None): return cls.setConfDirComicPath(file_name,comic_name)
    @classmethod
    def updateComicDate(cls,date=None):
        """Record the comic's latest update timestamp in the DB."""
        update_at = cls.getUpdateAt()
        if date != None:
            update_at = date
        dbUtils.setComic(cls.str_comic_name, update_at, "update")
    @classmethod
    def isUpdateComic(cls):
        """True when the stored update timestamp matches the current one."""
        return dbUtils.query(cls.str_comic_name, cls.str_update_at,"update")
    @classmethod
    def comicChapterDownload(cls,imgs,names):
        """Register a chapter's image URLs and target file names."""
        cls.setChapterImgs(imgs)
        cls.setChapterListImg(imgs)
        cls.setPages(names)
        cls.setChapterFilesName(names)
    @classmethod
    def setComicInfo(cls,comicname=None,homepage=None,alias=None,author=None,icon=None,tags=None,
        dep=None,genre=None,lang=None,age_rating=None,chapters=None,update_at=None,current_chapter_img=None):
        """One-shot population of all comic-level metadata fields."""
        # Normalise author separators ('&' and spaces) to commas.
        author = str(author).replace("&",",").replace(" ",",")
        cls.setHomePage(homepage)
        cls.setComicName(str(comicname))
        if alias != None: comicInfo.setComicNames(alias)
        cls.setAuthor(author)
        cls.setIcon(icon)
        cls.setTags(tags)
        cls.setDep(dep)
        #comicInfo.setCBS("韩漫")
        if genre != None: cls.setGenre(genre)
        cls.setLang(lang)
        cls.setAgeRating(age_rating)
        cls.setListChapter(chapters)
        cls.setUpdateAt(update_at)
        cls.setCurrentChapterImg(current_chapter_img)

View File

@ -1,66 +0,0 @@
import os,datetime
from time import strftime
class pathStr:
    """Registry of the on-disk layout used by the comic pipeline.

    All paths hang off the class-wide root ``base_comic_out``; selecting
    a source site via setComicMainPath() nests the root one level deeper,
    which transparently relocates every base_* directory.
    """
    comic_name = None
    comic_jm = "JM"
    comic_bz = "BZ"
    comic_rm = "RM"
    comic_url_main = None
    base_comic_out = os.path.join("/mnt", "Comics")
    old_cbz_path = os.path.join("/mnt", "OldComics")
    @classmethod
    def getBaseComicPath(cls, join_path):
        """Resolve *join_path* under the current output root."""
        return os.path.join(cls.base_comic_out, join_path)
    @classmethod
    def base_cbz(cls):
        """Directory of finished .CBZ archives."""
        return cls.getBaseComicPath("CBZ")
    @classmethod
    def base_comic_img(cls):
        """Directory of raw downloaded chapter images."""
        return cls.getBaseComicPath("outputComic")
    @classmethod
    def base_conf_path(cls):
        """Directory of per-comic configuration/state files."""
        return cls.getBaseComicPath(".conf")
    @classmethod
    def base_html_cache(cls):
        """Directory of the on-disk HTML response cache."""
        return cls.getBaseComicPath("html_cache")
    @classmethod
    def base_html_chapter(cls):
        """Directory recording chapter-update pages."""
        return cls.getBaseComicPath("html_updated")
    @classmethod
    def base_comic_update(cls):
        """Directory of update bookkeeping files."""
        return cls.getBaseComicPath("comic_update")
    @classmethod
    def base_db(cls):
        """Directory of the key/value database files."""
        return cls.getBaseComicPath("db")
    @classmethod
    def setComicMainAndPath(cls, value):
        """Select the active site AND nest the output root under it."""
        cls.setComicMain(value)
        cls.setComicMainPath(value)
    @classmethod
    def setComicMain(cls, value):
        cls.comic_name = value
    @classmethod
    def getComicMain(cls):
        return cls.comic_name
    @classmethod
    def setComicMainPath(cls, value):
        # Mutates the shared root: every base_* path changes from now on.
        cls.base_comic_out = os.path.join(cls.base_comic_out, value)
    @classmethod
    def base_html_week(cls):
        """Week-stamped HTML directory under the output root."""
        return os.path.join(cls.base_comic_out, "html_" + str(cls.getDatePath()))
    @classmethod
    def getDatePath(cls):
        """Date label for today: year, month and week-of-month."""
        now = datetime.datetime.now()
        week = cls.get_week_of_month(now.year, now.month, now.day)
        return f"{now.year}年{now.month}周{week}"
    @classmethod
    def get_week_of_month(cls, year, month, day):
        """Zero-padded week-of-month (Monday-first, per %W) of a date."""
        first = int(datetime.date(year, month, 1).strftime("%W"))
        current = int(datetime.date(year, month, day).strftime("%W"))
        return "{:0>2d}".format(current - first + 1)

View File

@ -1,174 +0,0 @@
""" Download image according to given urls and automatically rename them in order. """
# -*- coding: utf-8 -*-
# author: Yabin Zheng
# Email: sczhengyabin@hotmail.com
from __future__ import print_function
from queue import Queue
import shutil
import imghdr
import os
import concurrent.futures
import requests
import time
from utils.Ntfy import ntfy
from utils.comic.ComicInfo import comicInfo
from utils.HtmlUtils import htmlUtils
from utils.FileUtils import fileUtils as fu
# Browser-like request headers so image hosts serve us normally.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Proxy-Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
    "Accept-Encoding": "gzip, deflate, sdch",
    # 'Connection': 'close',
}
# Shared work queue of [file_name, image_url, dst_dir] jobs; failed or
# corrupt downloads are pushed back here for a later retry pass.
down_queue = Queue()
def common_download(file_name,image_url,dst_dir,timeout=10,proxy=None,proxy_type=None):
    """Download one image to ``dst_dir/file_name``.

    Skips work when the target file already exists.  Retries the HTTP
    GET up to 5 times on a non-200 status; the payload is written to a
    ``*.downloads`` temp file and only moved into place once it verifies
    as an image, otherwise the job is re-queued on ``down_queue``.
    """
    proxies = None
    if proxy_type is not None:
        proxies = {
            "http": proxy_type + "://" + proxy,
            "https": proxy_type + "://" + proxy }
    file_path = os.path.join(dst_dir, file_name)
    if os.path.exists(file_path):
        print("download_image 文件已存在,已跳过=",file_path)
        return None
    temp_path = os.path.join(dst_dir, file_name+".downloads")
    repair_count = 1
    response = requests.get(
        image_url, headers=headers, timeout=timeout, proxies=proxies)
    # FIX: the old retry loop called download_image() with a mismatched
    # argument list and never refreshed `response`, so it could spin 5
    # times without ever retrying.  Re-issue the GET instead.
    while response.status_code != 200 and repair_count <= 5:
        time.sleep(0.7)
        response.close()
        response = requests.get(
            image_url, headers=headers, timeout=timeout, proxies=proxies)
        ntfy.sendMsg(f'重试:第{repair_count}次 {image_url}')
        repair_count += 1
    with open(temp_path, 'wb') as f:
        f.write(response.content)
    response.close()
    # Accept the file only if it verifies as an image.
    if fu.ver_file(temp_path,type="image"):
        shutil.move(temp_path, file_path)
        print("## OK: {} {}".format(file_path, image_url))
    else:
        print("## Fail: {} {}".format(image_url, "图像损坏"))
        down_queue.put([file_name,image_url,dst_dir])
def download_image(timeout=20, proxy_type=None, proxy=None,type="image"):
    """Worker loop: drain the shared download queue, re-queuing failures.

    A single call makes at most 11 passes so a permanently-broken job
    cannot spin forever.
    """
    attempts = 0
    while attempts <= 10 and not down_queue.empty():
        attempts += 1
        job = down_queue.get(False)
        file_name, image_url, dst_dir = job[0], job[1], job[2]
        if attempts > 1:
            ntfy.sendMsg(f"第{attempts}次下载数据中... file_name={file_name}")
        try:
            common_download(file_name, image_url, dst_dir)
        except:
            ntfy.sendMsg(f"下载重试中 {file_name}={image_url}")
            down_queue.put([file_name, image_url, dst_dir])
def download_images(image_urls, dst_dir,concurrency=None,timeout=20,proxy_type=None, proxy=None,files_name=None):
    """Queue every (files_name[i], image_urls[i]) pair on the shared
    download queue and fan out worker threads to fetch them.

    :param image_urls: list of image urls
    :param dst_dir: output directory for the downloaded images (created
        if missing)
    :param files_name: destination file name for each url, same order
    :param concurrency: worker thread count; defaults to one per url
    :param timeout: overall wait applied to the worker futures
    :param proxy_type: proxy scheme forwarded to the workers
    :param proxy: proxy address forwarded to the workers
    :return: none
    """
    if concurrency == None:
        concurrency = len(image_urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        futures = []
        for idx, image_url in enumerate(image_urls):
            down_queue.put([files_name[idx], image_url, dst_dir])
            futures.append(
                executor.submit(download_image, timeout, proxy_type, proxy))
        concurrent.futures.wait(futures, timeout)
def download_comic_icon(is_new=comicInfo.IS_NEW_ICON):
    """Download the comic's cover image and copy it into the CBZ output
    tree, then advance the chapter pipeline state to the CBZ stage.

    NOTE(review): the default for *is_new* is bound once at import time
    from comicInfo.IS_NEW_ICON — later changes to that flag do not affect
    callers that rely on the default.
    """
    icon_url = comicInfo.getIcon()
    if icon_url == None:
        print("icon 不存在,已跳过")
        return None
    save_name = comicInfo.COMIC_ICON_NAME
    # File extension taken from the URL, with any query string stripped.
    icon_prefix = "."+str(icon_url).split(".")[-1]
    icon_prefix = icon_prefix.split("?")[0]
    # Skip when comicname/cover already exists with the same recorded icon;
    # a changed icon fingerprint forces a re-download.
    path_comic_icon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_prefix)
    if not comicInfo.equIcon() and fu.exists(path_comic_icon):
        os.remove(path_comic_icon)
    if fu.notExists(path_comic_icon):
        download(icon_url, path_comic_icon)
    #if not os.path.exists(path_cbz_comic):
    #    os.makedirs(path_cbz_comic)
    save_path = os.path.join(comicInfo.getDirCBZComic(),comicInfo.getChapter()+icon_prefix)
    if is_new:
        # New layout: the cover lives inside the chapter directory; remove
        # any cover file left over from the old per-comic layout.
        if os.path.exists(save_path):
            os.remove(save_path)
        if os.path.exists(path_comic_icon):
            base_dir = comicInfo.getDirComicChapter()
            if not os.path.exists(base_dir): os.makedirs(base_dir)
            shutil.copy(path_comic_icon,os.path.join(base_dir,save_name+icon_prefix))
    else:
        if fu.notExists(comicInfo.getDirCBZComic()): os.makedirs(comicInfo.getDirCBZComic())
        shutil.copy(path_comic_icon,save_path)
        print(f"{path_comic_icon} 已复制至: {save_path}")
    # Record the icon fingerprint and advance progress to the CBZ stage.
    comicInfo.iconDB()
    comicInfo.nextDownloadToCBZChapter()
    comicInfo.setProgress(comicInfo.PROGRESS_CBZ)
# download helper
def download(url,path,file_type=None):
    """Download *url* to *path* through htmlUtils.getBytes.

    An existing valid image at *path* short-circuits with a message; a
    corrupt one is re-downloaded.  Data is streamed into ``*.downloads``
    and moved into place on completion.  Returns the final path (or the
    skip message).
    """
    if os.path.exists(path):
        if imghdr.what(path):
            msg = "已存在同路径文件,已跳过:"+path
            print(msg)
            return msg
        else:
            print("文件已损坏,已重试:"+path)
    # Strip any query-string remnants from the target file name.
    path = os.path.join(os.path.dirname(path),str(os.path.basename(path)).split("?")[0])
    tmp_file = path+".downloads"
    if os.path.exists(tmp_file):
        os.remove(tmp_file)
        print("存在缓存文件,已删除:",tmp_file)
    repair_count = 1
    res = htmlUtils.getBytes(url)
    # NOTE(review): getBytes can return None after repeated request
    # failures, in which case res.status_code below raises
    # AttributeError — confirm callers guard against dead URLs.
    while res.status_code != 200 and repair_count <= 5:
        res = htmlUtils.getBytes(url)
        print(f'重试:第{repair_count}次 {url}')
        repair_count += 1
    # Warn (but continue) when an image was requested and the
    # content-type disagrees.
    if file_type == "image":
        if 'image' not in res.headers.get("content-type",""):
            print(f"url= {url} Error: URL doesnot appear to be an image")
    basedir= os.path.dirname(path)
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    #expected_length = res.headers.get('Content-Length')
    #actual_length = res.raw.tell()
    with open(tmp_file, 'wb') as f:
        # Iterating the streaming Response yields the payload in chunks.
        for ch in res:
            f.write(ch)
        f.close()
    shutil.move(tmp_file, path)
    print(f"url={url} 保存至:{path}")
    return path