diff --git a/.gitignore b/.gitignore index 2d73077..a24a5ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ COMICOUT/ .conf/ -**/__pycache__/** \ No newline at end of file +**/__pycache__/** +.vscode \ No newline at end of file diff --git a/utils/base/BaseComicEntity.py b/common/BaseComicEntity.py similarity index 53% rename from utils/base/BaseComicEntity.py rename to common/BaseComicEntity.py index de747ce..6ee6ca0 100644 --- a/utils/base/BaseComicEntity.py +++ b/common/BaseComicEntity.py @@ -1,16 +1,16 @@ import json,os,time,random,shutil -from utils.HtmlUtils import htmlUtils +from utils.NetUtils import htmlUtils from utils.FileUtils import imageUtils -from utils.comic.ComicInfo import comicInfo as ci -from utils.CBZUtils import CBZUtils -from utils.downloader import download_images -from utils.downloader import download_comic_icon -from utils.Ntfy import ntfy -from entity.down.RouMan import comicCommon as RouManComicCommon -from entity.down.JM import comicCommon as JMComicCommon -from entity.down.BaoZi import comicCommon as BaoZiComicCommon -from utils.comic.PathStr import pathStr +from utils.ComicUtils import CBZUtils +from utils.NetUtils import downloadUtils +from utils.ComicUtils import ntfy from utils.FileUtils import fileUtils as fu +from domain.Domains import domains +from common.ComicInfo import ComicInfoUtils as ciUtils +from common.ComicInfo import ComicInfo as ci +from common.Comic import Comic +from common.Comic import ListComic +from common.Constant import ComicPath class baseComic: count_chapter = 0 @@ -18,28 +18,37 @@ class baseComic: #校验该漫画是否为最新 # Y/跳过 N/下载 返回下载链接 @classmethod - def updateComics(cls): - (book_name,comic_href,updated) = [ci.getComicName(),ci.getCurrentChapterImg(),ci.getUpdateAt()] + def updateComics(cls,chapters_xpath): + comics = ListComic.getListComicsLinksUpdateAt() + try: + (book_name,comic_href,updated) = [comics[0],comics[1],comics[2]] + except: + return False + cls.updateOneComic(book_name,comic_href,updated,chapters_xpath) + return True + + @classmethod + def updateOneComic(cls,book_name,comic_href,update_at,chapters_xpath): #白名单跳过 - if ci.getIsComicNameSkips(book_name): return None - if not ci.isUpdateComic(): + if ciUtils.getIsComicNameSkips(book_name): return None + if not ciUtils.isUpdateComic(): ntfy.sendMsg(f"开始下载 漫画:{book_name}") - return comic_href + Comic.setCurrentDownLink(comic_href) else: ntfy.sendMsg(f"{book_name} 已是最新") - chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()',url=comic_href,update=False) + chapters = htmlUtils.xpathData(chapters_xpath,url=comic_href,update=False) chapter_index = 1 for chapter in chapters: - ci.setChapterIndex(chapter_index) - ci.setChapterName(chapter) - cbz_path = ci.getNewCBZComicChapter("file") - icon_path = ci.getNewIconComicChapter("file") + Comic.setNumber(chapter_index) + Comic.setChapter(chapter) + cbz_path = ComicPath.getNewCBZComicChapter("file") + icon_path = ComicPath.getNewIconComicChapter("file") CBZUtils.replaceZip(cbz_path) #判断漫画是否完成 - if ci.isProgress(ci.PROGRESS_DONE) and not os.path.exists(cbz_path): ci.isProgress(ci.PROGRESS_DONE,remove=True) + if ciUtils.isProgress(ciUtils.PROGRESS_DONE) and not os.path.exists(cbz_path): ciUtils.isProgress(ciUtils.PROGRESS_DONE,remove=True) if not os.path.exists(cbz_path): - ci.updateComicDate("0") - return comic_href + ciUtils.updateLastDate("0") + Comic.setCurrentDownLink(comic_href) chapter_index = chapter_index + 1 return None @@ -48,17 +57,21 @@ class 
baseComic: ci.setComicInfo(homepage=url,comicname=title,author=author,icon=icon,tags=tags,dep=dep,genre=genre,lang=lang,age_rating=age_rating,chapters=chapters) cls.count_chapter = 0 for href in chapter_href: - ci.setChapterName(chapters[cls.count_chapter]) - ci.setChapterIndex(cls.count_chapter+1) + Comic.setChapterName(chapters[cls.count_chapter]) + Comic.setNumber(cls.count_chapter+1) #存在完成配置文件 但文件不存在 将清空完成配置文件 - if ci.isProgress(ci.PROGRESS_DONE) and not fu.exists(ci.getNewCBZComicChapter("file")): ci.isProgress(ci.PROGRESS_DONE,remove=True) + if ciUtils.isProgress(ciUtils.PROGRESS_DONE) and not fu.exists(ComicPath.getNewCBZComicChapter("file")): + ciUtils.isProgress(ciUtils.PROGRESS_DONE,remove=True) #不存在完成配置文件 则允许下载 - if not ci.isProgress(ci.PROGRESS_DONE): cls.comicChapters(href,scramble=True,sleep=random.randint(1,5)) + if not ciUtils.isProgress(ciUtils.PROGRESS_DONE): + cls.comicChapters(href,scramble=True,sleep=random.randint(1,5)) cls.count_chapter += 1 #一本漫画下载后等待 #清空文件夹 - if os.path.exists(ci.getDirComic()): shutil.rmtree(ci.getDirComic()) + if os.path.exists(ComicPath.getDirComic()): shutil.rmtree(ComicPath.getDirComic()) if sleep != None: time.sleep(sleep) + #完成 更新最近一次时间 + ciUtils.updateLastDate() ''' @@ -66,16 +79,17 @@ class baseComic: ''' @classmethod def comicChapters(cls,chapter_url,scramble=None,sleep=None): + is_next = False #try: cls.Onechapter(chapter_url,scramble) #进入下个阶段 #章节图片全部下载后,调用下载封面 - if ci.isProgress(ci.PROGRESS_DOWN): download_comic_icon() + if ciUtils.isProgress(ciUtils.PROGRESS_DOWN): downloadUtils.download_comic_icon() #下个阶段 - if ci.isProgress(ci.PROGRESS_CBZ): is_next = CBZUtils.packAutoComicChapterCBZ() + if ciUtils.isProgress(ciUtils.PROGRESS_CBZ): is_next = CBZUtils.packAutoComicChapterCBZ() #except Exception as e: is_next = ntfy.sendMsg(f"{ci.getComicName()} 下载出错了",error=e) - ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / "+ str(ci.getLenChapters())) - ci.setChapterIndex(cls.count_chapter + 1) + ntfy.sendMsg(f"预计总章节大小:{cls.count_chapter + 1} / "+ str(Comic.getLenChapters())) + Comic.setNumber(cls.count_chapter + 1) if sleep != None and is_next: ntfy.sendMsg(f"{sleep} 秒后开始下载下一个章节",sleep=sleep) #根据章节地址下载全部图片,并将文件名scramble开头的图片进行解密 @@ -85,49 +99,46 @@ class baseComic: if not str(chapter_url).startswith("http"): chapter_url = ci.getBaseUrl() + chapter_url #下载图片 is_next = cls.comicChapterDownload(chapter_url) - ci.nextInfoToImgChapter() + ciUtils.nextInfoToImgChapter() #下载完成后, 开始解密图片 - chapter_dir = ci.getDirComicChapter() + chapter_dir = ComicPath.getDirComicChapter() if scramble and os.path.exists(chapter_dir): #获取章节图片路径 dirs = os.listdir(chapter_dir) for img in dirs: if img.startswith("scramble="): imageUtils.encode_scramble_image(os.path.join(chapter_dir,img)) #进入下一阶段 - ci.nextImgToDownloadChapter() + ciUtils.nextImgToDownloadChapter() return is_next @classmethod def comicChapterDownload(cls,url,is_next=True): #获取本次工程的HOME目录 - comic_main = pathStr.getComicMain() try: - if comic_main == pathStr.comic_jm: JMComicCommon.comicChapterDownload(url) - if comic_main == pathStr.comic_bz: BaoZiComicCommon.comicChapterDownload(url) - if comic_main == pathStr.comic_rm: RouManComicCommon.comicChapterDownload(url) + domains.setdomain(url) except: htmlUtils.remove_HtmlCache(url) cls.comicChapterDownload(url,is_next) - if comic_main == None: print("comic_main为空,退出中...") & exit() - (list_img,files_name,chapter_name,book_name) = [ci.getChapterListImg(),ci.getChapterFilesName(),ci.getChapter(),ci.getComicName()] + (list_img,files_name,chapter_name,book_name) = 
[Comic.chapter_imgs,Comic.file_chapter_imgs, + Comic.chapter,Comic.comic_name] #保存信息 - ci.nextSaveInfoChapter(chapter_name,list_img) + ciUtils.nextSaveInfoChapter(chapter_name,list_img) #验证数据是已存在且是否完整 - cbz_file = ci.getNewFileCBZComicChapter() + cbz_file = ComicPath.getNewFileCBZComicChapter() #更新Icon - ci.getNewIconComicChapter() + ComicPath.getNewIconComicChapter() #检验CBZ文件 CBZUtils.verCBZComic(cbz_file) is_next = CBZUtils.nextCBZ() is_old=CBZUtils.updateOldCBZ(files_name) #不存在ComicInfo.xml则生成 if is_next and fu.notExists(ci.getPathComicInfoXML()): ci.writeComicInfoXML(chapter_name) - if is_next and not is_old: + if is_next: # ntfy.sendMsg(f"{book_name} {chapter_name} 下载中") - download_images(list_img,ci.getDirComicChapter(), files_name=files_name,concurrency=None,timeout=8) + downloadUtils.download_images(list_img,ComicPath.getDirComicChapter(),files_name=files_name,concurrency=None,timeout=8) # ntfy.sendMsg("等待数据检验中...",sleep=0.5) - is_next = fu.equImages(ci.getDirComicChapter(),list_img) + is_next = fu.equImages(ComicPath.getDirComicChapter(),list_img) # if not is_next: ntfy.sendMsg(msg=f"下载数据(不完整,{int(repeat*2)}秒钟后尝试第{repeat}次",sleep=int(repeat*2)) # repeat += 1 return is_next \ No newline at end of file diff --git a/common/Comic.py b/common/Comic.py new file mode 100644 index 0000000..daa2f92 --- /dev/null +++ b/common/Comic.py @@ -0,0 +1,206 @@ +import json,re +from opencc import OpenCC +from queue import Queue +from utils.OldUtils import OldUtils + +class Comic: + #章节名 漫画名 编号 概述 作者 + (chapter,comic_name,number,dep,author) = [None,None,None,None,None] + #流派 语言 年龄分级 标签 总页数 + (genre,language,agerating,tags,page_count) = [None,None,None,None,None] + #页数 出版社 年 月 日 + (pages,cbs,year,month,day) = [None,None,None,None,None] + #主页 别名 + (homepage,comic_names) = [None,None] + CURRENT_DOWN_LINK = None + + #繁体中文转简体中文 + @classmethod + def ChineseConvert(cls, text,convert='t2s'): return OpenCC(convert).convert(str(text)) + #处理成符合规定的文件名 + @classmethod + def fixFileName(cls,filename,replace=None): + if not isinstance(filename,str): return filename + intab = r'[?*/\|.:><]' + str_replace = "" + if replace != None: str_replace = replace + filename = re.sub(intab, str_replace, filename) + count = 1 + while True: + str_file = filename[0-count] + if str_file == " ": count += 1 + else: + filename = filename[0:len(filename)+1-count] + break + return filename + + @classmethod + def setValue(cls,value): + if value != None: value = cls.ChineseConvert(value) + return value + @classmethod + def getValue(cls,dict,exec=None): + if exec != None: return cls.parseExec(dict,exec=exec) + return dict + @classmethod + def getChapter(cls): return cls.chapter + @classmethod + def getComicName(cls): return cls.comic_name + @classmethod + def getNumber(cls): return cls.number + #章节名 + @classmethod + def setChapterName(cls,value,exec=None): + value = cls.fixFileName(cls.parseExec(value,exec=exec)) + OldUtils.setOldChapter(value) + cls.chapter = cls.setValue(value) + @classmethod + def getChapterName(cls): return cls.getValue(cls.chapter) + + #漫画名 + @classmethod + def setComicName(cls,value,exec=None): + value = cls.fixFileName(cls.parseExec(value,exec=exec)) + OldUtils.setOldComicName(value) + cls.comic_name = cls.setValue(value) + @classmethod + def getComicName(cls): return cls.getValue(cls.comic_name) + #编号 + @classmethod + def setNumber(cls,value): cls.number = cls.setValue(value) + @classmethod + def getNumber(cls): return cls.getValue(cls.number) + #概述 + @classmethod + def setDep(cls,value,exec=None): + cls.dep = 
cls.setValue(cls.parseExec(value,exec=exec)) + @classmethod + def getDep(cls): return cls.getValue(cls.dep) + #作者 + @classmethod + def setAuthor(cls,value): cls.author = cls.setValue(value) + @classmethod + def getAuthor(cls): return cls.getValue(cls.author) + #流派 + @classmethod + def setGenre(cls,value): cls.genre = cls.setValue(value) + @classmethod + def getGenre(cls): return cls.getValue(Comic.genre) + #语言 + @classmethod + def setLanguage(cls,value): cls.language = cls.setValue(value) + @classmethod + def getLanguage(cls): return cls.getValue(Comic.language) + #年龄分级 + @classmethod + def setAgeRating(cls,value): cls.agerating = cls.setValue(value) + @classmethod + def getAgeRating(cls): return cls.getValue(Comic.agerating) + #标签 + @classmethod + def setTags(cls,value): cls.tags = cls.setValue(value) + @classmethod + def getTags(cls): return cls.getValue(Comic.tags) + #总页数 + @classmethod + def setPageCount(cls,value): cls.page_count = cls.setValue(value) + @classmethod + def getPageCount(cls): return cls.getValue(Comic.page_count) + #主页 + (homepage,icon,list_chapter,chapter_imgs, + update_at,current_chapter_img,file_chapter_imgs) = [None,None,None,None,None,None,None] + + @classmethod + def parseExec(cls,data,exec,item=True): + if data !=None and exec != None: + dots = str(exec).split(".") + if not isinstance(data,dict): data = json.loads(data) + for dot in dots: + data = data.get(dot) + return data + @classmethod + def setHomePage(cls,value): cls.homepage = value + @classmethod + def getHomePage(cls): return cls.homepage + @classmethod + def setIcon(cls,value): cls.icon = value + @classmethod + def getIcon(cls): return cls.icon + @classmethod + def setListChapter(cls,value): cls.list_chapter = value + @classmethod + def getListChapter(cls): return cls.list_chapter + @classmethod + def getLenChapters(cls): return len(cls.list_chapter) + @classmethod + def setChapterImgs(cls,value,exec=None,item=None): cls.chapter_imgs = cls.parseExec(value,exec=exec,item=item) + @classmethod + def getChapterImgs(cls): return cls.chapter_imgs + @classmethod + def setUpdateAt(cls,value): cls.update_at = value + @classmethod + def getUpdateAt(cls): return cls.update_at + @classmethod + def setCurrentChapterImg(cls,value): cls.current_chapter_img = value + @classmethod + def getCurrentChapterImg(cls): return cls.current_chapter_img + @classmethod + def setChapterFilesName(cls,value): cls.file_chapter_imgs= value + @classmethod + def getChapterFilesName(cls): return cls.file_chapter_imgs + @classmethod + def setCurrentDownLink(cls,value): cls.CURRENT_DOWN_LINK = value + @classmethod + def getCurrentDownLink(cls): return cls.CURRENT_DOWN_LINK + +class ListComic: + LIST_COMIC_QUEUE = Queue() + (LIST_COMIC_NAME,LIST_COMIC_LINK,LIST_COMIC_UPDATEAT) = [None,None,None] + + @classmethod + def setListComicsLinksUpdateAt(cls,names,links,update_at): + if isinstance(names,list) and isinstance(links,list) and isinstance(update_at,list): + for x in range(0,len(names)): + cls.LIST_COMIC_QUEUE.put([names[x],links[x],update_at[x]]) + @classmethod + def getListComicsLinksUpdateAt(cls): + if cls.LIST_COMIC_NAME != None and cls.LIST_COMIC_LINK != None: + cls.setListComicsLinksUpdateAt(cls.LIST_COMIC_NAME,cls.LIST_COMIC_LINK,cls.LIST_COMIC_UPDATEAT) + (cls.LIST_COMIC_NAME,cls.LIST_COMIC_LINK,cls.LIST_COMIC_UPDATEAT) = [None,None,None] + return cls.LIST_COMIC_QUEUE.get(False) + + @classmethod + def addListComicChapterLink(cls,name,link,update_at): + if name != None and link != None: + cls.LIST_COMIC_QUEUE.put([name,link,update_at])
+ + @classmethod + def getListValue(cls,result,type,start_add=None,result_type="list"): + if result == None: return None + if type == None: return result + if result_type == "list" and type != None: + data = [] + for x in range(0, len(result)): + if start_add != None: + data.append(start_add+result[x].get(type)) + else: + data.append(result[x].get(type)) + return data + return result + + @classmethod + def setListComicName(cls,value,type=None): cls.LIST_COMIC_NAME = cls.getListValue(value,type) + @classmethod + def getListComicName(cls): return cls.LIST_COMIC_NAME + @classmethod + def setListComicChapterLink(cls,value,type=None,start_add=None): cls.LIST_COMIC_LINK = cls.getListValue(value,type,start_add) + @classmethod + def getListComicChapterLink(cls): return cls.LIST_COMIC_LINK + @classmethod + def setListComicUpdateAt(cls,value,type=None): cls.LIST_COMIC_UPDATEAT = cls.getListValue(value,type) + @classmethod + def getListComicUpdateAt(cls): return cls.LIST_COMIC_UPDATEAT + @classmethod + def getListComicChapterLink(cls): return cls.LIST_COMIC_QUEUE.get(False) + + #domain end.... \ No newline at end of file diff --git a/common/ComicInfo.py b/common/ComicInfo.py new file mode 100644 index 0000000..3b544b1 --- /dev/null +++ b/common/ComicInfo.py @@ -0,0 +1,254 @@ +import json,os +from xml.dom.minidom import Document +from common.Constant import pathStr +from utils.FileUtils import dbUtils +from utils.FileUtils import fileUtils +from common.Comic import Comic +from common.Constant import ComicPath + +class ComicInfoEntity: + #章节名 value node child + @classmethod + def getNodes(cls): + #web [Comic.homepage,"Web"] + nodes = [] + #章节名 + nodes.append([Comic.chapter,"Title"]) + #漫画名 + nodes.append([Comic.comic_name,"Series"]) + #编号 + nodes.append([Comic.number,"Number"]) + #别名 + nodes.append([Comic.comic_names,"SeriesGroup"]) + #概述 + nodes.append([Comic.dep,"Summary"]) + #年 + nodes.append([Comic.year,"Year"]) + #月 + nodes.append([Comic.month,"Month"]) + #日 + nodes.append([Comic.day,"Day"]) + #作者 + nodes.append([Comic.author,"Writer"]) + #出版社 + nodes.append([Comic.cbs,"Publisher"]) + #流派 + nodes.append([Comic.genre,"Genre"]) + #标签 + nodes.append([Comic.tags,"Tags"]) + #主页 + #nodes.append([Comic.homepage,"Web"]) + #总页数 + nodes.append([Comic.page_count,"PageCount"]) + #语言 + nodes.append([Comic.language,"LanguageISO"]) + #年龄分级 + nodes.append([Comic.agerating,"AgeRating"]) + #页码 + nodes.append([Comic.pages,"Pages"]) + return nodes + +class ComicInfo: + COMIC_ICON_NAME = "000" + COMIC_INFO_XML = "ComicInfo.xml" + IS_NEW_ICON = False + document = Document() + path_comic_info = None + + @classmethod + def parseExec(cls,data,exec,start_add=None,item=True): + if data !=None and exec != None: + dots = str(exec).split(".") + if not isinstance(data,dict): data = json.loads(data) + for dot in dots: + data = data.get(dot) + if start_add != None and data != None: + data = start_add+data + return data + + @classmethod + def setNodeAndValue(cls,node,value): + if value != None: + if isinstance(value,str): + c_node = cls.document.createElement(node) + child_node = cls.document.createTextNode(value) + c_node.appendChild(child_node) + return c_node + else: return value + return None + + #页数 + @classmethod + def setPages(cls,values): + if values != None and isinstance(values,list): + suffix = "."+str(values[0]).split(".")[-1] + join_list=",".join(values).replace(suffix,"") + values = join_list.split(",") + Comic.setPageCount(len(values)+1 if cls.IS_NEW_ICON else len(values)) + root_node = 
cls.document.createElement("Pages") + if cls.IS_NEW_ICON: + #添加封面 + icon_node = cls.document.createElement("Page") + icon_node.setAttribute("Image",cls.COMIC_ICON_NAME) + icon_node.setAttribute("Type","FrontCover") + root_node.appendChild(icon_node) + for page in values: + c_node = cls.document.createElement("Page") + page = page.split("_")[-1] + c_node.setAttribute("Image",page) + root_node.appendChild(c_node) + Comic.pages = root_node + + @classmethod + def getBaseUrl(cls,url=None): + if url == None: + url = Comic.homepage + (num,index) = [3,0] + for x in range(0, num): + index = str(url).find("/",index)+1 + return url[0:index-1] + + @classmethod + def getPathComicInfoXML(cls): + try: + cls.path_comic_info = os.path.join(pathStr.base_comic_img(), + Comic.comic_name,Comic.chapter, cls.COMIC_INFO_XML) + except: + return None + return cls.path_comic_info + + #XML根文档 + @classmethod + def root_node(cls,root_value): return cls.document.createElement(root_value) + + @classmethod + def add_nodes(cls,root,list_value): + if len(list_value) == 0: return list_value + for value in list_value: + #Comic.chapter + if value[0] != None: root.appendChild(cls.setNodeAndValue(value[1],value[0])) + + @classmethod + def writeComicInfoXML(cls,chapter=None,path=None,overlay=False): + root = cls.root_node("ComicInfo") + new_document = Document() + new_document.appendChild(root) + cls.add_nodes(root,ComicInfoEntity.getNodes()) + cls.getPathComicInfoXML() + if path != None: cls.path_comic_info = os.path.join(path,cls.COMIC_INFO_XML) + base_dir = os.path.dirname(cls.path_comic_info) + if not os.path.exists(base_dir): os.makedirs(base_dir) + if os.path.exists(cls.path_comic_info) and not overlay: + print(f"{cls.COMIC_INFO_XML} 已存在") + return None + with open(cls.path_comic_info , "w", encoding="utf-8") as fo: + new_document.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8") + fo.close() + print(f"{cls.COMIC_INFO_XML} 已生成 pathd=", cls.path_comic_info) + + @classmethod + def setComicInfo(cls,comicname=None,homepage=None,alias=None,author=None,icon=None,tags=None, + dep=None,genre=None,lang=None,age_rating=None,chapters=None,update_at=None,current_chapter_img=None): + author = ",".join(set(str(str(author).replace("&",",").replace(" ",",")).split(","))) + Comic.setHomePage(homepage) + Comic.setIcon(icon) + Comic.setListChapter(chapters) + Comic.setUpdateAt(update_at) + Comic.setComicName(str(comicname)) + #if alias != None: comicInfo.setComicNames(alias) + Comic.setAuthor(author) + Comic.setTags(tags) + Comic.setDep(dep) + #comicInfo.setCBS("韩漫") + if genre != None: Comic.setGenre(genre) + Comic.setLanguage(lang) + Comic.setAgeRating(age_rating) + Comic.setCurrentChapterImg(current_chapter_img) + + +class ComicInfoUtils: + PROGRESS_INFO = "info" + PROGRESS_DOWN = "download" + PROGRESS_IMG = "download" + PROGRESS_CBZ = "cbz" + PROGRESS_DONE = "done" + PROGRESS_NONE = "none" + IS_NEW_ICON = False + list_skip = [] + + + @classmethod + def getListToString(cls,to_list): + value = to_list + if isinstance(to_list,list): + value = ",".join(to_list) + return value + @classmethod + def setComicNameSkips(cls,value): return cls.list_skip.append(value) + @classmethod + def getIsComicNameSkips(cls,value): return value in ",".join(cls.list_skip) + @classmethod + def nextSavePath(cls,next,data=None): + save_path = ComicPath.getDirConfComic()+"/"+next+Comic.getChapterName() + if data != None: fileUtils.file_save(save_path, data) + return save_path + + @classmethod + def nextSaveInfoChapter(cls,chapter,data=None): + if 
data == None: data = Comic.getChapterImgs() + if Comic.getChapterName() != chapter: + print(f"chapter {Comic.getChapterName()} 与 {chapter} 不一致,已自动跳过") + cls.setProgress(cls.PROGRESS_INFO) + cls.nextSavePath("info_",data) + + @classmethod + def nextInfoToImgChapter(cls): cls.setProgress(cls.PROGRESS_IMG) + @classmethod + def nextImgToDownloadChapter(cls): cls.setProgress(cls.PROGRESS_DOWN) + @classmethod + def nextDownloadToCBZChapter(cls): cls.setProgress(cls.PROGRESS_CBZ) + @classmethod + def nextCBZToDoneChapter(cls): cls.setProgress(cls.PROGRESS_DONE) + @classmethod + def nextDoneSave(cls,data): cls.nextSavePath("done_",data) + @classmethod + def setProgress(cls,progress): + dbUtils.setComic(Comic.getChapterName(),progress,Comic.getComicName()) + + @classmethod + def isProgress(cls,progress,remove=None): + if remove: cls.setProgress("None") + return dbUtils.query(Comic.getChapter(),progress,Comic.getComicName()) + + @classmethod + def iconDB(cls): dbUtils.setComic(Comic.getComicName(),Comic.getIcon(),"icons") + + @classmethod + def equIcon(cls): return dbUtils.query(Comic.getComicName(),Comic.getIcon(),"icons") + + @classmethod + def setConfDirComicPath(cls,file_name,comic_name=None): + if comic_name != None: Comic.setComicName(comic_name) + return os.path.join(ComicPath.getDirConfComic(),file_name) + + @classmethod + def saveConfComicData(cls,file_name,data,comic_name=None): fileUtils.file_save(cls.setConfDirComicPath(file_name,comic_name), data) + @classmethod + def getPathInitConfComicData(cls,file_name,comic_name=None): return cls.setConfDirComicPath(file_name,comic_name) + + @classmethod + def updateLastDate(cls,date=None): + update_at = Comic.getUpdateAt() + if date != None: update_at = date + dbUtils.setComic(Comic.getComicName(), update_at, "update") + + @classmethod + def isUpdateComic(cls): + return dbUtils.query(Comic.getComicName(), Comic.getUpdateAt(),"update") + + @classmethod + def comicChapterDownload(cls,imgs,names): + Comic.setChapterImgs(imgs) + #Comic.setChapterListImg(imgs) + ComicInfo.setPages(names) + Comic.setChapterFilesName(names) \ No newline at end of file diff --git a/common/Constant.py b/common/Constant.py new file mode 100644 index 0000000..98c1c64 --- /dev/null +++ b/common/Constant.py @@ -0,0 +1,133 @@ +import os,datetime,shutil +from time import strftime +from common.Comic import Comic + +class pathStr: + comic_name = None + comic_jm="JM" + comic_bz="BZ" + comic_rm="RM" + + comic_url_main = None + base_comic_out = os.path.join("/mnt", "Comics") + old_cbz_path = os.path.join("/mnt","OldComics") + @classmethod + def base_cbz(cls): return cls.getBaseComicPath("CBZ") + @classmethod + def base_comic_img(cls): return cls.getBaseComicPath("outputComic") + @classmethod + def base_conf_path(cls): return cls.getBaseComicPath(".conf") + @classmethod + def base_html_cache(cls): return cls.getBaseComicPath("html_cache") + @classmethod + def base_html_chapter(cls): return cls.getBaseComicPath("html_updated") + @classmethod + def base_comic_update(cls): return cls.getBaseComicPath("comic_update") + @classmethod + def base_db(cls): return cls.getBaseComicPath("db") + @classmethod + def getBaseUrl(cls,url=None): + if url == None: + url = Comic.homepage + num = 3 + index = 0 + for x in range(0, num): + index = str(url).find("/",index)+1 + return url[0:index-1] + @classmethod + def getBaseComicPath(cls,join_path): return os.path.join(cls.base_comic_out,join_path) + + @classmethod + def setComicMainAndPath(cls,value): + cls.setComicMain(value) + cls.setComicMainPath(value) 
+ + @classmethod + def setComicMain(cls,value): cls.comic_name = value + + @classmethod + def getComicMain(cls): return cls.comic_name + + @classmethod + def setComicMainPath(cls,value): + #if value != cls.comic_rm: cls.base_comic_out = os.path.join(cls.base_comic_out, value) + cls.base_comic_out = os.path.join(cls.base_comic_out, value) + + @classmethod + def base_html_week(cls): + date_path = cls.getDatePath() + return os.path.join(cls.base_comic_out,"html_"+str(date_path)) + + @classmethod + def getDatePath(cls): + date = datetime.datetime.now() + year = int(date.strftime("%Y")) + month = int(date.strftime("%m")) + day = int(date.strftime("%d")) + week = cls.get_week_of_month(year, month, day) + return f"{year}{month}{week}" + + @classmethod + def get_week_of_month(cls, year, month, day): + begin = int(datetime.date(year, month, 1).strftime("%W")) + end = int(datetime.date(year, month, day).strftime("%W")) + week = "{:0>2d}".format(end - begin + 1) + return week + +class ComicPath: + #顶级路径 + @classmethod + def setJoinPathDir(cls,path,dir="",prefix=None): + result = dir + if isinstance(path,dict) or isinstance(path,list): + for x in path: + result = os.path.join(result,x) + else: result = os.path.join(result,path) + if prefix != None: result += "."+prefix + return result + + @classmethod + def setDirConf(cls,path,prefix=None): return cls.setJoinPathDir(path,pathStr.base_conf_path(),prefix=prefix) + @classmethod + def setDirCBZ(cls,path,prefix=None): return cls.setJoinPathDir(path,pathStr.base_cbz(),prefix=prefix) + @classmethod + def setDirImg(cls,path,prefix=None): return cls.setJoinPathDir(path,pathStr.base_comic_img(),prefix=prefix) + #漫画配置文件路径 + @classmethod + def getDirConfComic(cls): return cls.setDirConf(Comic.comic_name) + #漫画CBZ路径 + @classmethod + def getDirCBZComic(cls): return cls.setDirCBZ(Comic.comic_name) + #漫画章节CBZ路径 + @classmethod + def getDirCBZComicChapter(cls): return cls.setDirCBZ([Comic.comic_name,Comic.chapter]) + #排序 + @classmethod + def getSortDirCBZComicChapter(cls): return cls.setDirCBZ([Comic.comic_name],str(Comic.number)+" "+Comic.chapter) + @classmethod + def getNewCBZComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".CBZ", type) + @classmethod + def getNewIconComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".jpg", type) + @classmethod + def getNewFileCBZComicChapter(cls,type="file"): return cls.getNewToComicChapter(".CBZ", type) + @classmethod + def getNewFileIconComicChapter(cls,type="file"): return cls.getNewToComicChapter(".jpg", type) + + @classmethod + def getNewToComicChapter(cls,su,type="dir"): + c_dir = cls.getDirCBZComicChapter() + s_dir = cls.getSortDirCBZComicChapter() + c_path = cls.getDirCBZComicChapter()+su + s_path = cls.getSortDirCBZComicChapter()+su + if os.path.exists(s_path) and s_path != None: + shutil.move(s_path, c_path) + print("文件已移动至:", c_path) + if type == "file": + return c_path + return c_dir + + @classmethod + def getDirComic(cls): return cls.setDirImg(Comic.comic_name) + + @classmethod + def getDirComicChapter(cls): return cls.setJoinPathDir(Comic.chapter,cls.getDirComic()) \ No newline at end of file diff --git a/entity/BaoZi.py b/domain/BaoZi.py similarity index 90% rename from entity/BaoZi.py rename to domain/BaoZi.py index dbfcc80..6233ec6 100644 --- a/entity/BaoZi.py +++ b/domain/BaoZi.py @@ -1,8 +1,7 @@ import json -from utils.HtmlUtils import htmlUtils -from utils.comic.ComicInfo import comicInfo -from utils.downloader import download_images -from utils.base.BaseComicEntity import baseComic 
+from utils.NetUtils import htmlUtils +from common.ComicInfo import ComicInfo as ci, ComicInfoUtils as ciUtils +from common.BaseComicEntity import baseComic class comicEntity: @classmethod @@ -19,7 +18,7 @@ class comicEntity: x = cls.baseComicData(url) books = x.get("books") len_books = len(books) - base_url = comicInfo.getBaseUrl(url) + base_url = ci.getBaseUrl(url) for x in range(0, len_books): book = books[x] book_id = book.get("id") @@ -28,7 +27,7 @@ class comicEntity: comic_href = base_url+"/books/"+book_id href = baseComic.downladsComcis(book_name=book_name,comic_href=comic_href,updated=updated) cls.oneComic(href) - comicInfo.updateComicDate() + ciUtils.updateLastDate() @classmethod def oneComic(cls,c_url,sleep=None): diff --git a/domain/Domains.py b/domain/Domains.py new file mode 100644 index 0000000..9e8bbc9 --- /dev/null +++ b/domain/Domains.py @@ -0,0 +1,11 @@ +from domain.down.Baozi import DomainDown as baozi +from domain.down.RouMan import DomainDown as rouman +from common.Constant import pathStr + +class domains: + @classmethod + def setdomain(cls,url): + comic_main = pathStr.getComicMain() + if comic_main == pathStr.comic_bz: baozi.comicChapterDownload(url) + if comic_main == pathStr.comic_rm: rouman.comicChapterDownload(url) + if comic_main == None: print("comic_main为空,退出中..."); exit() diff --git a/domain/RouMan.py b/domain/RouMan.py new file mode 100644 index 0000000..7ce93fd --- /dev/null +++ b/domain/RouMan.py @@ -0,0 +1,47 @@ +from common.Constant import pathStr +from common.Comic import ListComic +from common.Comic import Comic +from common.BaseComicEntity import baseComic +from utils.NetUtils import htmlUtils + +class comicEntity: + @classmethod + def downladsComcis(cls,url): + str_xpath='//script[@id="__NEXT_DATA__"]/text()' + str_exec="props.pageProps.books" + books = htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec) + #comic_names = htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec,result_type="list",type="name") + #chapter_links = htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec,result_type="list",type="id" + # ,start_add=pathStr.getBaseUrl(url)+"/books/") + #update_at= htmlUtils.setXpathData(url,xpath=str_xpath,num=0,exec=str_exec,result_type="list",type="updateAt") + #ciUtils.setListComicsLinksUpdateAt(comic_names,chapter_links,update_at) + ListComic.setListComicName(books,"name") + ListComic.setListComicChapterLink(books,"id",start_add=pathStr.getBaseUrl(url)+"/books/") + ListComic.setListComicUpdateAt(books,"updateAt") + return baseComic.updateComics(chapters_xpath='//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()') + + @classmethod + def oneComic(cls,sleep=None): + c_url = Comic.getCurrentDownLink() + if c_url == None: return None + title = htmlUtils.getXpathData('//div[@class="col"]/h5/text()',url=c_url,num=0,update=True) + #别名 + #alias = htmlUtils.xpathData('//span[contains(@class,"bookid_alias")]/text()',num=1) + icon = htmlUtils.getXpathData('//img[@class="img-thumbnail"]/@src',num=0) + author = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1) + tags = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()',num=0) + action = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1) + dep = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1) + update_date = htmlUtils.getXpathData('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1) + chapters =
htmlUtils.getXpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()') + chapter_href = htmlUtils.getXpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href') + + baseComic.oneComic(url=c_url,title=title,author=author, + icon=icon,tags=tags,dep=dep,chapters=chapters,chapter_href=chapter_href, + genre="韩漫",age_rating="R18+") + + @classmethod + def start(cls,url): + if pathStr.getComicMain() != pathStr.comic_rm: pathStr.setComicMainAndPath(pathStr.comic_rm) + cls.downladsComcis(url) + cls.oneComic() \ No newline at end of file diff --git a/entity/down/BaoZi.py b/domain/down/Baozi.py similarity index 63% rename from entity/down/BaoZi.py rename to domain/down/Baozi.py index f94593e..2dc093b 100644 --- a/entity/down/BaoZi.py +++ b/domain/down/Baozi.py @@ -1,12 +1,9 @@ -import hashlib -import json -import os -from utils.HtmlUtils import htmlUtils -from utils.FileUtils import imageUtils -from utils.comic.ComicInfo import comicInfo -from utils.Ntfy import ntfy +from common.ComicInfo import ComicInfoUtils as ciUtils +from common.ComicInfo import ComicInfo as ci +from common.Comic import Comic +from utils.NetUtils import htmlUtils -class comicCommon: +class DomainDown: @classmethod def comicChapterDownload(cls,chapter_url): imgs_url =htmlUtils.xpathData("//div[@class='gb-inside-container']/img/@data-src",url=chapter_url,update=True) @@ -21,6 +18,6 @@ class comicCommon: list_file_name.append(count_image+"."+img_su) list_img.append(count_image) count += 1 - comicInfo.setPages(list_img) - comicInfo.writeComicInfoXML(comicInfo.str_chapter) - comicInfo.comicChapterDownload(imgs_url,list_file_name) \ No newline at end of file + ci.setPages(list_img) + ci.writeComicInfoXML(Comic.getChapterName()) + ciUtils.comicChapterDownload(imgs_url,list_file_name) \ No newline at end of file diff --git a/domain/down/RouMan.py b/domain/down/RouMan.py new file mode 100644 index 0000000..2864401 --- /dev/null +++ b/domain/down/RouMan.py @@ -0,0 +1,44 @@ +from common.ComicInfo import ComicInfo as ci +from common.ComicInfo import Comic +from common.ComicInfo import ComicInfoUtils as ciUtils +from common.Constant import pathStr +from utils.FileUtils import imageUtils +from utils.NetUtils import htmlUtils +from utils.ComicUtils import ntfy + +class DomainDown: + @classmethod + def comicChapterDownload(cls,chapter_url): + str_xpath='//script[@id="__NEXT_DATA__"]/text()' + str_exec="props.pageProps" + book = htmlUtils.setXpathData(chapter_url,xpath=str_xpath,num=0,exec=str_exec) + Comic.setComicName(book,"bookName") + Comic.setChapterName(book,"chapterName") + #alias = x.get("alias") + Comic.setDep(book,"description") + images = Comic.getValue(book,"images") + chapter_api_url = ci.parseExec(book,"chapterAPIPath",start_add=pathStr.getBaseUrl(chapter_url)) + + if chapter_api_url != None: + ntfy.sendMsg(f"chapterApiUrl= {chapter_api_url}",alert=False) + data = htmlUtils.getJSON(chapter_api_url,update=True) + if data != None: + Comic.setChapterName(data,"chapter.name") + images = Comic.getValue(data,"chapter.images") + if images == None or len(images) == 0: + ntfy.sendMsg(f"未获取到章节图像 comic_name={Comic.getComicName()} chapter={Comic.getChapterName()}") + + count = 1 + list_img,list_file_name = [[],[]] + for image in images: + (image_src,scramble) = [image.get("src"),image.get("scramble")] + count_image = "{:0>3d}".format(count) + list_img.append(image_src) + image_src_prefix = "."+str(image_src).split(".")[-1] + if scramble: + de_str = str(image_src).split("/")[-1].replace(image_src_prefix,"==") +
blocks_num = imageUtils.encodeImage(de_str) + count_image = "scramble="+str(blocks_num)+"_"+count_image + list_file_name.append(count_image+image_src_prefix) + count+=1 + ciUtils.comicChapterDownload(list_img,list_file_name) \ No newline at end of file diff --git a/entity/JM.py b/entity/JM.py deleted file mode 100644 index e5e70f9..0000000 --- a/entity/JM.py +++ /dev/null @@ -1,74 +0,0 @@ -import json -import re -from utils.HtmlUtils import htmlUtils -from utils.comic.ComicInfo import comicInfo -from utils.downloader import download_images -from utils.base.BaseComicEntity import baseComic - -class comicEntity: - @classmethod - def baseComicData(cls,url,update=False): - data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update) - data = json.loads(data[0]) - data = data.get("props") - x = data.get("pageProps") - return x - - @classmethod - def downladsComcis(cls,url): - #漫画名 - x = cls.baseComicData(url) - books = x.get("books") - len_books = len(books) - base_url = comicInfo.getBaseUrl(url) - for x in range(0, len_books): - book = books[x] - book_id = book.get("id") - book_name = book.get("name") - updated = book.get("updatedAt") - comic_href = base_url+"/books/"+book_id - href = baseComic.downladsComcis(book_name=book_name,comic_href=comic_href,updated=updated) - cls.oneComic(href) - comicInfo.updateComicDate() - - @classmethod - def oneComic(cls,c_url,sleep=None): - nums = htmlUtils.xpathData("//div[@class='row']/div[6]/div[1]/div[1]/ul[contains(@class,'btn-toolbar')]/a",url=c_url,update=True) - book_name = htmlUtils.xpathData("//div[@itemprop='name']/h1[@id='book-name']/text()",num=0) - title = re.sub(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>]', '', book_name) - tags = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='tags']/a[@class='btn btn-sm btn-primary']/text()") - author = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='tag-block']/span[@data-type='author']/a[@class='btn btn-sm btn-primary']/text()") - book_msg = htmlUtils.xpathData("//div[@class='row']/div[@class='col-lg-7']/div[1]/div[@class='p-t-5 p-b-5']/text()") - jmid = book_msg[0] - dep = str(book_msg[1]).replace("叙述:","") - icon = htmlUtils.xpathData("//div[@class='row']//div[@class='thumb-overlay']/img/@src",num=0) - - referer = re.search(r'(https://\w+\.\w+)/', c_url).group(1) - if nums: - list_chapter_name = [] - list_chapter_href = [] - list_chapter_update = [] - cls.count_chapter = 0 - - for i in nums: - photo_name_list = i.xpath("li/text()")[0].split() - photo_date = i.xpath("li/span/text()")[0].split() - #print(re.findall(r'[\u4E00-\u9FA5]+.*?', i.xpath("li/text()")[0])) - try: - if re.findall(r'[\u4E00-\u9FA5]', photo_name_list[2]): - photo_name=re.sub(r'\s','',photo_name_list[0])+' '+photo_name_list[2] - else:photo_name=re.sub(r'\s','',photo_name_list[0]) - except Exception as e: - photo_name = re.sub(r'\s', '', photo_name_list[0]) - photo_name = re.sub(r'[\\\/\|\(\)\~\?\.\:\:\-\*\<\>\-]', '',photo_name) - #print(photo_name) - photoid=i.attrib['data-album'] - cls.aid = photoid - comicInfo.setValue1(cls.aid) - list_chapter_name.append(photo_name) - list_chapter_href.append(referer+i.attrib['href']) - list_chapter_update.append(photo_date[0]) - - baseComic.oneComic(url=c_url,title=title,author=author, - icon=icon,tags=tags,dep=dep,chapters=list_chapter_name,chapter_href=list_chapter_href, - alias=None) \ No newline at end of file diff --git a/entity/RouMan.py b/entity/RouMan.py deleted file mode 100644 
index c3b85c7..0000000 --- a/entity/RouMan.py +++ /dev/null @@ -1,42 +0,0 @@ -import json -from utils.HtmlUtils import htmlUtils -from utils.comic.ComicInfo import comicInfo -from utils.downloader import download_images -from utils.base.BaseComicEntity import baseComic - -class comicEntity: - - @classmethod - def booksJson(cls,url,update=False): - data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update) - return json.loads(data[0]).get("props").get("pageProps").get("books") - - @classmethod - def downladsComcis(cls,url): - #漫画名 - books = cls.booksJson(url,update=True) - for x in range(0, len(books)): - comicInfo.setComicInfo(comicname=books[x].get("name"), - current_chapter_img=comicInfo.getBaseUrl(url)+"/books/"+books[x].get("id"), - update_at=books[x].get("updatedAt")) - cls.oneComic(baseComic.updateComics()) - - @classmethod - def oneComic(cls,c_url,sleep=None,date=comicInfo.getUpdateAt()): - if c_url == None: return None - title = htmlUtils.xpathData('//div[@class="col"]/h5/text()',url=c_url,num=0,update=True) - #别名 - #alias = htmlUtils.xpathData('//span[contains(@class,"bookid_alias")]/text()',num=1) - icon = htmlUtils.xpathData('//img[@class="img-thumbnail"]/@src',num=0) - author = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()',num=1) - tags = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()',num=0) - action = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[2]/text()',num=1) - dep = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()',num=1) - update_date = htmlUtils.xpathData('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()',num=1) - chapters = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()') - chapter_href = htmlUtils.xpathData('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href') - - baseComic.oneComic(url=c_url,title=title,author=author, - icon=icon,tags=tags,dep=dep,chapters=chapters,chapter_href=chapter_href, - genre="韩漫",age_rating="R18+") - comicInfo.updateComicDate(date=date) \ No newline at end of file diff --git a/entity/down/JM.py b/entity/down/JM.py deleted file mode 100644 index c53a900..0000000 --- a/entity/down/JM.py +++ /dev/null @@ -1,108 +0,0 @@ -import hashlib -import json -import os -from utils.HtmlUtils import htmlUtils -from utils.FileUtils import imageUtils -from utils.comic.ComicInfo import comicInfo -from utils.Ntfy import ntfy - -class comicCommon: - @classmethod - def comicChapterDownload(cls,chapter_url): - chapter_url = chapter_url+"?shunt=2" - img_list =htmlUtils.xpathData("//div[@class='panel-body']/div/div[contains(@class,'center scramble-page')]/img/@data-original",url=chapter_url,update=True) - pages_imgs =htmlUtils.xpathData("//div[@class='center scramble-page']/@id",url=chapter_url) - comicInfo.setPages(pages_imgs) - comicInfo.writeComicInfoXML(comicInfo.str_chapter) - #print("img_list:",len(img_list)) - list_img = [] - list_file_name = [] - for i in img_list: - img_url= i - img_name = os.path.basename(img_url).split('.')[0] - aid = int(comicInfo.getValue1()) - if aid > 220980: - #if is_scramble: - img_name = "scramble="+str(cls.get_scramble_num(aid,img_name))+"_"+img_name - #path_img = "%s\\%s.jpg" % (cls.aid, img_name) - path_img = "%s.jpg" % (img_name) - list_img.append(img_url) - list_file_name.append(path_img) - comicInfo.comicChapterDownload(list_img,list_file_name) - - @classmethod - def 
get_md5(cls,num): - result1 = hashlib.md5(num.encode()).hexdigest() - print('get_md5-', result1) - return result1 - - @classmethod - def get_scramble_num(cls,e, t): - #print(type(e),e, type(t),t) - a = 10 - try: - num_dict = {} - for i in range(10): - num_dict[i] = i * 2 + 2 - if (int(e) >= 268850): - n = str(e) + t; - # switch(n=(n = (n = md5(n)).substr(-1)), n %= 10) { - #print("n=",n) - tmp = ord(cls.get_md5(n)[-1]) - result = num_dict[tmp % 10] - a = result - return a - except Exception as e: - print(e.__traceback__.tb_lineno,e) - return False - - @classmethod - def encode_scramble_image(cls,imgpath): - image = Image.open(imgpath) - w, h = image.size - #image.show() - file_str = str(imgpath).split("=") - #10_29.jpg - base_dir = file_str[0].replace("scramble","") - base_name = file_str[-1] - base_fn = base_name.split("_") - save_name = base_fn[1] - save_name_delesu = save_name.split(".")[0] - blocks = int(base_fn[0]) - img_type = os.path.basename(imgpath).split('.')[-1] - save_path = os.path.join(os.path.dirname(imgpath),save_name_delesu+"."+img_type) - # print(type(aid),type(img_name)) - if blocks: - s = blocks # 随机值 - # print(s) - l = h % s # 切割最后多余的值 - box_list = [] - hz = 0 - for i in range(s): - c = math.floor(h / s) - g = i * c - hz += c - h2 = h - c * (i + 1) - l - if i == 0: - c += l;hz += l - else: - g += l - box_list.append((0, h2, w, h - g)) - - # print(box_list,len(box_list)) - item_width = w - # box_list.reverse() #还原切图可以倒序列表 - # print(box_list, len(box_list)) - newh = 0 - image_list = [image.crop(box) for box in box_list] - # print(box_list) - newimage = Image.new("RGB", (w, h)) - for image in image_list: - # image.show() - b_w, b_h = image.size - newimage.paste(image, (0, newh)) - - newh += b_h - newimage.save(save_path) - if os.path.exists(imgpath): - os.remove(imgpath) \ No newline at end of file diff --git a/entity/down/RouMan.py b/entity/down/RouMan.py deleted file mode 100644 index 5765926..0000000 --- a/entity/down/RouMan.py +++ /dev/null @@ -1,58 +0,0 @@ -import json -from utils.HtmlUtils import htmlUtils -from utils.FileUtils import imageUtils -from utils.comic.ComicInfo import comicInfo -from utils.Ntfy import ntfy - -class comicCommon: - @classmethod - def baseComicData(cls,url,update=False): - data = htmlUtils.xpathData('//script[@id="__NEXT_DATA__"]/text()',url=url,update=update) - data = json.loads(data[0]) - data = data.get("props") - x = data.get("pageProps") - return x - - @classmethod - def comicChapterDownload(cls,chapter_url): - x = cls.baseComicData(chapter_url,update=True) - book_name = x.get("bookName") - chapter_name = x.get("chapterName") - alias = x.get("alias") - description = x.get("description") - images = x.get("images") - chapter_api_path = x.get("chapterAPIPath") - comicInfo.setComicName(book_name) - comicInfo.setChapterName(chapter_name) - comicInfo.setDep(description) - - if chapter_api_path != None: - chapter_api_path = str(chapter_api_path).encode('utf-8').decode('unicode_escape') - chapter_api_url = comicInfo.getBaseUrl(chapter_url)+chapter_api_path - ntfy.sendMsg(f"chapterApiUrl= {chapter_api_url}",alert=False) - data = htmlUtils.getJSON(chapter_api_url,update=True) - if data != None: - data = data.get("chapter") - (chapter_name,images) = [data.get("name"),data.get("images")] - if images == None: - ntfy.sendMsg(f"未获取到章节图像 comic_name={book_name} chapter={chapter_name}") - - count = 1 - list_img = [] - list_file_name = [] - for image in images: - image_src = image.get("src") - scramble = image.get("scramble") - count_image = 
"{:0>3d}".format(count) - list_img.append(image_src) - image_src_prefix = "."+str(image_src).split(".")[-1] - if scramble: - su = "."+str(image_src).split(".")[-1] - de_str = str(image_src).split("/")[-1].replace(su,"==") - blocks = imageUtils.encodeImage(de_str) - count_image = "scramble="+str(blocks)+"_"+count_image - list_file_name.append(count_image+image_src_prefix) - count+=1 - #print("count_all_img=", count) - #netUtils.downloadComicChapterImages(list_img,scrambles=list_scramble) - comicInfo.comicChapterDownload(list_img,list_file_name) \ No newline at end of file diff --git a/main.py b/main.py index 5754821..615d7e5 100644 --- a/main.py +++ b/main.py @@ -1,20 +1,21 @@ import os,skip -from utils.comic.PathStr import pathStr -from entity.BaoZi import comicEntity as baoziEntity -from entity.RouMan import comicEntity as roumanEntity +from common.Constant import pathStr +from domain.BaoZi import comicEntity as baoziEntity +from domain.RouMan import comicEntity as roumanEntity def rouman(): - pathStr.setComicMainAndPath(pathStr.comic_rm) skip.roumanskip() for x in range(0,52): - roumanEntity.downladsComcis("https://rm01.xyz/books?&page="+str(x)) + roumanEntity.start("https://rm01.xyz/books?&page="+str(x)) def baozi(): pathStr.setComicMainAndPath(pathStr.comic_bz) baoziEntity.oneComic("https://baozimh.org/manga/biaoren-xinmanhua/") +def proxy(): + os.environ["http_proxy"] = "http://127.0.0.1:7890" + os.environ["https_proxy"] = "http://127.0.0.1:7890" + if __name__ == '__main__': rouman() -# baozi() -# os.environ["http_proxy"] = "http://127.0.0.1:7890" -# os.environ["https_proxy"] = "http://127.0.0.1:7890" \ No newline at end of file +# baozi() \ No newline at end of file diff --git a/skip.py b/skip.py index b1d9299..105398f 100644 --- a/skip.py +++ b/skip.py @@ -1,14 +1,14 @@ -from utils.comic.ComicInfo import comicInfo +from common.ComicInfo import ComicInfoUtils as ciUtils def roumanskip(): # comicInfo.setComicNameSkips("虐美人 1-117話") # comicInfo.setComicNameSkips("夢遊") # comicInfo.setComicNameSkips("療癒女孩") - comicInfo.setComicNameSkips("深度交流會") - comicInfo.setComicNameSkips("心機女教授") - comicInfo.setComicNameSkips("天降惡魔 Devil Drop デビルドロップ") - comicInfo.setComicNameSkips("穿越異世界之後救了我的人是個少年殺人犯,少年暗殺者(?)×倒黴催的姐姐順水推舟在異世界做起了愛第2話.zip") - comicInfo.setComicNameSkips("幫人家畫嘛 第二季 Cartoonists-NSFW Season2") + ciUtils.setComicNameSkips("深度交流會") + ciUtils.setComicNameSkips("心機女教授") + ciUtils.setComicNameSkips("天降惡魔 Devil Drop デビルドロップ") + ciUtils.setComicNameSkips("穿越異世界之後救了我的人是個少年殺人犯,少年暗殺者(?)×倒黴催的姐姐順水推舟在異世界做起了愛第2話.zip") + ciUtils.setComicNameSkips("幫人家畫嘛 第二季 Cartoonists-NSFW Season2") # comicInfo.setComicNameSkips("霸道主管要我IN") # comicInfo.setComicNameSkips("正妹小主管") - comicInfo.setComicNameSkips("反烏托邦遊戲") \ No newline at end of file + ciUtils.setComicNameSkips("反烏托邦遊戲") \ No newline at end of file diff --git a/utils/CBZUtils.py b/utils/CBZUtils.py deleted file mode 100644 index 0d0e0cd..0000000 --- a/utils/CBZUtils.py +++ /dev/null @@ -1,162 +0,0 @@ -import json -import os,shutil,time -from datetime import datetime -from pathlib import Path -from zipfile import ZipFile -from utils.comic.ComicInfo import comicInfo -from utils.Ntfy import ntfy -from utils.FileUtils import fileUtils as fu -from utils.comic.PathStr import pathStr -from utils.OldUtils import OldUtils - -class CBZUtils: - - @classmethod - def getCBZ_Dir(cls): return comicInfo.getNewCBZComicChapter() - - @classmethod - def getCBZ_Path(cls): return comicInfo.getNewFileCBZComicChapter() - - @classmethod - def readDirsOrFiles(cls,dir,type): - data = 
[] - files = os.listdir(dir) - for file in files: - path = os.path.join(dir,file) - if type == "files" and os.path.isfile(path): - data.append(path) - if type == "dirs" and os.path.isdir(path): - data.append(path) - return data - - @classmethod - def zip_compression(cls,source_dir=None, target_file=None, remove=True): - target_dir = os.path.dirname(target_file) - if not os.path.exists(target_dir): - os.makedirs(target_dir) - if not os.path.exists(target_file) and source_dir != None: - with ZipFile(target_file, mode='w') as zf: - for path, dir_names, filenames in os.walk(source_dir): - path = Path(path) - arc_dir = path.relative_to(source_dir) - y = 0 - for filename in filenames: - y = y + 1 - print("打包中:" + str(y) + "/" + str(len(filenames)), os.path.join(source_dir, filename)) - zf.write(path.joinpath(filename), arc_dir.joinpath(filename)) - zf.close() - ntfy.sendMsg(f"打包完成:{target_file}") - cls.verCBZComic(target_file) - - @classmethod - def packAutoComicChapterCBZ(cls): - chapter_path = comicInfo.getDirComicChapter() - if os.path.exists(chapter_path): - dirs = os.listdir(chapter_path) - for file in dirs: - if file.startswith("scramble="): - try: - os.remove(file) - except: - print(f"删除 {file} 发生错误,已跳过") - return False - cls.zip_compression(comicInfo.getDirComicChapter(), cls.getCBZ_Path()) - time.sleep(0.1) - fu.remove(comicInfo.getDirComicChapter()) - return True - - @classmethod - def replaceZip(cls,filepath,unpack_dir=None): - if not cls.compareFileDate(filepath): return None - if unpack_dir == None: - unpack_dir = str(filepath).split(".")[0] - fz = ZipFile(filepath, 'r') - for file in fz.namelist(): - if file.endswith(".jpg"): - data = fz.read(file) - if len(data) < 500 and os.path.exists(filepath): - os.remove(filepath) - print(f"数据不完整,已删除:{filepath}") - if cls.compareFileDate(filepath): - os.utime(filepath) - print(f"已更新文件时间 {filepath}") - if os.path.exists(unpack_dir): - shutil.rmtree(unpack_dir) - - @classmethod - def compareFileDate(cls,filepath): - if os.path.exists(filepath): - ctime = os.path.getmtime(filepath) - str_ctime = datetime.fromtimestamp(int(ctime)) - file_ctime = str(str_ctime.year)+"{:0>2d}".format(str_ctime.month)+"{:0>2d}".format(str_ctime.day)+"{:0>2d}".format(str_ctime.hour) - c_ctime = 2023011603 - else: - return False - if int(file_ctime) < c_ctime: - return True - return False - - @classmethod - def zip_info(cls,path,filter=True): - result = None - try: - with ZipFile(path, "r") as zip_file: - result = zip_file.namelist() - if filter: - filter_icon = comicInfo.COMIC_ICON_NAME+".jpg" - filter_info_xml = comicInfo.COMIC_INFO_XML - if filter_icon in result: result.remove(filter_icon) - if filter_info_xml in result: result.remove(filter_info_xml) - except Exception as e: - print(e) - return result - - #CBZ检验是否完整 - @classmethod - def verCBZComic(cls,path=None,list_img=None,min_size=300000): - #数据检验 - if path == None: path = cls.getCBZ_Path() - #文件不存在 则返回 - if fu.notExists(path): return False - if list_img == None: list_img = comicInfo.getChapterImgs() - - if fu.exists(path) and len(cls.zip_info(path)) == len(list_img): - print(f"文件校验成功:{path}") - comicInfo.setProgress(comicInfo.PROGRESS_DONE) - return True - else: - try: - if len(cls.zip_info(path)) < len(list_img) or os.path.getsize(path) < min_size: - fu.remove(path) - comicInfo.setProgress(comicInfo.PROGRESS_NONE) - except Exception as e: - print(e) - return False - - @classmethod - def updateOldCBZ(cls,filesname,result=False): - old_zipfile_path = 
os.path.join(pathStr.old_cbz_path,OldUtils.getOldComicName(),OldUtils.getOldChapter()+".CBZ") - #判断是否存在已下载CBZ文件 - if fu.exists(old_zipfile_path) and fu.notExists(CBZUtils.getCBZ_Path()): - print(f"存在CBZ文件{old_zipfile_path},解压中...") - zip_file = ZipFile(old_zipfile_path) - #CBZ中文件数量,剔除ComicInfo.xml - if len(filesname) == len(zip_file.namelist())-1: - unzip_path = comicInfo.getDirComicChapter() - zip_file.extractall(unzip_path) - zip_file.close() - print(f"解压完成: CBZ文件{old_zipfile_path}") - print("文件校验中...") - for file in os.listdir(unzip_path): - #检验图片损坏则删除 - if file.endswith(".jpg") and not fu.ver_file(os.path.join(unzip_path,file),type="image"): - fu.remove(unzip_path) - return False - comicInfo.writeComicInfoXML(overlay=True) - result = True - return result - - @classmethod - def nextCBZ(cls,list_img=None): - if list_img == None: list_img = comicInfo.getChapterImgs() - return not cls.verCBZComic(list_img=list_img) \ No newline at end of file diff --git a/utils/ComicUtils.py b/utils/ComicUtils.py index 28ce93c..2c20ade 100644 --- a/utils/ComicUtils.py +++ b/utils/ComicUtils.py @@ -1,6 +1,181 @@ -from opencc import OpenCC - -class fontUtils: +import os,shutil,time,requests +from datetime import datetime +from pathlib import Path +from zipfile import ZipFile +from common.ComicInfo import ComicInfoUtils as ciUtils +from common.ComicInfo import ComicInfo as ci +from common.ComicInfo import Comic +from utils.FileUtils import fileUtils as fu +from common.Constant import pathStr +from common.Constant import ComicPath +from utils.OldUtils import OldUtils + +class ntfy: @classmethod - def ChineseConvert(cls, text,convert='t2s'): - return OpenCC(convert).convert(text) # convert from Simplified Chinese to Traditional Chinese \ No newline at end of file + def sendMsg(cls, msg,alert=True,sleep=None,error=None): + try: + print(f"#ntfy: {msg}") + if alert: + requests.post("https://ntfy.caiwenxiu.cn/PyComic", + data=msg.encode(encoding='utf-8')) + except: + print(f"#ntfy error: {msg}") + if sleep != None: + time.sleep(int(sleep)) + if error != None: + print(f"#ntfy Error: {error}") + return False + else: + return True +class CBZUtils: + + @classmethod + def getCBZ_Dir(cls): return ComicPath.getNewCBZComicChapter() + + @classmethod + def getCBZ_Path(cls): return ComicPath.getNewFileCBZComicChapter() + + @classmethod + def readDirsOrFiles(cls,dir,type): + data = [] + files = os.listdir(dir) + for file in files: + path = os.path.join(dir,file) + if type == "files" and os.path.isfile(path): + data.append(path) + if type == "dirs" and os.path.isdir(path): + data.append(path) + return data + + @classmethod + def zip_compression(cls,source_dir=None, target_file=None, remove=True): + target_dir = os.path.dirname(target_file) + if not os.path.exists(target_dir): + os.makedirs(target_dir) + if not os.path.exists(target_file) and source_dir != None: + with ZipFile(target_file, mode='w') as zf: + for path, dir_names, filenames in os.walk(source_dir): + path = Path(path) + arc_dir = path.relative_to(source_dir) + y = 0 + for filename in filenames: + y = y + 1 + print("打包中:" + str(y) + "/" + str(len(filenames)), os.path.join(source_dir, filename)) + zf.write(path.joinpath(filename), arc_dir.joinpath(filename)) + zf.close() + ntfy.sendMsg(f"打包完成:{target_file}") + cls.verCBZComic(target_file) + + @classmethod + def packAutoComicChapterCBZ(cls): + chapter_path = ComicPath.getDirComicChapter() + if os.path.exists(chapter_path): + dirs = os.listdir(chapter_path) + for file in dirs: + if file.startswith("scramble="): + try: 
+ os.remove(file) + except: + print(f"删除 {file} 发生错误,已跳过") + return False + cls.zip_compression(ComicPath.getDirComicChapter(), cls.getCBZ_Path()) + time.sleep(0.1) + fu.remove(ComicPath.getDirComicChapter()) + return True + + @classmethod + def replaceZip(cls,filepath,unpack_dir=None): + if not cls.compareFileDate(filepath): return None + if unpack_dir == None: + unpack_dir = str(filepath).split(".")[0] + fz = ZipFile(filepath, 'r') + for file in fz.namelist(): + if file.endswith(".jpg"): + data = fz.read(file) + if len(data) < 500 and os.path.exists(filepath): + os.remove(filepath) + print(f"数据不完整,已删除:{filepath}") + if cls.compareFileDate(filepath): + os.utime(filepath) + print(f"已更新文件时间 {filepath}") + if os.path.exists(unpack_dir): + shutil.rmtree(unpack_dir) + + @classmethod + def compareFileDate(cls,filepath): + if os.path.exists(filepath): + ctime = os.path.getmtime(filepath) + str_ctime = datetime.fromtimestamp(int(ctime)) + file_ctime = str(str_ctime.year)+"{:0>2d}".format(str_ctime.month)+"{:0>2d}".format(str_ctime.day)+"{:0>2d}".format(str_ctime.hour) + c_ctime = 2023011603 + else: + return False + if int(file_ctime) < c_ctime: + return True + return False + + @classmethod + def zip_info(cls,path,filter=True): + result = None + try: + with ZipFile(path, "r") as zip_file: + result = zip_file.namelist() + if filter: + filter_icon = ci.COMIC_ICON_NAME+".jpg" + filter_info_xml = ci.COMIC_INFO_XML + if filter_icon in result: result.remove(filter_icon) + if filter_info_xml in result: result.remove(filter_info_xml) + except Exception as e: + print(e) + return result + + #CBZ检验是否完整 + @classmethod + def verCBZComic(cls,path=None,list_img=None,min_size=300000): + #数据检验 + if path == None: path = cls.getCBZ_Path() + #文件不存在 则返回 + if fu.notExists(path): return False + if list_img == None: list_img = Comic.getChapterImgs() + + if fu.exists(path) and len(cls.zip_info(path)) == len(list_img): + print(f"文件校验成功:{path}") + ciUtils.setProgress(ciUtils.PROGRESS_DONE) + return True + else: + try: + if len(cls.zip_info(path)) < len(list_img) or os.path.getsize(path) < min_size: + fu.remove(path) + ciUtils.setProgress(ciUtils.PROGRESS_NONE) + except Exception as e: + print(e) + return False + + @classmethod + def updateOldCBZ(cls,filesname,result=False): + old_zipfile_path = ComicPath.setJoinPathDir([OldUtils.getOldComicName(),OldUtils.getOldChapter()], + pathStr.old_cbz_path,prefix="CBZ") + #判断是否存在已下载CBZ文件 + if fu.exists(old_zipfile_path) and fu.notExists(CBZUtils.getCBZ_Path()): + print(f"存在CBZ文件{old_zipfile_path},解压中...") + zip_file = ZipFile(old_zipfile_path) + #CBZ中文件数量,剔除ComicInfo.xml + if len(filesname) == len(zip_file.namelist())-1: + unzip_path = ComicPath.getDirComicChapter() + zip_file.extractall(unzip_path) + zip_file.close() + print(f"解压完成: CBZ文件{old_zipfile_path}") + print("文件校验中...") + for file in os.listdir(unzip_path): + #检验图片损坏则删除 + if file.endswith(".jpg") and not fu.ver_file(os.path.join(unzip_path,file),type="image"): + fu.remove(unzip_path) + return False + ci.writeComicInfoXML(overlay=True) + result = True + return result + + @classmethod + def nextCBZ(cls,list_img=None): + if list_img == None: list_img = Comic.getChapterImgs() + return not cls.verCBZComic(list_img=list_img) \ No newline at end of file diff --git a/utils/FileUtils.py b/utils/FileUtils.py index 9f302f0..ab56486 100644 --- a/utils/FileUtils.py +++ b/utils/FileUtils.py @@ -1,9 +1,8 @@ import base64,hashlib,os,shutil -import math,time -import numpy as np +import math,time,json from PIL import Image from tinydb import 
TinyDB, Query -from utils.comic.PathStr import pathStr +from common.Constant import pathStr class imageUtils: @@ -273,7 +272,34 @@ class fileUtils: except: print(f"删除错误:{path}") return False + + #文件保存 + @classmethod + def file_save(cls,path,data,mode=None,print_msg=False): + result = {} + f = {} + dir_name = os.path.dirname(path) + if not os.path.exists(dir_name): + os.makedirs(dir_name) + save_path = os.path.join(path) + if os.path.exists(save_path): + os.remove(save_path) + data = json.dumps(data) + if mode == None: + mode = "w+" + try: + f = open(save_path, mode, encoding="utf-8") + f.write(data) + f.close() + if print_msg: + print("data=",data) + result = path + "文件写入成功" + except: + result = path + "文件写入失败" + print(result) + return result + class dbUtils: @classmethod def base_path(cls,path): diff --git a/utils/HtmlUtils.py b/utils/HtmlUtils.py deleted file mode 100644 index 352bb30..0000000 --- a/utils/HtmlUtils.py +++ /dev/null @@ -1,123 +0,0 @@ -from fake_useragent import UserAgent -import requests,os,json -from lxml import html -import traceback -import time,re -from urllib3.util.retry import Retry -from requests.adapters import HTTPAdapter -from utils.Ntfy import ntfy -from utils.comic.PathStr import pathStr - -class htmlUtils: - - headers = {'User-Agent': UserAgent().random} - url_data = {} - - @classmethod - def getPathSaveHtml(cls,url,type=None): - rstr = r"[\/\\\:\*\?\"\<\>\|\.]" #  '/ \ : * ? " < > |' - try: - file_url = re.sub(rstr, "", url) - except: - file_url = "error_cache" - file_path = os.path.join(pathStr.base_html_cache(),file_url) - if type == "new": - return file_path - if os.path.exists(file_path): - if type == "read": - with open(file_path,"r",encoding="utf-8") as fs: return fs.read() - return file_path - else: - return None - - @classmethod - def saveHtml(cls,url,data,type=None): - file_path = cls.getPathSaveHtml(url,type="new") - dir_name = os.path.dirname(file_path) - if not os.path.exists(dir_name): - os.makedirs(dir_name) - with open(file_path,"w",encoding="utf-8") as fs: - if type== "json": data = json.dumps(data) - fs.write(str(data)) - - @classmethod - def remove_HtmlCache(cls,url): - file_path = cls.getPathSaveHtml(url,type="new") - if os.path.exists(file_path): - try: - os.remove(file_path) - print("已删除") - except: - print() - - @classmethod - def getHTML(cls, curl,type=None,update=False): - url_text = None - if update: cls.remove_HtmlCache(curl) - retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[ 500, 502, 503, 504 ]) - s = requests.Session() - s.mount('http://', HTTPAdapter(max_retries=retries)) - s.mount('https://', HTTPAdapter(max_retries=retries)) - #数据为空则获取数据 - try: url_text = cls.getPathSaveHtml(curl,"read") - except: url_text = None - if url_text != None and update == False: return html.fromstring(url_text) - else: url_text = None - repeat = 0 - while url_text == None and repeat <=5: - try: - print(f"请求地址:{curl}") - res = s.get(curl,stream=True, headers=cls.headers, timeout=5,allow_redirects=True) - if type == "bytes": - url_text = res - if type == "json": - cls.saveHtml(curl,res.text,type="json") - return json.loads(res.text) - if type == None: - url_text = html.fromstring(res.text) - cls.saveHtml(curl,res.text) - except: - repeat += 1 - ntfy.sendMsg(f"请求失败:{curl}",sleep=1) - return url_text - - @classmethod - def getBytes(cls, url): - return cls.getHTML(url,type="bytes") - - @classmethod - def getJSON(cls,url,update=False): - return cls.getHTML(url,type="json",update=update) - - @classmethod - def 
xpathData(cls,c_xpath,url=None,num=None,not_eq=None,update=False): - if url == None: - url = cls.temp_url - else: - cls.temp_url = url - result = [] - if update: - html_cache_path = cls.getPathSaveHtml(url,"new") - if os.path.exists(html_cache_path): - try: - os.remove(html_cache_path) - ntfy.sendMsg(f"html_cache更新成功 {html_cache_path}") - except: - ntfy.sendMsg(f"html_cache更新失败 {html_cache_path}") - #获取html实体数据 - et = cls.getHTML(url) - if et == None: - return None - #比对数据 - count = 1 - xpaths = et.xpath(c_xpath) - for x in xpaths: - if x != not_eq: - result.append(x) - count +=1 - if num != None: - try: - result = result[num] - except: - result = None - return result \ No newline at end of file diff --git a/utils/NetUtils.py b/utils/NetUtils.py new file mode 100644 index 0000000..04baaec --- /dev/null +++ b/utils/NetUtils.py @@ -0,0 +1,278 @@ +from __future__ import print_function +from queue import Queue +from fake_useragent import UserAgent +import shutil,imghdr,concurrent.futures +import requests,os,json,time,re +from lxml import html +from urllib3.util.retry import Retry +from requests.adapters import HTTPAdapter +from common.Constant import pathStr +from common.ComicInfo import ComicInfoUtils as ciUtils +from common.ComicInfo import ComicInfo as ci +from common.ComicInfo import Comic +from common.Constant import ComicPath +from utils.FileUtils import fileUtils as fu + + +class htmlUtils: + + headers = {'User-Agent': UserAgent().random} + url_data = {} + #domain + @classmethod + def parseExec(cls,data,exec): + if data !=None and exec != None: + dots = str(exec).split(".") + if not isinstance(data,dict): data = json.loads(data) + for dot in dots: + data = data.get(dot) + return data + + @classmethod + def getXpathData(cls,c_xpath,url=None,num=None,not_eq=None,update=False): + return htmlUtils.xpathData(c_xpath=c_xpath,url=url,num=num,not_eq=not_eq,update=update) + + @classmethod + def setXpathData(cls,url,xpath,exec,num=None,result_type=None,type=None,start_add=None): + result = cls.parseExec(htmlUtils.xpathData(xpath,url=url,num=num),exec) + if result == None: return None + if result_type == "list" and type != None: + data = [] + for x in range(0, len(result)): + if start_add != None: + data.append(start_add+result[x].get(type)) + else: + data.append(result[x].get(type)) + return data + return result + + @classmethod + def getPathSaveHtml(cls,url,type=None): + rstr = r"[\/\\\:\*\?\"\<\>\|\.]" #  '/ \ : * ? 
" < > |' + try: + file_url = re.sub(rstr, "", url) + except: + file_url = "error_cache" + file_path = os.path.join(pathStr.base_html_cache(),file_url) + if type == "new": + return file_path + if os.path.exists(file_path): + if type == "read": + with open(file_path,"r",encoding="utf-8") as fs: return fs.read() + return file_path + else: + return None + + @classmethod + def saveHtml(cls,url,data,type=None): + file_path = cls.getPathSaveHtml(url,type="new") + dir_name = os.path.dirname(file_path) + if not os.path.exists(dir_name): + os.makedirs(dir_name) + with open(file_path,"w",encoding="utf-8") as fs: + if type== "json": data = json.dumps(data) + fs.write(str(data)) + + @classmethod + def remove_HtmlCache(cls,url): + file_path = cls.getPathSaveHtml(url,type="new") + if os.path.exists(file_path): + try: + os.remove(file_path) + print("已删除") + except: + print() + + @classmethod + def getHTML(cls, curl,type=None,update=False): + url_text = None + if update: cls.remove_HtmlCache(curl) + retries = Retry(total=1, backoff_factor=0.5, status_forcelist=[ 500, 502, 503, 504 ]) + s = requests.Session() + s.keep_alive = False + s.mount('http://', HTTPAdapter(max_retries=retries)) + s.mount('https://', HTTPAdapter(max_retries=retries)) + #数据为空则获取数据 + try: url_text = cls.getPathSaveHtml(curl,"read") + except: url_text = None + if url_text != None and update == False: return html.fromstring(url_text) + else: url_text = None + repeat = 0 + while url_text == None and repeat <=5: + try: + print(f"请求地址:{curl}") + res = s.get(curl,stream=True, headers=cls.headers, timeout=10,allow_redirects=True) + if type == "bytes": + url_text = res + if type == "json": + cls.saveHtml(curl,res.text,type="json") + return json.loads(res.text) + if type == None: + url_text = html.fromstring(res.text) + cls.saveHtml(curl,res.text) + res.close() + except Exception as e: + repeat += 1 + print(f"请求失败:Exception: {e} {curl}") + return url_text + + @classmethod + def getBytes(cls, url): + return cls.getHTML(url,type="bytes") + + @classmethod + def getJSON(cls,url,update=False): + return cls.getHTML(url,type="json",update=update) + + @classmethod + def xpathData(cls,c_xpath,url=None,num=None,not_eq=None,update=False): + if url == None: url = cls.temp_url + else: cls.temp_url = url + result = [] + if update: + html_cache_path = cls.getPathSaveHtml(url,"new") + if os.path.exists(html_cache_path): + try: + os.remove(html_cache_path) + print(f"html_cache更新成功 {html_cache_path}") + except: + print(f"html_cache更新失败 {html_cache_path}") + #获取html实体数据 + et = cls.getHTML(url) + if et == None: + return None + #比对数据 + count = 1 + xpaths = et.xpath(c_xpath) + for x in xpaths: + if x != not_eq: + result.append(x) + count +=1 + if num != None: + try: + result = result[num] + except: + result = None + return result + +class downloadUtils: + headers = { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Proxy-Connection": "keep-alive", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36", + "Accept-Encoding": "gzip, deflate, sdch", + # 'Connection': 'close', + } + down_queue = Queue() + + @classmethod + def common_download(cls,file_name,image_url,dst_dir,timeout=10,proxy=None,proxy_type=None): + proxies = None + if proxy_type is not None: + proxies = { + "http": proxy_type + "://" + proxy, + "https": proxy_type + "://" + proxy } + response = None + file_path = os.path.join(dst_dir, file_name) + if os.path.exists(file_path): + 
print("download_image 文件已存在,已跳过=",file_path) + return None + temp_path = os.path.join(dst_dir, file_name+".downloads") + repair_count = 1 + response = requests.get( + image_url, headers=cls.headers, timeout=timeout, proxies=proxies) + while response.status_code != 200 and repair_count <= 5: + time.sleep(0.7) + cls.download_image(image_url,dst_dir,file_name) + print(f'重试:第{repair_count}次 {image_url}') + repair_count += 1 + with open(temp_path, 'wb') as f: + f.write(response.content) + response.close() + #验证是否是图像 + if fu.ver_file(temp_path,type="image"): + shutil.move(temp_path, file_path) + print("## OK: {} {}".format(file_path, image_url)) + else: + print("## Fail: {} {}".format(image_url, "图像损坏")) + cls.down_queue.put([file_name,image_url,dst_dir]) + + @classmethod + def download_image(cls,timeout=20, proxy_type=None, proxy=None,type="image"): + repeat = 0 + while not cls.down_queue.empty() and repeat <= 10: + repeat += 1 + data = cls.down_queue.get(False) + (file_name,image_url,dst_dir) = [data[0],data[1],data[2]] + if repeat > 1: + print(f"第{repeat}次下载数据中... file_name={file_name}") + try: + cls.common_download(file_name,image_url,dst_dir) + except: + print(f"下载重试中 {file_name}={image_url}") + cls.down_queue.put([file_name,image_url,dst_dir]) + + + @classmethod + def download_images(cls,image_urls, dst_dir,concurrency=None,timeout=20,proxy_type=None, proxy=None,files_name=None): + """ + Download image according to given urls and automatically rename them in order. + :param timeout: + :param proxy: + :param proxy_type: + :param image_urls: list of image urls + :param dst_dir: output the downloaded images to dst_dir + :param file_prefix: if set to "img", files will be in format "img_xxx.jpg" + :param concurrency: number of requests process simultaneously + :return: none + """ + if concurrency == None: + concurrency = len(image_urls) + with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor: + future_list = list() + count = 0 + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + for image_url in image_urls: + file_name = files_name[count] + cls.down_queue.put([file_name,image_url,dst_dir]) + future_list.append(executor.submit( + cls.download_image,timeout, proxy_type, proxy)) + count += 1 + concurrent.futures.wait(future_list, timeout) + + @classmethod + def download_comic_icon(cls,is_new=ciUtils.IS_NEW_ICON): + icon_url = Comic.getIcon() + if icon_url == None: + print("icon 不存在,已跳过") + return None + save_name = ci.COMIC_ICON_NAME + icon_prefix = "."+str(icon_url).split(".")[-1] + icon_prefix = icon_prefix.split("?")[0] + #判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过 + path_comic_icon = os.path.join(ComicPath.getDirConfComic(),save_name+icon_prefix) + if not ciUtils.equIcon() and fu.exists(path_comic_icon): + os.remove(path_comic_icon) + if fu.notExists(path_comic_icon): + cls.download_images([icon_url],ComicPath.getDirConfComic(),files_name=[save_name+icon_prefix]) + #if not os.path.exists(path_cbz_comic): + # os.makedirs(path_cbz_comic) + save_path = os.path.join(ComicPath.getDirCBZComic(),Comic.getChapterName()+icon_prefix) + if is_new: + #历史版本ICON + if os.path.exists(save_path): + os.remove(save_path) + if os.path.exists(path_comic_icon): + base_dir = ComicPath.getDirComicChapter() + if not os.path.exists(base_dir): os.makedirs(base_dir) + shutil.copy(path_comic_icon,os.path.join(base_dir,save_name+icon_prefix)) + else: + if fu.notExists(ComicPath.getDirCBZComic()): os.makedirs(ComicPath.getDirCBZComic()) + shutil.copy(path_comic_icon,save_path) + 
print(f"{path_comic_icon} 已复制至: {save_path}") + #保存icon信息 + ciUtils.iconDB() + ciUtils.nextDownloadToCBZChapter() + ciUtils.setProgress(ciUtils.PROGRESS_CBZ) \ No newline at end of file diff --git a/utils/Ntfy.py b/utils/Ntfy.py deleted file mode 100644 index 806ef12..0000000 --- a/utils/Ntfy.py +++ /dev/null @@ -1,19 +0,0 @@ -import requests,time - -class ntfy: - @classmethod - def sendMsg(cls, msg,alert=True,sleep=None,error=None): - try: - print(f"#ntfy: {msg}") - if alert: - requests.post("https://ntfy.caiwenxiu.cn/PyComic", - data=msg.encode(encoding='utf-8')) - except: - print(f"#ntfy error: {msg}") - if sleep != None: - time.sleep(int(sleep)) - if error != None: - print(f"#ntfy Error: {error}") - return False - else: - return True \ No newline at end of file diff --git a/utils/comic/ComicInfo.py b/utils/comic/ComicInfo.py deleted file mode 100644 index f0c9c7f..0000000 --- a/utils/comic/ComicInfo.py +++ /dev/null @@ -1,494 +0,0 @@ -from xml.dom.minidom import Document -import os,re -from utils.comic.PathStr import pathStr -import json,shutil -from utils.FileUtils import dbUtils -from utils.ComicUtils import fontUtils -from utils.OldUtils import OldUtils - -class comicInfo(): - COMIC_ICON_NAME = "000" - COMIC_INFO_XML = "ComicInfo.xml" - PROGRESS_INFO = "info" - PROGRESS_DOWN = "download" - PROGRESS_IMG = "download" - PROGRESS_CBZ = "cbz" - PROGRESS_DONE = "done" - PROGRESS_NONE = "none" - - IS_NEW_ICON = False - - document = Document() - path_comic_info = None - - root = "ComicInfo" - chapter = "Title" - comic_name = "Series" - number = "Number" - dep = "Summary" - author = "Writer" - genre = "Genre" - cbs = "Publisher" - lang = "LanguageISO" - comic_names = "SeriesGroup" - tags = "Tags" - date_year = "Year" - date_month = "Month" - date_day = "Day" - page_count = "PageCount" - pages = "Pages" - web = "Web" - age_rating = "AgeRating" - - str_comic_name = None - str_chapter = None - str_number = None - str_icon = None - str_homepage = None - str_listchapter = None - str_chapter_imgs = None - str_update_at = None - str_date_year = None - str_date_month = None - str_date_day = None - str_page_count = None - str_web = None - str_list_img = None - str_files_img = None - str_chapter_index= None - str_value1 = None - str_current_chapter_img = None - list_skip = [] - - chapter_node = None - comic_name_node = None - number_node = None - dep_node = None - author_node = None - genre_node = None - cbs_node = None - lang_node = None - comic_names_node = None - tags_node = None - date_year_node = None - date_month_node = None - date_day_node = None - page_count_node = None - pages_node = None - web_node = None - age_rating_node = None - - @classmethod - def setNodeAndValue(cls,node,value): - if value != None: - c_node = cls.document.createElement(node) - node_text = cls.document.createTextNode(fontUtils.ChineseConvert(str(value).replace("\n",""))) - c_node.appendChild(node_text) - return c_node - return None - - @classmethod - def root_node(cls): return cls.document.createElement(cls.root) - - @classmethod - def setChapterName(cls,value): - cls.str_chapter = fontUtils.ChineseConvert(cls.fixFileName(value)) - OldUtils.setOldChapter(cls.fixFileName(value)) - cls.chapter_node = cls.setNodeAndValue(cls.chapter,value) - - @classmethod - def setComicValue(cls,value): - result = None - if value != None or value != "": result = value - return result - - @classmethod - def setListChapter(cls, value): cls.str_list_chapter = cls.setComicValue(value) - @classmethod - def setChapterImgs(cls, value): 
cls.str_chapter_imgs = cls.setComicValue(value) - @classmethod - def setCurrentChapterImg(cls, value): cls.str_current_chapter_img = cls.setComicValue(value) - @classmethod - def getCurrentChapterImg(cls): return cls.str_current_chapter_img - @classmethod - def getChapterImgs(cls): return cls.str_chapter_imgs - @classmethod - def getLenChapters(cls): return len(cls.str_list_chapter) - @classmethod - def setComicName(cls,value): - cls.str_comic_name = fontUtils.ChineseConvert(cls.fixFileName(value)) - OldUtils.setOldComicName(cls.fixFileName(value)) - cls.comic_name_node = cls.setNodeAndValue(cls.comic_name, value) - - @classmethod - def setComicNames(cls,value): - #去重 - value = ",".join(set(str(fontUtils.ChineseConvert(value)).split(","))) - cls.comic_names_node = cls.setNodeAndValue(cls.comic_names,value) - @classmethod - def setNumber(cls,value): - cls.str_number = value - cls.number_node = cls.setNodeAndValue(cls.number, value) - @classmethod - def getNumber(cls): return cls.str_number - @classmethod - def setDep(cls,value): cls.dep_node = cls.setNodeAndValue(cls.dep, value) - @classmethod - def setAuthor(cls,value): cls.author_node = cls.setNodeAndValue(cls.author,cls.getListToString(value)) - @classmethod - def setLang(cls,value): cls.lang_node = cls.setNodeAndValue(cls.lang, value) - @classmethod - def setAgeRating(cls,value): cls.age_rating_node = cls.setNodeAndValue(cls.age_rating, value) - @classmethod - def setGenre(cls,value): cls.genre_node = cls.setNodeAndValue(cls.genre, cls.getListToString(value)) - @classmethod - def setTags(cls,value): cls.tags_node = cls.setNodeAndValue(cls.tags,cls.getListToString(value)) - @classmethod - def setCBS(cls,value): cls.cbs_node = cls.setNodeAndValue(cls.cbs,value) - - @classmethod - def setWeb(cls,value): - cls.str_web = cls.setComicValue(value) - cls.web_node = cls.setNodeAndValue(cls.web,cls.setComicValue(value)) - - @classmethod - def setChapterListImg(cls,value): cls.str_list_img=cls.setComicValue(value) - @classmethod - def setValue1(cls,value): cls.str_value1 = value - @classmethod - def getValue1(cls): return cls.str_value1 - @classmethod - def getChapterListImg(cls): return cls.str_list_img - @classmethod - def setChapterFilesName(cls,value): cls.str_files_img=cls.setComicValue(value) - @classmethod - def getChapterFilesName(cls): return cls.str_files_img - @classmethod - def getWeb(cls): return cls.str_web - - @classmethod - def setPageCount(cls,value): - cls.str_page_count = cls.setComicValue(int(value)) - cls.page_count_node = cls.setNodeAndValue(cls.page_count,cls.str_page_count) - - @classmethod - def setPages(cls,value): - if value != None: - su = "."+str(value[0]).split(".")[-1] - join_list=",".join(value).replace(su,"") - value = join_list.split(",") - cls.setPageCount(len(value)+1 if cls.IS_NEW_ICON else len(value)) - root_node = cls.document.createElement(cls.pages) - if cls.IS_NEW_ICON: - #添加封面 - icon_node = cls.document.createElement("Page") - icon_node.setAttribute("Image","000") - icon_node.setAttribute("Type","FrontCover") - root_node.appendChild(icon_node) - for page in value: - c_node = cls.document.createElement("Page") - page = page.split("_")[-1] - c_node.setAttribute("Image",page) - root_node.appendChild(c_node) - cls.pages_node = root_node - - @classmethod - def setDate(cls,value,split): - values = str(value).split(split) - cls.str_date_year = values[0] - cls.str_date_month = values[1] - cls.str_date_day = values[2] - cls.date_year_node = cls.setNodeAndValue(cls.date_year,values[0]) - cls.date_month_node = 
cls.setNodeAndValue(cls.date_month,values[1]) - cls.date_day_node = cls.setNodeAndValue(cls.date_day,values[2]) - - - @classmethod - def setIcon(cls,value): - cls.str_icon = cls.setComicValue(value) - return cls.str_icon - - @classmethod - def setHomePage(cls, value): cls.str_homepage = cls.setComicValue(value) - @classmethod - def getHomePage(cls): return cls.str_homepage - @classmethod - def setUpdateAt(cls, value): cls.str_update_at = cls.setComicValue(value) - @classmethod - def getUpdateAt(cls): return cls.str_update_at - @classmethod - def getListToString(cls,to_list): - value = to_list - if isinstance(to_list,list): - value = ",".join(to_list) - return value - - @classmethod - def setChapterIndex(cls,value): - cls.setNumber(value) - cls.str_chapter_index = cls.setComicValue(value) - @classmethod - def getChapterIndex(cls): return cls.str_chapter_index - @classmethod - def setComicNameSkips(cls,value): return cls.list_skip.append(value) - @classmethod - def getIsComicNameSkips(cls,value): return value in ",".join(cls.list_skip) - - @classmethod - def getBaseUrl(cls,url=None): - if url == None: - url = cls.str_homepage - num = 3 - index = 0 - for x in range(0, num): - index = str(url).find("/",index)+1 - return url[0:index-1] - - @classmethod - def getIcon(cls): return cls.str_icon - @classmethod - def getComicName(cls): return cls.str_comic_name - @classmethod - def getChapter(cls): return cls.str_chapter - - @classmethod - def fixFileName(cls,filename,replace=None): - intab = r'[?*/\|.:><]' - str_replace = "" - if replace != None: - str_replace = replace - filename = re.sub(intab, str_replace, filename) - count = 1 - while True: - str_file = filename[0-count] - if str_file == " ": - count += 1 - else: - filename = filename[0:len(filename)+1-count] - break - return filename - - @classmethod - def getDirConfComic(cls): - if cls.str_comic_name != None: - return os.path.join(pathStr.base_conf_path(), cls.str_comic_name) - else: - print("comicName不存在,退出中") - exit() - - @classmethod - def getDirCBZComic(cls): - if cls.str_comic_name != None: - return os.path.join(pathStr.base_cbz(), cls.str_comic_name) - else: - print("comicName不存在,退出中 getDirCBZComic") - exit() - - @classmethod - def getDirCBZComicChapter(cls): - if cls.str_comic_name != None and cls.str_chapter != None: - return os.path.join(pathStr.base_cbz(),cls.str_comic_name,cls.str_chapter) - else: - print("comicName不存在,退出中 getDirCBZComicChapter") - exit() - - @classmethod - def getSortDirCBZComicChapter(cls): - if cls.str_comic_name != None and cls.str_chapter != None and cls.str_chapter_index != None: - return os.path.join(pathStr.base_cbz(),cls.str_comic_name,str(cls.str_chapter_index)+" "+cls.str_chapter) - else: - print("comicName不存在,退出中 getSortDirCBZComicChapter") - return None - - @classmethod - def getNewCBZComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".CBZ", type) - @classmethod - def getNewIconComicChapter(cls,type="dir"): return cls.getNewToComicChapter(".jpg", type) - @classmethod - def getNewFileCBZComicChapter(cls,type="file"): return cls.getNewToComicChapter(".CBZ", type) - @classmethod - def getNewFileIconComicChapter(cls,type="file"): return cls.getNewToComicChapter(".jpg", type) - - - @classmethod - def getNewToComicChapter(cls,su,type="dir"): - c_dir = cls.getDirCBZComicChapter() - s_dir = cls.getSortDirCBZComicChapter() - c_path = cls.getDirCBZComicChapter()+su - s_path = cls.getSortDirCBZComicChapter()+su - if os.path.exists(s_path) and s_path != None: - shutil.move(s_path, c_path) - 
print("文件已移动至:", c_path) - if type == "file": - return c_path - return c_dir - - @classmethod - def getDirComic(cls): - if cls.str_comic_name != None: - return os.path.join(pathStr.base_comic_img(), cls.str_comic_name) - else: - print("comicName不存在,退出中") - exit() - - @classmethod - def getDirComicChapter(cls): - if cls.str_comic_name != None and cls.str_chapter != None: - return os.path.join(pathStr.base_comic_img(),cls.str_comic_name,cls.str_chapter) - else: - print("comicName与chapter 不存在,退出中") - exit() - @classmethod - def getPathComicInfoXML(cls): - try: - cls.path_comic_info = os.path.join(pathStr.base_comic_img(),cls.str_comic_name,cls.str_chapter, cls.COMIC_INFO_XML) - except: - return None - return cls.path_comic_info - - @classmethod - def writeComicInfoXML(cls,chapter=None,path=None,overlay=False): - root = cls.root_node() - new_document = Document() - new_document.appendChild(root) - if cls.chapter_node != None: root.appendChild(cls.chapter_node) - if cls.comic_name_node != None: root.appendChild(cls.comic_name_node) - if cls.number_node != None: root.appendChild(cls.number_node) - if cls.dep_node != None: root.appendChild(cls.dep_node) - if cls.author_node != None: root.appendChild(cls.author_node) - if cls.genre_node != None: root.appendChild(cls.genre_node) - if cls.cbs_node != None: root.appendChild(cls.cbs_node) - if cls.lang_node != None: root.appendChild(cls.lang_node) - if cls.age_rating_node != None: root.appendChild(cls.age_rating_node) - if cls.comic_names_node != None: root.appendChild(cls.comic_names_node) - if cls.tags_node != None: root.appendChild(cls.tags_node) - if cls.date_year_node != None: root.appendChild(cls.date_year_node) - if cls.date_month_node != None: root.appendChild(cls.date_month_node) - if cls.date_day_node != None: root.appendChild(cls.date_day_node) - if cls.page_count_node != None: root.appendChild(cls.page_count_node) - if cls.pages_node != None: root.appendChild(cls.pages_node) - cls.getPathComicInfoXML() - if path != None: cls.path_comic_info = os.path.join(path,cls.COMIC_INFO_XML) - base_dir = os.path.dirname(cls.path_comic_info) - if not os.path.exists(base_dir): os.makedirs(base_dir) - if os.path.exists(cls.path_comic_info) and not overlay: - print(f"{cls.COMIC_INFO_XML} 已存在") - return None - with open(cls.path_comic_info , "w", encoding="utf-8") as fo: - new_document.writexml(fo, indent='', addindent='\t', newl='\n', encoding="utf-8") - fo.close() - print(f"{cls.COMIC_INFO_XML} 已生成 pathd=", cls.path_comic_info) - - #文件保存 - @classmethod - def file_save(cls,path,data,mode=None,print_msg=False): - result = {} - f = {} - dir_name = os.path.dirname(path) - if not os.path.exists(dir_name): - os.makedirs(dir_name) - - save_path = os.path.join(path) - if os.path.exists(save_path): - os.remove(save_path) - data = json.dumps(data) - if mode == None: - mode = "w+" - try: - f = open(save_path, mode, encoding="utf-8") - f.write(data) - f.close() - if print_msg: - print("data=",data) - result = path + "文件写入成功" - except: - result = path + "文件写入失败" - print(result) - return result - - @classmethod - def nextSavePath(cls,next,data=None): - save_path = cls.getDirConfComic()+"/"+next+cls.str_chapter - if data != None: - cls.file_save(save_path, data) - return save_path - - @classmethod - def nextSaveInfoChapter(cls,chapter,data=None): - if data == None: data = cls.getChapterImgs() - if cls.str_chapter != chapter: - print(f"chapter {cls.str_chapter} 与 {chapter} 不一致,已自动跳过") - cls.setProgress(cls.PROGRESS_INFO) - cls.nextSavePath("info_",data) - - @classmethod 
- def nextInfoToImgChapter(cls): cls.setProgress(cls.PROGRESS_IMG) - @classmethod - def nextImgToDownloadChapter(cls): cls.setProgress(cls.PROGRESS_DOWN) - @classmethod - def nextDownloadToCBZChapter(cls): cls.setProgress(cls.PROGRESS_CBZ) - @classmethod - def nextCBZToDoneChapter(cls): cls.setProgress(cls.PROGRESS_DONE) - @classmethod - def nextDoneSave(cls,data): cls.nextSavePath("done_",data) - @classmethod - def setProgress(cls,progress): - dbUtils.setComic(cls.str_chapter,progress,cls.str_comic_name) - - @classmethod - def isProgress(cls,progress,remove=None): - if remove: cls.setProgress("None") - return dbUtils.query(cls.str_chapter,progress,cls.str_comic_name) - - @classmethod - def iconDB(cls): dbUtils.setComic(cls.str_comic_name,cls.str_icon,"icons") - - @classmethod - def equIcon(cls): return dbUtils.query(cls.str_comic_name,cls.str_icon,"icons") - - @classmethod - def setConfDirComicPath(cls,file_name,comic_name=None): - if comic_name != None: cls.setComicName(comic_name) - return os.path.join(cls.getDirConfComic(),file_name) - - @classmethod - def saveConfComicData(cls,file_name,data,comic_name=None): cls.file_save(cls.setConfDirComicPath(file_name,comic_name), data) - @classmethod - def getPathInitConfComicData(cls,file_name,comic_name=None): return cls.setConfDirComicPath(file_name,comic_name) - - @classmethod - def updateComicDate(cls,date=None): - update_at = cls.getUpdateAt() - if date != None: - update_at = date - dbUtils.setComic(cls.str_comic_name, update_at, "update") - - @classmethod - def isUpdateComic(cls): - return dbUtils.query(cls.str_comic_name, cls.str_update_at,"update") - - @classmethod - def comicChapterDownload(cls,imgs,names): - cls.setChapterImgs(imgs) - cls.setChapterListImg(imgs) - cls.setPages(names) - cls.setChapterFilesName(names) - - @classmethod - def setComicInfo(cls,comicname=None,homepage=None,alias=None,author=None,icon=None,tags=None, - dep=None,genre=None,lang=None,age_rating=None,chapters=None,update_at=None,current_chapter_img=None): - author = str(author).replace("&",",").replace(" ",",") - cls.setHomePage(homepage) - cls.setComicName(str(comicname)) - if alias != None: comicInfo.setComicNames(alias) - cls.setAuthor(author) - cls.setIcon(icon) - cls.setTags(tags) - cls.setDep(dep) - #comicInfo.setCBS("韩漫") - if genre != None: cls.setGenre(genre) - cls.setLang(lang) - cls.setAgeRating(age_rating) - cls.setListChapter(chapters) - cls.setUpdateAt(update_at) - cls.setCurrentChapterImg(current_chapter_img) \ No newline at end of file diff --git a/utils/comic/PathStr.py b/utils/comic/PathStr.py deleted file mode 100644 index f61d5de..0000000 --- a/utils/comic/PathStr.py +++ /dev/null @@ -1,66 +0,0 @@ -import os,datetime -from time import strftime -class pathStr: - - comic_name = None - comic_jm="JM" - comic_bz="BZ" - comic_rm="RM" - - comic_url_main = None - base_comic_out = os.path.join("/mnt", "Comics") - old_cbz_path = os.path.join("/mnt","OldComics") - @classmethod - def base_cbz(cls): return cls.getBaseComicPath("CBZ") - @classmethod - def base_comic_img(cls): return cls.getBaseComicPath("outputComic") - @classmethod - def base_conf_path(cls): return cls.getBaseComicPath(".conf") - @classmethod - def base_html_cache(cls): return cls.getBaseComicPath("html_cache") - @classmethod - def base_html_chapter(cls): return cls.getBaseComicPath("html_updated") - @classmethod - def base_comic_update(cls): return cls.getBaseComicPath("comic_update") - @classmethod - def base_db(cls): return cls.getBaseComicPath("db") - - @classmethod - def 
getBaseComicPath(cls,join_path): return os.path.join(cls.base_comic_out,join_path) - - @classmethod - def setComicMainAndPath(cls,value): - cls.setComicMain(value) - cls.setComicMainPath(value) - - @classmethod - def setComicMain(cls,value): cls.comic_name = value - - @classmethod - def getComicMain(cls): return cls.comic_name - - @classmethod - def setComicMainPath(cls,value): - #if value != cls.comic_rm: cls.base_comic_out = os.path.join(cls.base_comic_out, value) - cls.base_comic_out = os.path.join(cls.base_comic_out, value) - - @classmethod - def base_html_week(cls): - date_path = cls.getDatePath() - return os.path.join(cls.base_comic_out,"html_"+str(date_path)) - - @classmethod - def getDatePath(cls): - date = datetime.datetime.now() - year = int(date.strftime("%Y")) - month = int(date.strftime("%m")) - day = int(date.strftime("%d")) - week = cls.get_week_of_month(year, month, day) - return f"{year}{month}{week}" - - @classmethod - def get_week_of_month(cls, year, month, day): - begin = int(datetime.date(year, month, 1).strftime("%W")) - end = int(datetime.date(year, month, day).strftime("%W")) - week = "{:0>2d}".format(end - begin + 1) - return week \ No newline at end of file diff --git a/utils/downloader.py b/utils/downloader.py deleted file mode 100644 index 0e554e2..0000000 --- a/utils/downloader.py +++ /dev/null @@ -1,174 +0,0 @@ -""" Download image according to given urls and automatically rename them in order. """ -# -*- coding: utf-8 -*- -# author: Yabin Zheng -# Email: sczhengyabin@hotmail.com - -from __future__ import print_function - -from queue import Queue -import shutil -import imghdr -import os -import concurrent.futures -import requests -import time -from utils.Ntfy import ntfy -from utils.comic.ComicInfo import comicInfo -from utils.HtmlUtils import htmlUtils -from utils.FileUtils import fileUtils as fu - -headers = { - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", - "Proxy-Connection": "keep-alive", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36", - "Accept-Encoding": "gzip, deflate, sdch", - # 'Connection': 'close', -} - -down_queue = Queue() - -def common_download(file_name,image_url,dst_dir,timeout=10,proxy=None,proxy_type=None): - proxies = None - if proxy_type is not None: - proxies = { - "http": proxy_type + "://" + proxy, - "https": proxy_type + "://" + proxy } - response = None - file_path = os.path.join(dst_dir, file_name) - if os.path.exists(file_path): - print("download_image 文件已存在,已跳过=",file_path) - return None - temp_path = os.path.join(dst_dir, file_name+".downloads") - repair_count = 1 - response = requests.get( - image_url, headers=headers, timeout=timeout, proxies=proxies) - while response.status_code != 200 and repair_count <= 5: - time.sleep(0.7) - download_image(image_url,dst_dir,file_name) - ntfy.sendMsg(f'重试:第{repair_count}次 {image_url}') - repair_count += 1 - with open(temp_path, 'wb') as f: - f.write(response.content) - response.close() - #验证是否是图像 - if fu.ver_file(temp_path,type="image"): - shutil.move(temp_path, file_path) - print("## OK: {} {}".format(file_path, image_url)) - else: - print("## Fail: {} {}".format(image_url, "图像损坏")) - down_queue.put([file_name,image_url,dst_dir]) - -def download_image(timeout=20, proxy_type=None, proxy=None,type="image"): - repeat = 0 - while not down_queue.empty() and repeat <= 10: - repeat += 1 - data = down_queue.get(False) - (file_name,image_url,dst_dir) = 
[data[0],data[1],data[2]] - if repeat > 1: - ntfy.sendMsg(f"第{repeat}次下载数据中... file_name={file_name}") - try: - common_download(file_name,image_url,dst_dir) - except: - ntfy.sendMsg(f"下载重试中 {file_name}={image_url}") - down_queue.put([file_name,image_url,dst_dir]) - - - -def download_images(image_urls, dst_dir,concurrency=None,timeout=20,proxy_type=None, proxy=None,files_name=None): - """ - Download image according to given urls and automatically rename them in order. - :param timeout: - :param proxy: - :param proxy_type: - :param image_urls: list of image urls - :param dst_dir: output the downloaded images to dst_dir - :param file_prefix: if set to "img", files will be in format "img_xxx.jpg" - :param concurrency: number of requests process simultaneously - :return: none - """ - if concurrency == None: - concurrency = len(image_urls) - with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor: - future_list = list() - count = 0 - if not os.path.exists(dst_dir): - os.makedirs(dst_dir) - for image_url in image_urls: - file_name = files_name[count] - down_queue.put([file_name,image_url,dst_dir]) - future_list.append(executor.submit( - download_image,timeout, proxy_type, proxy)) - count += 1 - concurrent.futures.wait(future_list, timeout) - -def download_comic_icon(is_new=comicInfo.IS_NEW_ICON): - icon_url = comicInfo.getIcon() - if icon_url == None: - print("icon 不存在,已跳过") - return None - save_name = comicInfo.COMIC_ICON_NAME - icon_prefix = "."+str(icon_url).split(".")[-1] - icon_prefix = icon_prefix.split("?")[0] - #判断漫画名路径是否已存在comicname/cover.jpg, 存在跳过 - path_comic_icon = os.path.join(comicInfo.getDirConfComic(),save_name+icon_prefix) - if not comicInfo.equIcon() and fu.exists(path_comic_icon): - os.remove(path_comic_icon) - if fu.notExists(path_comic_icon): - download(icon_url, path_comic_icon) - #if not os.path.exists(path_cbz_comic): - # os.makedirs(path_cbz_comic) - save_path = os.path.join(comicInfo.getDirCBZComic(),comicInfo.getChapter()+icon_prefix) - if is_new: - #历史版本ICON - if os.path.exists(save_path): - os.remove(save_path) - if os.path.exists(path_comic_icon): - base_dir = comicInfo.getDirComicChapter() - if not os.path.exists(base_dir): os.makedirs(base_dir) - shutil.copy(path_comic_icon,os.path.join(base_dir,save_name+icon_prefix)) - else: - if fu.notExists(comicInfo.getDirCBZComic()): os.makedirs(comicInfo.getDirCBZComic()) - shutil.copy(path_comic_icon,save_path) - print(f"{path_comic_icon} 已复制至: {save_path}") - #保存icon信息 - comicInfo.iconDB() - comicInfo.nextDownloadToCBZChapter() - comicInfo.setProgress(comicInfo.PROGRESS_CBZ) - - # 定义下载函数 -def download(url,path,file_type=None): - if os.path.exists(path): - if imghdr.what(path): - msg = "已存在同路径文件,已跳过:"+path - print(msg) - return msg - else: - print("文件已损坏,已重试:"+path) - path = os.path.join(os.path.dirname(path),str(os.path.basename(path)).split("?")[0]) - tmp_file = path+".downloads" - if os.path.exists(tmp_file): - os.remove(tmp_file) - print("存在缓存文件,已删除:",tmp_file) - repair_count = 1 - res = htmlUtils.getBytes(url) - while res.status_code != 200 and repair_count <= 5: - res = htmlUtils.getBytes(url) - print(f'重试:第{repair_count}次 {url}') - repair_count += 1 - #判断是否为图片 - if file_type == "image": - if 'image' not in res.headers.get("content-type",""): - print(f"url= {url} Error: URL doesnot appear to be an image") - basedir= os.path.dirname(path) - if not os.path.exists(basedir): - os.makedirs(basedir) - #expected_length = res.headers.get('Content-Length') - #actual_length = res.raw.tell() - with 
open(tmp_file, 'wb') as f: - for ch in res: - f.write(ch) - f.close() - shutil.move(tmp_file, path) - print(f"url={url} 保存至:{path}") - return path \ No newline at end of file
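
Notes on the patterns this change introduces, with small self-contained sketches rather than the repository's own code. The ntfy class moved into utils/ComicUtils.py sends a best-effort push notification: the message is POSTed to an ntfy topic and any network error is swallowed so a failed notification can never break a download run. A sketch of that behaviour, using the same topic URL that appears in the diff; point it at your own topic in practice.

import time
import requests

NTFY_TOPIC = "https://ntfy.caiwenxiu.cn/PyComic"  # topic from the diff; replace with your own

def send_msg(msg, alert=True, sleep=None, error=None):
    try:
        print(f"#ntfy: {msg}")
        if alert:
            requests.post(NTFY_TOPIC, data=msg.encode("utf-8"), timeout=5)
    except requests.RequestException:
        # notification failures are logged and ignored
        print(f"#ntfy error: {msg}")
    if sleep is not None:
        time.sleep(int(sleep))
    if error is not None:
        print(f"#ntfy Error: {error}")
        return False
    return True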
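
CBZUtils packs a finished chapter directory into a .CBZ and then trusts the archive only if it holds the expected number of page images and exceeds a minimum size, deleting it otherwise so the chapter is rebuilt on the next pass. A simplified sketch of that pack-and-verify flow; the cover name "000.jpg", the "ComicInfo.xml" entry, and the 300 000-byte floor follow the constants visible in the diff, everything else is illustrative.

import os
from pathlib import Path
from zipfile import ZipFile

def pack_cbz(source_dir, target_file):
    os.makedirs(os.path.dirname(target_file) or ".", exist_ok=True)
    with ZipFile(target_file, mode="w") as zf:
        for path, _dirs, filenames in os.walk(source_dir):
            arc_dir = Path(path).relative_to(source_dir)
            for filename in filenames:
                zf.write(Path(path) / filename, arc_dir / filename)

def verify_cbz(target_file, expected_images, min_size=300_000):
    if not os.path.exists(target_file):
        return False
    with ZipFile(target_file) as zf:
        # ignore the injected cover and metadata entry when counting pages
        names = [n for n in zf.namelist() if n not in ("ComicInfo.xml", "000.jpg")]
    if len(names) == len(expected_images) and os.path.getsize(target_file) >= min_size:
        return True
    os.remove(target_file)  # incomplete archive: drop it so the chapter is redone
    return False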
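
Before packing, packAutoComicChapterCBZ deletes leftover files whose names start with "scramble="; the diff passes the bare filename from os.listdir to os.remove, so the removal only succeeds when the working directory happens to be the chapter directory. A sketch of the same cleanup that joins the chapter path explicitly.

import os

def remove_scramble_leftovers(chapter_path):
    if not os.path.isdir(chapter_path):
        return
    for name in os.listdir(chapter_path):
        if name.startswith("scramble="):
            try:
                os.remove(os.path.join(chapter_path, name))
            except OSError:
                print(f"could not delete {name}, skipping")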
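
replaceZip only re-inspects archives that compareFileDate flags as old: the file's modification time is compared against a fixed cutoff, 2023-01-16 03:00, which the diff encodes as the integer 2023011603. The same check, expressed directly with datetime instead of string formatting.

import os
from datetime import datetime

CUTOFF = datetime(2023, 1, 16, 3)  # cutoff constant from compareFileDate

def is_older_than_cutoff(filepath):
    if not os.path.exists(filepath):
        return False
    return datetime.fromtimestamp(os.path.getmtime(filepath)) < CUTOFF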
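
utils/FileUtils.py gains a file_save helper that serializes data to JSON and rewrites the target file, creating parent directories on demand. A compact sketch of the same behaviour, assuming the JSON-dump-then-write semantics shown in the diff.

import json
import os

def file_save(path, data, mode="w", print_msg=False):
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    try:
        with open(path, mode, encoding="utf-8") as f:
            f.write(json.dumps(data))
        if print_msg:
            print("data=", data)
        result = path + " written"
    except OSError:
        result = path + " write failed"
    print(result)
    return result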
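
The htmlUtils class in the new utils/NetUtils.py fetches each page at most once per run: the URL is flattened into a cache filename, served from disk when present, and otherwise fetched through a requests Session mounted with an HTTPAdapter retry policy before being written back to the cache. A minimal sketch of that flow, with CACHE_DIR standing in for pathStr.base_html_cache().

import os
import re
import requests
from lxml import html
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

CACHE_DIR = "html_cache"  # assumption: stands in for pathStr.base_html_cache()

def cache_path(url):
    # strip characters that cannot appear in a filename, as getPathSaveHtml does
    return os.path.join(CACHE_DIR, re.sub(r'[\/\\:*?"<>|.]', "", url))

def get_html(url, update=False):
    path = cache_path(url)
    if not update and os.path.exists(path):
        with open(path, encoding="utf-8") as fs:
            return html.fromstring(fs.read())
    session = requests.Session()
    retries = Retry(total=1, backoff_factor=0.5,
                    status_forcelist=[500, 502, 503, 504])
    session.mount("http://", HTTPAdapter(max_retries=retries))
    session.mount("https://", HTTPAdapter(max_retries=retries))
    for attempt in range(5):
        try:
            res = session.get(url, timeout=10, allow_redirects=True)
            os.makedirs(CACHE_DIR, exist_ok=True)
            with open(path, "w", encoding="utf-8") as fs:
                fs.write(res.text)
            return html.fromstring(res.text)
        except requests.RequestException as e:
            print(f"request failed ({attempt + 1}/5): {e} {url}")
    return None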
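
htmlUtils.parseExec walks a JSON payload by a dotted key path, so call sites can ask for something like "data.chapter.images" instead of chaining .get() calls. A sketch of the same idea; the sample payload and path are illustrative only.

import json

def parse_exec(data, exec_path):
    if data is None or exec_path is None:
        return data
    if not isinstance(data, dict):
        data = json.loads(data)
    for key in str(exec_path).split("."):
        if data is None:
            return None
        data = data.get(key)
    return data

# parse_exec('{"data": {"chapter": {"images": ["1.jpg"]}}}', "data.chapter.images")
# -> ["1.jpg"]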
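
downloadUtils pushes every (file name, URL, target directory) triple onto a Queue and lets a ThreadPoolExecutor drain it; each download lands in a ".downloads" temp file first, is kept only if it verifies as an image, and is re-queued otherwise. A condensed sketch of that loop; is_image() stands in for fileUtils.ver_file(type="image"), and the bound of ten attempts per worker mirrors the repeat counter in download_image.

import concurrent.futures
import imghdr
import os
import shutil
from queue import Empty, Queue

import requests

down_queue = Queue()

def is_image(path):
    # imghdr is what the old downloader used; it is removed in Python 3.13,
    # where Pillow's Image.verify() is a drop-in alternative
    return imghdr.what(path) is not None

def download_worker(timeout=20):
    for _ in range(10):                      # bounded retries, like repeat <= 10
        try:
            file_name, image_url, dst_dir = down_queue.get(block=False)
        except Empty:
            return
        final_path = os.path.join(dst_dir, file_name)
        if os.path.exists(final_path):
            continue                         # already downloaded, skip
        temp_path = final_path + ".downloads"
        try:
            res = requests.get(image_url, timeout=timeout)
            res.raise_for_status()
            with open(temp_path, "wb") as f:
                f.write(res.content)
            if is_image(temp_path):
                shutil.move(temp_path, final_path)
                continue
        except requests.RequestException:
            pass
        down_queue.put((file_name, image_url, dst_dir))   # failed or corrupt: retry

def download_images(image_urls, files_name, dst_dir, concurrency=None):
    os.makedirs(dst_dir, exist_ok=True)
    for name, url in zip(files_name, image_urls):
        down_queue.put((name, url, dst_dir))
    workers = concurrency or len(image_urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [executor.submit(download_worker) for _ in range(workers)]
        concurrent.futures.wait(futures)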
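
The deleted utils/comic/ComicInfo.py built each chapter's ComicInfo.xml with xml.dom.minidom, one text element per populated field (Title, Series, Number, Writer, and so on), and writeComicInfoXML presumably carries the same approach into common/ComicInfo.py. A generic sketch of that pattern; the field names mirror the node constants in the diff, and the sample call is hypothetical.

import os
from xml.dom.minidom import Document

def write_comic_info(path, fields):
    doc = Document()
    root = doc.createElement("ComicInfo")
    doc.appendChild(root)
    for tag, value in fields.items():
        if value is None:
            continue
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(str(value).replace("\n", "")))
        root.appendChild(node)
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    with open(path, "w", encoding="utf-8") as fo:
        doc.writexml(fo, indent="", addindent="\t", newl="\n", encoding="utf-8")

# Hypothetical usage:
# write_comic_info("outputComic/MyComic/Ch1/ComicInfo.xml",
#                  {"Title": "Ch1", "Series": "MyComic", "Number": 1, "Writer": "A"})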