update new Rouman5

This commit is contained in:
caiwx86 2024-10-28 00:03:20 +08:00
parent 6638254416
commit 5f5b6cb6ec
10 changed files with 142 additions and 169 deletions

View File

@@ -3,6 +3,7 @@ from xml.dom import minidom
from typing import List
import json,os
from lxml import etree
from Comics.settings import COMIC_INFO_XML_FILE,COMIC_INFO_XSD_FILE,OUTPUT_DIR,PROJECT_KEY
# Define the ComicInfo and ComicPageInfo classes
class ComicInfo:
@@ -94,7 +95,7 @@ class ComicInfoXml:
if remove:
os.remove(xml_file)
def parse_comicinfo(self, comic: ComicInfo, save_dir=None, xml_filename="ComicInfo.xml", xsd_filename="ComicInfo_2.1.xsd"):
def parse_comicinfo(self, comic: ComicInfo, save_dir=None, xml_filename="ComicInfo.xml", xsd_filename="ComicInfo.xsd"):
"""_summary_
Args:
@@ -143,7 +144,7 @@ class ComicInfoXml:
#xml_data = json_to_xml_with_declaration(json_data)
#print(xml_data)
def scrapy_xml_by_json(self, json_data, save_dir=None):
def scrapy_xml_by_json(self, json_data, save_dir=None, xsd_file=COMIC_INFO_XSD_FILE):
comic = ComicInfo()
comic.Title = json_data.get("chapter", "")
comic.Series = json_data.get("name", "")
@@ -163,5 +164,5 @@ class ComicInfoXml:
page.Image = image_name.split(".")[0].split("_")[-1]
pages.append(page.Image)
comic.Pages.append(page)
self.parse_comicinfo(comic, save_dir=save_dir)
self.parse_comicinfo(comic, save_dir=save_dir, xsd_filename=xsd_file)
return pages
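Note: with the XSD path now threaded through from settings, the generated ComicInfo.xml can be checked against the bundled schema. A minimal sketch of that validation using the lxml API imported above; the output path here is illustrative, not taken from this code:

    from lxml import etree

    # Validate a generated ComicInfo.xml against the bundled XSD.
    # "output/ComicInfo.xml" is a made-up path for illustration.
    schema = etree.XMLSchema(etree.parse("Comics/assets/ComicInfo_2.1.xsd"))
    doc = etree.parse("output/ComicInfo.xml")
    if not schema.validate(doc):
        for error in schema.error_log:
            print(error.message)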

View File

@@ -28,7 +28,8 @@ def _serialize_to_images(value, result_type=None):
# suffix = "."+str(image_src).split(".")[-1]
suffix = ".jpg"
image_name = count_image + suffix
if scramble:
#if scramble:
if scramble == "True":
de_str = str(image_src).split("/")[-1].replace(suffix, "==")
blocks_num = imageUtils.encodeImage(de_str)
image_name = ComicPath.getFileScrambleImageName(count=count_image, block=blocks_num, suffix=suffix)
@@ -46,7 +47,7 @@ def _serialize_to_images(value, result_type=None):
def serialize_to_images(value): return _serialize_to_images(value)
# Image URL handling method
def serialize_to_image_urls(value): return _serialize_to_images(value, result_type="image_urls")
# ComicItem
class ComicItem(Item):
@@ -93,11 +94,27 @@ class ComicItem(Item):
# Image names
images_name = Field()
domain = Field()
# Chapter link
chapter_href = Field()
# Chapter API
chapter_api = Field()
class BooksItem(Item):
current_project = Field()
names = Field()
urls = Field()
class ImageItem(Item):
image_url = Field()
image_name = Field()
image_path = Field()
image_type = Field()
isScramble = Field()
class Image():
def setImage(self, url, scramble): return { "src" : url, "scramble": scramble}
# Serializer: author
def serializer_info_writer(value):
(list_value, value) = [[], str(value).replace("&", " ")]
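Note: the scramble check above had to become a string comparison because the flag now arrives as the string "True" or "False" built by Image.setImage in the spider, and any non-empty string is truthy in Python:

    # Why the old `if scramble:` misfired once the flag became a string:
    scramble = "False"
    print(bool(scramble))      # True  -> the truthiness check always fires
    print(scramble == "True")  # False -> the string comparison behaves correctly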

View File

@@ -1,8 +1,8 @@
import json,logging
import json,logging,os
from scrapy.loader import ItemLoader
from Comics.settings import PROJECT_KEY
from Comics.settings import PROJECT_KEY,IMAGES_STORE
class ComicLoader(ItemLoader):
class BaseLoader(ItemLoader):
def parseExec(self,data,exec):
if data !=None and exec != None:
dots = str(exec).split(".")
@@ -53,7 +53,7 @@ class ComicLoader(ItemLoader):
def get_exec(self, value, str_exec):
return self.parseExec(value, str_exec)
def add_value(self, field_name, value, *processors, re=None, **kw):
if self.auto_replace_value(field_name, value):
return super().add_value(field_name, value, *processors, re=re, **kw)
@@ -68,15 +68,29 @@ class ComicLoader(ItemLoader):
# Set a comic property
def set_properties(self, name, value=None, xpath=None, index=None, sexec=None):
if value != None and sexec==None:
if value != None:
self.add_value(field_name=name, value=value)
if xpath != None:
self.add_xpath(field_name=name, xpath=xpath, index=index)
if sexec != None:
self.add_exec(field_name=name, value=value, str_exec=sexec)
def get_output_value(self, field_name, skip_field=["chapter"]):
value = super().get_output_value(field_name)
try:
if isinstance(value, list) and len(value) == 1:
if field_name not in skip_field: value = value[0]
else: value = "".join(value)
except Exception:
logging.warning(f"get_output_value value={value} type={type(value)}")
return value
# Project name
def project_name(self, project_name): self.add_value(PROJECT_KEY, project_name)
# Project name
def get_project_name(self): return self.get_output_value(PROJECT_KEY)
class ComicLoader(BaseLoader):
# Comic name
def name(self, value=None, xpath=None, index=None, sexec=None): self.set_properties('name', value, xpath, index, sexec)
# Comic cover URL
@@ -101,7 +115,9 @@ class ComicLoader(ItemLoader):
# Image names
def images(self, value=None, xpath=None, index=None, sexec=None): self.set_properties('images', value, xpath, index, sexec)
# Image URLs
def image_urls(self, value=None, xpath=None, index=None, sexec=None): self.set_properties('image_urls', value, xpath, index, sexec)
def image_urls(self, value=None, xpath=None, index=None, sexec=None):
self.set_properties('images', value, xpath, index, sexec)
self.set_properties('image_urls', value, xpath, index, sexec)
def get_output_value(self, field_name, skip_field=["chapter"]):
value = super().get_output_value(field_name)
@@ -121,8 +137,6 @@ class ComicLoader(ItemLoader):
def get_schapter(self): return self.get_output_value("s_chapter")
# Comic cover
def get_icon(self): return self.get_output_value("icon")
# Project name
def get_project_name(self): return self.get_output_value(PROJECT_KEY)
# Chapter link
def get_chapter_href(self): return self.get_output_value("chapter_href")
# All chapters
@@ -143,7 +157,9 @@ class ComicLoader(ItemLoader):
def set_chapter(self, value): self.set_properties('chapter', value=value)
def set_schapter(self, value): self.set_properties('s_chapter', value=value)
def set_domain(self, value): self.set_properties('domain', value=value)
def get_domain(self): return self.get_output_value("domain")
# Chapter page count
def count(self):
len_images = len(self.get_images())
@@ -162,6 +178,26 @@ class ComicLoader(ItemLoader):
def load_item(self, chapter=None):
self.count()
self.index()
if not self.get_icon().startswith("http"): self.icon(self.get_domain()+ self.get_icon())
if chapter != None: self.set_chapter(chapter)
self.save_sname_schapter()
return super().load_item()
def set_image_item(self, image_url, image_path, image_name, image_scramble="False", image_type="Image"):
return { "image_url" : image_url, "image_path" : image_path, "image_name" : image_name, "image_scramble" : image_scramble , "image_type" : image_type}
# Image URL processing
def parse_images(self):
images_item = []
icon_path = os.path.join(self.get_project_name(), "icons", self.get_name(), self.get_name()+".jpg")
images_item.append(self.set_image_item(image_url= self.get_icon() , image_path = icon_path , image_name=self.get_name()+".jpg", image_scramble="False", image_type="Icon"))
for url, name in zip(self.get_image_urls(), self.get_images()):
image_path = os.path.join(self.get_project_name(), "images", self.get_name(), self.get_chapter(), name)
images_item.append(self.set_image_item(image_url= url , image_path= image_path, image_name=name))
return images_item
class BooksLoader(BaseLoader):
def get_names(self): return self.get_output_value("names")
def get_urls(self): return self.get_output_value("urls")
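Note: parse_images above emits one plain dict per download, cover first. Roughly the following shape, with all values fabricated for illustration:

    # Illustrative output of ComicLoader.parse_images(); values are made up.
    [
        {"image_url": "https://example.com/cover.jpg",
         "image_path": "rm_comic/icons/SomeTitle/SomeTitle.jpg",
         "image_name": "SomeTitle.jpg", "image_scramble": "False", "image_type": "Icon"},
        {"image_url": "https://example.com/0001.jpg",
         "image_path": "rm_comic/images/SomeTitle/Chapter 1/0001.jpg",
         "image_name": "0001.jpg", "image_scramble": "False", "image_type": "Image"},
    ]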

View File

@@ -7,11 +7,11 @@
# useful for handling different item types with a single interface
import os,scrapy,logging,shutil
from Comics import settings
from Comics.items import ComicItem
from Comics.items import ComicItem,ImageItem
from Comics.loader import ComicLoader
from Comics.utils import CBZUtils,fileUtils as fu
from Comics.utils import ComicPath
from Comics.utils import checkUtils,oldUtils
from Comics.utils import oldUtils
from Comics.exporters import JsonExport,ItemExporter
from scrapy.pipelines.images import ImagesPipeline
from Comics._utils.ComicInfo import ComicInfoXml
@@ -21,13 +21,11 @@ class ComicsPipeline():
# `item` is the object yielded by the spider
def process_item(self, item: ComicItem, spider):
if isinstance(item, ComicItem):
# 'output/rm_comic/json/壞X/第1話 壞X'
# The comic CBZ file already exists: run the conversion
result_item = None
if fu.exists(ComicPath(item).PATH_CBZ()): result_item = ItemExporter().export_obj(item)
# The comic CBZ file does not exist
else: result_item = JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True)
#oldUtils().clean_old_files(files=result_item["chapters"], folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR))
return result_item
class BaseImagesPipeline(ImagesPipeline):
@@ -59,47 +57,26 @@ class BaseImagesPipeline(ImagesPipeline):
if not result[0]: fail_data.append(result[1])
if len(fail_data) == 0 and len(results) != 0: is_success = True
return is_success
# Cover download pipeline
class IconDownloadPipeline(BaseImagesPipeline):
# Data handling
def get_media_requests(self, item, info):
comic = ComicLoader(item=item)
# Get the cover URL and the cover save path
icon_url, icon_cache_path = [ comic.get_icon(), super().get_file_path(item, result_type="icon_cache") ]
# Cover already exists
if fu.exists(icon_cache_path): return False
else: yield scrapy.Request(url=icon_url, meta={'path': icon_cache_path })
def item_completed(self, results, item, info):
if super().success_completed(item, results):
print(" icon download success")
# Copy the cover into the Icon folder
super().update_icon(item)
return item
class ImgDownloadPipeline(BaseImagesPipeline):
def get_media_requests(self, item, info):
comic = ComicLoader(item=item)
self.image_urls, self.images = [ comic.get_image_urls(), comic.get_images() ]
# Add the cover download info to the download list
# self.add_download_icon(item)
for image_url,image in zip(self.image_urls,self.images):
if_down, image_path = [ True, super().get_file_path(item, image)]
images_item = comic.parse_images()
for image_item in images_item:
if_down = True
image_url = image_item["image_url"]
image_path = image_item["image_path"]
if image_item["image_type"] == "Icon":
image_path = super().get_file_path(item, result_type="icon_cache")
if fu.exists(image_path): continue # cover already cached; skip only this entry
# The image (including its scrambled variant) already exists
if super().image_scramble_exits(item, image_path):
#if image_path == self.get_file_path(item, result_type="icon_cache"):
# logging.info(f"icon file exists: IMAGE_STORE {image_path}")
#else:
if_down = False
logging.info(f"file exists: IMAGE_STORE {image_path}")
if if_down:
logging.info(f"downloading {image_url} --> IMAGE_STORE {image_path}")
yield scrapy.Request(url=image_url, meta={'path': image_path})
# Pack the CBZ cover
def pack_icon(self, item):
@@ -122,10 +99,12 @@ class ImgDownloadPipeline(BaseImagesPipeline):
item (_type_): Comic item data
info (_type_): info
"""
if super().success_completed(item, results): super().update_icon(item)
cbz_path = super().get_file_path(item, result_type="cbz")
chapter_dir = ComicPath(item=item).file_path(result_type=ComicPath().MAPPING_IMAGES_DIR)
images_file = oldUtils().old_images(folder=chapter_dir)
if len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
images_file = oldUtils().old_images(folder=chapter_dir)
if images_file == None or len(images_file) != len(ComicLoader(item=item).get_image_urls()): return
if fu.exists(cbz_path):
#self.update_icon(item)
chapter = os.path.basename(cbz_path).split(".")[0]
@@ -135,19 +114,9 @@ class ImgDownloadPipeline(BaseImagesPipeline):
self.pack_icon(item)
else:
# Generate the ComicInfo XML
#comic_info = ComicInfoXmlItemExporter(dir=super().get_file_path(item=item, result_type="comic_info")).export_xml(item)
comic_pages = ComicInfoXml().scrapy_xml_by_json(item, save_dir=super().get_file_path(item=item, result_type="images_dir"))
#if CBZUtils.packComicChapterCBZ(src_dir= super().get_file_path(item, result_type="images_dir"),
# dts_path= cbz_path,
# comic_info_images= comic_info['Pages'], remove=True):
if CBZUtils.packComicChapterCBZ(src_dir= super().get_file_path(item, result_type="images_dir"),
dts_path= cbz_path,
comic_info_images= comic_pages, remove=True):
super().update_icon(item)
self.pack_icon(item)
# CBZ validation failed
#else:
# checkUtils().export_error(item)
#sleep_time = random.randint(3,15)
#print(f'Waiting {sleep_time}s before moving to the next chapter')
#time.sleep(int(sleep_time))
self.pack_icon(item)
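Note: success_completed leans on the shape Scrapy's ImagesPipeline passes to item_completed: a list with one (success, info_or_failure) tuple per media request. A minimal sketch of the check, mirroring the logic shown above:

    # One (ok, info) tuple per request; success means no failures and at least one result.
    def success_completed(results):
        fail_data = [info for ok, info in results if not ok]
        return len(fail_data) == 0 and len(results) != 0

    print(success_completed([(True, {"path": "0001.jpg"})]))  # True
    print(success_completed([]))                              # False: nothing downloaded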

View File

@@ -97,7 +97,7 @@ ITEM_PIPELINES = {
# 'scrapy.pipelines.images.ImagesPipeline' : 1,
'Comics.pipelines.ComicsPipeline': 300,
# 'Comics.pipelines.ImageParsePipeline': 400,
'Comics.pipelines.IconDownloadPipeline': 400,
# 'Comics.pipelines.IconDownloadPipeline': 400,
'Comics.pipelines.ImgDownloadPipeline': 500,
}
@@ -130,4 +130,5 @@ LOG_STDOUT = True # log to stdout
CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"CBZ")
OLD_CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"Old_CBZ")
# Data exporter class ordering
COMIC_INFO_XML_FILE = "ComicInfo.xml"
COMIC_INFO_XSD_FILE = "Comics/assets/ComicInfo_2.1.xsd"

View File

@@ -1,6 +1,8 @@
import scrapy,logging,os,skip
from Comics.items import ComicItem
import scrapy,logging,os,skip,json,re
from Comics.items import ComicItem,Image
from Comics.items import BooksItem
from Comics.loader import ComicLoader
from Comics.loader import BooksLoader
from Comics.utils import ComicPath
from Comics.utils import Conf
from Comics.utils import oldUtils
@@ -9,7 +11,7 @@ class RmComicSpider(scrapy.Spider):
name = 'rm_comic'
allowed_domains = ['rouman5.com']
main_url = 'https://'+allowed_domains[0]
start_urls = main_url+'/books'
start_urls = main_url+"/books"
# Iterate over the site's paginated data
def start_requests(self):
@@ -18,18 +20,18 @@
# Fetch info for multiple comics
def books_comic(self, response):
comics = ComicLoader(item=ComicItem(), response=response)
books_item = Conf().books(self.name, BooksLoader(BooksItem(), response))
# Pull the JSON from the site's //script[@id] tag, read props.pageProps.books, and parse it in a loop
for book in comics.get_exec(comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0], str_exec="props.pageProps.books"):
for book,url in zip(books_item.get_names(), books_item.get_urls()):
# Skip excluded comic names
if book['name'] not in skip.skip_comic:
yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
if book not in skip.skip_comic: yield scrapy.Request(url=self.main_url+"/"+url, callback=self.parse_comic)
# Fetch the data for a single comic
# Once the chapter links are collected, move on to the next stage
def parse_comic(self, response):
# Initialize the comic data; read this project's config file and parse it automatically
comic_item = Conf().comic(self.name, ComicLoader(ComicItem(), response))
comic_item.set_domain(self.main_url)
path_comic = comic_item.load_item()
cbz_dir = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_CBZ_DIR)
move_folder = ComicPath(path_comic).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR)
@@ -44,7 +46,6 @@ class RmComicSpider(scrapy.Spider):
# Get the final CBZ storage path
cbz_path = ComicPath(item=item).PATH_CBZ()
# Check whether the Traditional/Simplified Chinese CBZ paths exist
# if not checkUtils().is_error(item) and os.path.exists(cbz_path):
if cbz_path !=None and os.path.exists(cbz_path):
logging.info(f"漫画 {cbz_path} 已存在, 跳过中...")
yield item
@@ -56,22 +57,24 @@
def parse_chapter(self, response):
# Get the comic item passed in via meta
ci = ComicLoader(item=response.meta['item'], response=response)
result_json = None
for data_json in ci.get_xpath('//script/text()'):
if data_json.startswith('self.__next_f.push([1,"5'): result_json = data_json
# Parse the fetched XPath data again and store it in ci (ComicItem)
item: ComicLoader = Conf().parse_chapter(item=ci, value=ci.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0])
comic, chapter_api_url = [ item.load_item() ,item.get_chapter_api() ]
if chapter_api_url is not None and len(chapter_api_url) != 0 :
try:
yield scrapy.Request(self.main_url + chapter_api_url, meta={'item': comic}, callback=self.parse_chapter_api)
except:
logging.warning(f"yield scrapy.Request({self.main_url} + {chapter_api_url}, meta={comic}, callback=self.parse_chapter_api)")
else:
yield comic
# Scrambled-data API handling
def parse_chapter_api(self, response):
comic_item = ComicLoader(item=response.meta['item'], response=response)
return Conf().parse_chapter_api(item=comic_item, value=response.text).load_item()
# Regex-match the .jpg links
jpg_links = re.findall(r'(https?://\S+\.jpg)', result_json)
images_urls = []
# Process each extracted .jpg link
for link in jpg_links:
sr_value = re.search(r'sr:(\d+)', link)
# Extract the sr: value from the link
if sr_value:
sr = sr_value.group(1) # group(1) returns the first capture group, i.e. the digits
else:
sr = "0" # no sr marker found: assume the image is not scrambled
print("No match found")
images_urls.append(Image().setImage(url=link, scramble=sr.replace("0", "False").replace("1", "True")))
ci.image_urls(value=images_urls)
yield ci.load_item()
def parse(self, response):
raise NotImplementedError
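Note: a standalone sketch of the link/flag extraction in parse_chapter, run against a fabricated self.__next_f.push blob; the real payload layout is an assumption here:

    import re

    # Fabricated Next.js flight payload; the sr flag sits inside the URL token.
    blob = 'self.__next_f.push([1,"5:...https://img.example.com/p/abc,sr:1,/0001.jpg..."])'
    for link in re.findall(r'(https?://\S+\.jpg)', blob):
        m = re.search(r'sr:(\d+)', link)
        scramble = "True" if m and m.group(1) == "1" else "False"
        print(link, scramble)  # https://img.example.com/p/abc,sr:1,/0001.jpg True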

View File

@@ -1,22 +1,29 @@
books:
names: '//div[@class="truncate"]/text()'
urls: '//div[@class="grid grid-cols-1 sm:grid-cols-4 md:grid-cols-6 gap-2 sm:gap-4"]//a/@href'
data:
name: '//div[@class="col"]/h5/text()'
icon: '//img[@class="img-thumbnail"]/@src'
name: '//div[@class="basis-3/5 text-sm sm:text-base"]//div[@class="text-xl text-gray-900"]/text()'
icon: '//div[@class="flex flex-row gap-3 sm:gap-4"]//div[@class="basis-2/5"]/img[@class="rounded"]/@src'
author:
xpath: '//div[contains(@class,"bookid_bookInfo")]/p[1]/text()'
index: 1
tags: '//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()'
xpath: '//div[@class="flex flex-row gap-3 sm:gap-4"]//span[@class="text-gray-800"]/text()'
index: 0
tags:
xpath: '//div[@class="flex flex-row gap-3 sm:gap-4"]//span[@class="text-gray-800"]/text()'
index: 3
dep:
xpath: '//div[contains(@class,"bookid_bookInfo")]/p[4]/text()'
xpath: '//div[@class="my-2 text-gray-800 text-sm sm:text-base"]/p/text()'
index: 1
date:
xpath: '//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()'
xpath: '//div[@class="text-gray-500 text-sm mt-2"]/div/text()'
index: 1
genre:
value: "韩漫"
age_rating:
value: "R18+"
chapter_href: '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href'
chapters: '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()'
chapter_href: '//div[@class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-2 px-2 py-4"]//a/@href'
chapters: '//div[@class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-2 px-2 py-4"]//div[@class="text truncate bg-slate-300 p-2 hover:bg-rose-100"]/text()'
parse_chapter:
name:
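Note: these selectors are consumed generically by Conf.base_data (see the utils change below): each key becomes a set_properties call, a bare string is treated as an XPath, and mappings contribute xpath/index or a literal value. A rough sketch of that dispatch; the unpacking details and the "exec" key name are assumptions:

    # Assumed dispatch inside Conf.base_data: `data` is one YAML section, `item` a BaseLoader.
    for key, xpath_data in data.items():
        value = xpath = index = sexec = None
        if isinstance(xpath_data, str):
            xpath = xpath_data                   # bare string -> XPath selector
        elif isinstance(xpath_data, dict):
            value = xpath_data.get("value")      # literal value (genre, age_rating)
            xpath = xpath_data.get("xpath")
            index = xpath_data.get("index")
            sexec = xpath_data.get("exec")       # assumed key name
        item.set_properties(name=key, value=value, xpath=xpath, index=index, sexec=sexec)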

View File

@@ -1,68 +0,0 @@
import scrapy,logging,time,os
from Comics.items import ComicItem
from Comics.loader import ComicLoader
from Comics.utils import ComicPath
from Comics.settings import PROJECT_KEY
import skip
class RmComicSpider(scrapy.Spider):
name = 'yh_comic'
allowed_domains = ['www.shuanglilock.com.cn']
main_url = 'https://'+allowed_domains[0]
start_urls = main_url+'/info'
def start_requests(self):
# for x in range(0,60):
yield scrapy.Request("https://www.shuanglilock.com.cn/info/27145/", callback=self.parse_comic)
# Fetch info for multiple comics
# def books_comic(self, response):
# comics = ComicLoader(item=ComicItem(), response=response)
# data = comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
# for book in comics.get_exec(data, str_exec="props.pageProps.books"):
# comics.add_value('link', self.start_urls+"/"+book['id'])
# if book['name'] not in skip.skip_comic:
# yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
# Fetch the data for a single comic
# Once the chapter links are collected, move on to the next stage
def parse_comic(self, response):
comic_item = ComicLoader(item=ComicItem(), response=response)
comic_item.project_name(self.name)
comic_item.name(xpath='//div[@class="comics-detail__info"]/h1[@class="comics-detail__title"]/text()')
comic_item.icon(xpath='//div[@class="pure-u-1-1 pure-u-sm-1-3 pure-u-md-1-6"]/img/@src')
comic_item.author(xpath='//div[@class="comics-detail__info"]/h2[@class="comics-detail__author"]/text()')
comic_item.tags(xpath='//div[@class="tag-list"]/a[@class="tag"]/text()')
comic_item.dep(xpath='//p[contains(@class,"comics-detail__desc")]/text()')
#comic_item.date(xpath='//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()', index=1)
comic_item.genre(value="樱花漫画")
#comic_item.age_rating(value="R18+")
chapter_href = comic_item.get_xpath('//div[contains(@id,"chapter-items")]'
'//a[@class="comics-chapters__item"]/@href')
chapters = comic_item.get_xpath('//div[contains(@id,"chapter-items")]'
'//a[@class="comics-chapters__item"]//span/text()')
for chapter, link in zip(chapters, chapter_href):
comic_item.chapters(value=chapters)
comic_item.chapter(value=chapter)
item = comic_item.load_item()
cbz_path = ComicPath(item).get_file_path(result_type="cbz", convert=True)
if os.path.exists(cbz_path):
logging.info(f"漫画 {cbz_path} 已存在, 跳过中...")
yield item
else:
yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
# Read all the images under a chapter
def parse_chapter(self, response):
comic_item = ComicLoader(item=response.meta['item'], response=response)
comic_item.image_urls(xpath='//div[@class="comiclist"]/div[@class="comicpage"]/div/img/@data-original')
comic_item.images(xpath='//div[@class="comiclist"]/div[@class="comicpage"]/div/img/@data-original')
comic = comic_item.load_item()
yield comic
def parse(self, response):
raise NotImplementedError
def error_parse(self, response):
raise NotImplementedError

View File

@@ -7,9 +7,11 @@ from opencc import OpenCC
from PIL import Image
from pathlib import Path
from zipfile import ZipFile
from Comics.settings import COMIC_INFO_XML_FILE,OUTPUT_DIR,PROJECT_KEY
from Comics.settings import COMIC_INFO_XML_FILE,COMIC_INFO_XSD_FILE,OUTPUT_DIR,PROJECT_KEY
import yaml
from Comics.loader import BaseLoader
from Comics.loader import ComicLoader
from Comics.loader import BooksLoader
from tinydb import TinyDB, Query
# Configuration class
@@ -47,7 +49,7 @@ class Conf():
return None
# Load the parsed config data into the ComicLoader
def comic(self, project, item: ComicLoader, child_data='data', val=None):
def base_data(self, project, item: BaseLoader, child_data='data', val=None):
item.project_name(project)
data = self.get_config_value(project, child_data)
for key, xpath_data in data.items():
@@ -59,6 +61,12 @@
item.set_properties(name=key, value=value, xpath=xpath, index=index, sexec=sexec)
return item
def books(self, project, item: BooksLoader, child_data='books', val=None):
return self.base_data(project, item, child_data, val)
def comic(self, project, item: ComicLoader, child_data='data', val=None):
return self.base_data(project, item, child_data, val)
def parse_chapter(self,item: ComicLoader, value):
return self.comic(item.get_project_name(), item, "parse_chapter", value)
@@ -245,7 +253,7 @@ class CommonUtils:
@classmethod
def validate_comicinfo_xml(cls, xml_file):
cls._validate_xml(xml_file, "ComicInfo.xsd")
cls._validate_xml(xml_file, COMIC_INFO_XSD_FILE)
# Image processing class
@@ -772,7 +780,6 @@ class ntfy:
print("Notification sent successfully!")
else:
print(f"Failed to send notification. Status code: {response.status_code}")
print(response.json())
class logger: