ComicScrapy/Comics/spiders/baseSpider.py

import logging
import os

from Comics.utils import Conf
from Comics.utils import ComicPath
from Comics.loader import ComicLoader
from Comics.items import ComicItem


class baseSpider:
    def parse_comic_data(self, project, response):
        data = []
        # Initialize the comic data: load the config file for the given
        # project name and let it parse the response automatically.
        comic_item = Conf().comic(project, ComicLoader(ComicItem(), response))
        # Iterate over the chapter names and chapter links that were parsed
        # and injected automatically from the config file.
        for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
            # Pack the chapter into an exportable item.
            item = comic_item.load_item(chapter=chapter)
            # Resolve the final path where the CBZ file will be stored.
            cbz_path = ComicPath(item=item).PATH_CBZ()
            # Check whether a CBZ already exists at the Traditional/Simplified Chinese path.
            # if not checkUtils().is_error(item) and os.path.exists(cbz_path):
            if cbz_path is not None and os.path.exists(cbz_path):
                logging.info(f"Comic {cbz_path} already exists, skipping...")
                data.append({"success": False, "item": item})
            else:
                # Visit the chapter link and hand it off to self.parse_chapter.
                # yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
                data.append({"success": True, "item": item, "link": link})
        return data
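

# A minimal usage sketch (not part of the original file), assuming a concrete
# Scrapy spider mixes in baseSpider and turns the returned entries into
# requests, as the commented-out yield above suggests. The spider name
# "example", the URLs, and the parse_chapter body are hypothetical placeholders.
import scrapy


class ExampleSpider(scrapy.Spider, baseSpider):
    name = "example"
    main_url = "https://example.com"
    start_urls = ["https://example.com/comic/123"]

    def parse(self, response):
        # For every chapter that does not yet have a CBZ on disk,
        # schedule a request for its chapter page.
        for entry in self.parse_comic_data(self.name, response):
            if entry["success"]:
                yield scrapy.Request(
                    self.main_url + entry["link"],
                    meta={"item": entry["item"]},
                    callback=self.parse_chapter,
                )

    def parse_chapter(self, response):
        # Download the chapter pages and assemble the CBZ here.
        ...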