fix
This commit is contained in:
parent
3298bcc287
commit
faee8328a4
@ -1,28 +0,0 @@
|
|||||||
import logging, os
|
|
||||||
from Comics.utils import Conf
|
|
||||||
from Comics.utils import ComicPath
|
|
||||||
from Comics.loader import ComicLoader
|
|
||||||
from Comics.items import ComicItem
|
|
||||||
|
|
||||||
class baseSpider:
    """Shared helper mixin for comic spiders.

    Resolves a project's comic configuration from a response and classifies
    every chapter as either already downloaded (its CBZ archive exists on
    disk) or still pending.
    """

    def parse_comic_data(self, project, response):
        """Parse a comic index response into per-chapter status records.

        Args:
            project: project/config name passed to ``Conf().comic`` to select
                the parsing rules for this site.
            response: the scrapy response of the comic's index page.

        Returns:
            list[dict]: one dict per chapter.
                ``{"success": False, "item": item}`` when the chapter's CBZ
                file already exists (chapter should be skipped);
                ``{"success": True, "item": item, "link": link}`` when the
                chapter still needs to be fetched via ``link``.
        """
        data = []
        # Build the comic item: load the project's config file and let it
        # auto-populate the loader from the response.
        comic_item = Conf().comic(project, ComicLoader(ComicItem(), response))
        # Walk the auto-extracted chapter names and chapter links in lockstep.
        for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
            # Materialize the exported item for this chapter.
            item = comic_item.load_item(chapter=chapter)
            # Final CBZ archive path for this chapter.
            # NOTE(review): presumably PATH_CBZ() may return None when the
            # path cannot be derived — the guard below assumes so.
            cbz_path = ComicPath(item=item).PATH_CBZ()
            # Skip chapters whose CBZ archive is already on disk.
            # (Was `cbz_path != None`; `is not None` is the correct identity check.)
            if cbz_path is not None and os.path.exists(cbz_path):
                logging.info(f"漫画 {cbz_path} 已存在, 跳过中...")
                data.append({"success": False, "item": item})
            else:
                # Chapter not downloaded yet; caller is expected to request `link`.
                data.append({"success": True, "item": item, "link": link})
        return data
|
|
||||||
@ -1,10 +1,10 @@
|
|||||||
import scrapy,logging,time,os,skip
|
import scrapy,logging,time,os,skip
|
||||||
from Comics.items import ComicItem
|
from Comics.items import ComicItem
|
||||||
from Comics.loader import ComicLoader
|
from Comics.loader import ComicLoader
|
||||||
from Comics.spiders.base import baseSpider
|
from Comics.utils import ComicPath
|
||||||
from Comics.utils import Conf
|
from Comics.utils import Conf
|
||||||
|
|
||||||
class RmComicSpider(scrapy.Spider, baseSpider):
|
class RmComicSpider(scrapy.Spider):
|
||||||
name = 'rm_comic'
|
name = 'rm_comic'
|
||||||
allowed_domains = ['roum12.xyz']
|
allowed_domains = ['roum12.xyz']
|
||||||
main_url = 'https://'+allowed_domains[0]
|
main_url = 'https://'+allowed_domains[0]
|
||||||
@ -27,13 +27,22 @@ class RmComicSpider(scrapy.Spider, baseSpider):
|
|||||||
# 获取某个漫画的相关数据
|
# 获取某个漫画的相关数据
|
||||||
# 获取到多个章节链接后进入下个流程
|
# 获取到多个章节链接后进入下个流程
|
||||||
def parse_comic(self, response):
    """Handle a comic index page.

    Delegates chapter discovery and CBZ-exists checks to the shared
    ``baseSpider.parse_comic_data``; for each chapter record it either
    yields the finished item (archive already exists) or schedules a
    request for the chapter page.

    Args:
        response: the scrapy response of the comic's index page.

    Yields:
        Either a comic item (chapter already downloaded) or a
        ``scrapy.Request`` whose callback is ``self.parse_chapter``.
    """
    for comic in super().parse_comic_data(self.name, response):
        if comic.get("success"):
            # Chapter still pending: visit its link and continue the
            # pipeline in self.parse_chapter, carrying the item in meta.
            yield scrapy.Request(
                self.main_url + comic.get("link"),
                meta={'item': comic.get("item")},
                callback=self.parse_chapter,
            )
        else:
            # CBZ already on disk: emit the item as-is.
            yield comic.get("item")
|
||||||
|
|
||||||
# 读取某章节下的所有图片
|
# 读取某章节下的所有图片
|
||||||
def parse_chapter(self, response):
|
def parse_chapter(self, response):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user