caiwx86 2023-08-19 13:25:32 +08:00
parent 1f975c7c37
commit 32adf42a8d
3 changed files with 13 additions and 10 deletions


@@ -4,6 +4,7 @@ from Comics.loader import ComicLoader
from Comics.items import ListComicItem
from Comics.utils.Constant import ComicPath
from Comics.settings import PROJECT_KEY
import skip
class RmComicSpider(scrapy.Spider):
    name = 'rm_comic'
@@ -14,15 +15,14 @@ class RmComicSpider(scrapy.Spider):
    def start_requests(self):
        yield scrapy.Request(self.start_urls, callback=self.books_comic)
    # Fetch the listing of multiple comics
    def books_comic(self, response):
        #books_comic = ComicLoader(item=ListComicItem(), response=response)
        books_comic = ComicLoader(item=ComicItem(), response=response)
        data = books_comic.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
        str_exec = "props.pageProps.books"
        books = books_comic.get_exec(data, str_exec=str_exec)
        for book in books:
            books_comic.add_value('link', self.start_urls+"/"+book['id'])
            yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
        comics = ComicLoader(item=ComicItem(), response=response)
        data = comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
        for book in comics.get_exec(data, str_exec="props.pageProps.books"):
            comics.add_value('link', self.start_urls+"/"+book['id'])
            if book['name'] not in skip.skip_comic:
                yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
    # Fetch the data for a single comic
    # After collecting the chapter links, move on to the next stage
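
ComicLoader.get_exec is not shown in this commit, so the following is only a rough sketch of what the call above presumably does: json-parse the __NEXT_DATA__ payload and walk the dotted key path passed as str_exec (e.g. "props.pageProps.books"). The helper name walk_next_data is hypothetical.

import json

def walk_next_data(raw_script_text, str_exec):
    # Parse the Next.js __NEXT_DATA__ script body into a dict.
    node = json.loads(raw_script_text)
    # Follow each key of the dotted path, e.g. "props" -> "pageProps" -> "books".
    for key in str_exec.split("."):
        node = node[key]
    return node

# The result would be a list of dicts exposing at least 'id' and 'name',
# matching how the spider reads book['id'] and book['name'] above.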

skip.py (Normal file, +3)

@@ -0,0 +1,3 @@
skip_comic = [
    "千絲萬縷的遊戲人生"
]
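
skip.py acts as a plain blocklist module: the spider imports it and drops every book whose name appears in skip_comic before scheduling the detail request. A minimal standalone illustration of that check, assuming skip.py is on the import path (the sample book data below is made up):

import skip

books = [
    {"id": "101", "name": "千絲萬縷的遊戲人生"},  # listed in skip_comic, will be dropped
    {"id": "102", "name": "some other title"},
]
# Keep only titles that are not in the blocklist, as the spider does above.
kept = [book for book in books if book["name"] not in skip.skip_comic]
print(kept)  # -> only the book with id '102'

For a handful of titles a list lookup is fine; if the blocklist grows, a set would make the membership check cheaper.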