fix: skip comics listed in skip.skip_comic when crawling the book list
This commit is contained in:
parent
1f975c7c37
commit
32adf42a8d
@ -87,4 +87,4 @@ class ImgDownloadPipeline(ImagesPipeline):
|
|||||||
|
|
||||||
sleep_time = random.randint(25,60)
|
sleep_time = random.randint(25,60)
|
||||||
print(f'等待{sleep_time}秒后进行下一章节')
|
print(f'等待{sleep_time}秒后进行下一章节')
|
||||||
time.sleep(int(sleep_time))
|
time.sleep(int(sleep_time))
|
||||||
@ -4,6 +4,7 @@ from Comics.loader import ComicLoader
|
|||||||
from Comics.items import ListComicItem
|
from Comics.items import ListComicItem
|
||||||
from Comics.utils.Constant import ComicPath
|
from Comics.utils.Constant import ComicPath
|
||||||
from Comics.settings import PROJECT_KEY
|
from Comics.settings import PROJECT_KEY
|
||||||
|
import skip
|
||||||
|
|
||||||
class RmComicSpider(scrapy.Spider):
|
class RmComicSpider(scrapy.Spider):
|
||||||
name = 'rm_comic'
|
name = 'rm_comic'
|
||||||
@ -13,16 +14,15 @@ class RmComicSpider(scrapy.Spider):
|
|||||||
|
|
||||||
def start_requests(self):
|
def start_requests(self):
|
||||||
yield scrapy.Request(self.start_urls, callback=self.books_comic)
|
yield scrapy.Request(self.start_urls, callback=self.books_comic)
|
||||||
|
|
||||||
|
# 获取多个漫画信息
|
||||||
def books_comic(self, response):
|
def books_comic(self, response):
|
||||||
#books_comic = ComicLoader(item=ListComicItem(), response=response)
|
comics = ComicLoader(item=ComicItem(), response=response)
|
||||||
books_comic = ComicLoader(item=ComicItem(), response=response)
|
data = comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
|
||||||
data = books_comic.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
|
for book in comics.get_exec(data, str_exec="props.pageProps.books"):
|
||||||
str_exec = "props.pageProps.books"
|
comics.add_value('link', self.start_urls+"/"+book['id'])
|
||||||
books = books_comic.get_exec(data, str_exec=str_exec)
|
if not book['name'] in skip.skip_comic:
|
||||||
for book in books:
|
yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
|
||||||
books_comic.add_value('link', self.start_urls+"/"+book['id'])
|
|
||||||
yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
|
|
||||||
|
|
||||||
# 获取某个漫画的相关数据
|
# 获取某个漫画的相关数据
|
||||||
# 获取到多个章节链接后进入下个流程
|
# 获取到多个章节链接后进入下个流程
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user