diff --git a/Comics/pipelines.py b/Comics/pipelines.py
index f809a07..fd231cb 100644
--- a/Comics/pipelines.py
+++ b/Comics/pipelines.py
@@ -87,4 +87,4 @@ class ImgDownloadPipeline(ImagesPipeline):
 
         sleep_time = random.randint(25,60)
         print(f'等待{sleep_time}秒后进行下一章节')
-        time.sleep(int(sleep_time))
\ No newline at end of file
+        time.sleep(int(sleep_time))
\ No newline at end of file
diff --git a/Comics/spiders/rm_comic.py b/Comics/spiders/rm_comic.py
index 0e0402a..2390116 100644
--- a/Comics/spiders/rm_comic.py
+++ b/Comics/spiders/rm_comic.py
@@ -4,6 +4,7 @@ from Comics.loader import ComicLoader
 from Comics.items import ListComicItem
 from Comics.utils.Constant import ComicPath
 from Comics.settings import PROJECT_KEY
+import skip
 
 class RmComicSpider(scrapy.Spider):
     name = 'rm_comic'
@@ -13,16 +14,15 @@ class RmComicSpider(scrapy.Spider):
 
     def start_requests(self):
         yield scrapy.Request(self.start_urls, callback=self.books_comic)
-
+
+    # 获取多个漫画信息
     def books_comic(self, response):
-        #books_comic = ComicLoader(item=ListComicItem(), response=response)
-        books_comic = ComicLoader(item=ComicItem(), response=response)
-        data = books_comic.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
-        str_exec = "props.pageProps.books"
-        books = books_comic.get_exec(data, str_exec=str_exec)
-        for book in books:
-            books_comic.add_value('link', self.start_urls+"/"+book['id'])
-            yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
+        comics = ComicLoader(item=ComicItem(), response=response)
+        data = comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
+        for book in comics.get_exec(data, str_exec="props.pageProps.books"):
+            comics.add_value('link', self.start_urls+"/"+book['id'])
+            if not book['name'] in skip.skip_comic:
+                yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
 
     # 获取某个漫画的相关数据
     # 获取到多个章节链接后进入下个流程
diff --git a/skip.py b/skip.py
new file mode 100644
index 0000000..6a1e00a
--- /dev/null
+++ b/skip.py
@@ -0,0 +1,3 @@
+skip_comic=[
+    "千絲萬縷的遊戲人生"
+]
\ No newline at end of file