# ComicScrapy/Comics/spiders/rm_comic.py
# 2023-05-15 10:45:47 +08:00
# 44 lines, 2.4 KiB, Python
import scrapy,json
from Comics.items import ComicItem
from Comics.spiders.utils.CommonUtils import CommonUtils
class RmComicSpider(scrapy.Spider):
    """Scrape one comic from rm01.xyz: book metadata, then every chapter's image list.

    Flow: start_requests -> parse_comic (book page) -> parse_chapter (one per
    chapter link), each yielding the shared ComicItem with `list_img` filled in.
    """

    name = 'rm_comic'
    allowed_domains = ['rm01.xyz']
    main_url = 'https://rm01.xyz'
    # start_urls = ['https://rm01.xyz/books/63b65185-f798-4c8f-a0b0-8811615908fd/0']

    def start_requests(self):
        # Single hard-coded book id; the detail page is parsed by parse_comic.
        yield scrapy.Request(
            self.main_url + '/books/63b65185-f798-4c8f-a0b0-8811615908fd',
            callback=self.parse_comic,
        )

    def parse_comic(self, response):
        """Extract book-level metadata and schedule one request per chapter."""
        comic = ComicItem()
        comic['name'] = response.xpath('//div[@class="col"]/h5/text()').extract_first()
        comic['icon'] = response.xpath('//img[@class="img-thumbnail"]/@src').extract_first()
        comic['author'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[1]/text()').extract()[1]
        comic['tags'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()').extract_first()
        comic['dep'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[4]/text()').extract()[1]
        comic['date'] = response.xpath('//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()').extract()[1]
        comic['chapters'] = response.xpath('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()').extract()
        comic['chapter_href'] = response.xpath('//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href').extract()
        # BUG FIX: the original did `yield list_img.append(scrapy.Request(...))`.
        # list.append returns None, so the generator yielded None and Scrapy
        # never scheduled a single chapter request. Yield the Request itself.
        for link in comic['chapter_href']:
            yield scrapy.Request(
                self.main_url + link,
                meta={'item': comic},
                callback=self.parse_chapter,
                errback=self.err,
            )

    def err(self, failure):
        # BUG FIX: Scrapy calls an errback with the Failure as argument; the
        # original zero-argument signature raised TypeError whenever an error
        # actually happened. The Failure is accepted (and currently ignored).
        print("Error=====")

    def parse_chapter(self, response):
        """Pull the chapter's image URLs out of the Next.js __NEXT_DATA__ blob."""
        # NOTE(review): the same ComicItem instance is shared by all chapter
        # responses, so `list_img` is overwritten per chapter as items are
        # yielded — confirm the pipeline consumes each yield immediately.
        item = response.meta['item']
        data = response.xpath('//script[@id="__NEXT_DATA__"]/text()').extract_first()
        prefix = "props.pageProps."
        # Only the image list is consumed downstream; the original also parsed
        # bookName/chapterName/description/chapterAPIPath into unused locals.
        item['list_img'] = CommonUtils.parseExec(data, prefix + "images")
        yield item

    def parse(self, response):
        # Never used: every request carries an explicit callback.
        raise NotImplementedError