commit c78fa7e47d
parent 2a9820949b
caiwx86 2024-02-20 21:08:13 +08:00
14 changed files with 424 additions and 260 deletions

.gitignore

@@ -1,5 +1,6 @@
 .scrapy/*
 .vscode/*
+.DS_Store
 CBZ/*
 output/*
 /**/__pycache__


Comics/exporters.py

@@ -6,20 +6,10 @@ from scrapy.exporters import JsonItemExporter
 from Comics.items import ComicInfoItem
 from Comics.items import ComicItem
 from Comics.settings import COMIC_INFO_XML_STORE
-from Comics.utils.Constant import ComicPath
+from Comics.utils import ComicPath
 from scrapy.utils.python import is_listlike, to_bytes, to_unicode
 from itemadapter import ItemAdapter
-class ItemImport():
-    def import_obj(self, file):
-        if os.path.exists(file):
-            with open(file, "r", encoding="utf-8") as fs:
-                result = fs.read()
-                fs.close()
-            return result
-        else:
-            return []
 class CommonExporter():
     def getPath(self, file , sufix=None):
         sufix = "."+sufix
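Note: the removed ItemImport.import_obj lives on as fileUtils.read in Comics/utils (same contract: the file's text, or [] when the file is missing), so former call sites such as checkUtils.read now go through it. A minimal sketch of the replacement call; the path is illustrative:

    from Comics.utils import fileUtils
    data = fileUtils.read("output/rm_comic/error_comics.json")  # text, or [] if the file is absent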

Comics/items.py

@@ -4,9 +4,9 @@
 # https://docs.org/en/latest/topics/items.html
 import os,Comics.settings as settings,logging
 from scrapy.item import Item, Field
-from Comics.utils.Constant import ComicPath
-from Comics.utils.FileUtils import imageUtils
-from itemloaders.processors import TakeFirst, MapCompose, Join
+from Comics.utils import ComicPath
+from Comics.utils import imageUtils
+from itemloaders.processors import TakeFirst
 # convert Traditional Chinese to Simplified Chinese
 def serialize_to_chinese(value): return ComicPath.chinese_convert(value)
@@ -86,6 +86,11 @@ class ComicItem(Item):
     image_urls = Field(serializer=serialize_to_image_urls)
     # image names
     images_name = Field()
+    # chapter link
+    chapter_href = Field()
+    # chapter API
+    chapter_api = Field()
     # serializer - author
     def serializer_info_writer(value):

Comics/loader.py

@@ -8,7 +8,8 @@ class ComicLoader(ItemLoader):
         dots = str(exec).split(".")
         if not isinstance(data,dict): data = json.loads(data)
         for dot in dots:
-            data = data.get(dot)
+            if data != None: data = data.get(dot)
+            logging.debug(f"data= {data} dot={dot}")
         return data
     def add_xpath(self, field_name, xpath, *processors, index=None, exec=None, re=None, is_null=None, **kw):
@@ -60,8 +61,8 @@ class ComicLoader(ItemLoader):
     def auto_replace_value(self, field_name, value):
         if self.get_output_value(field_name) != None:
             self._replace_value(field_name, value)
             return False
         else: return True
@@ -101,7 +102,30 @@ class ComicLoader(ItemLoader):
     def images(self, value=None, xpath=None, index=None, sexec=None): self.set_properties('images', value, xpath, index, sexec)
     # image URLs
     def image_urls(self, value=None, xpath=None, index=None, sexec=None): self.set_properties('image_urls', value, xpath, index, sexec)
+    def get_output_value(self, field_name):
+        value = super().get_output_value(field_name)
+        try:
+            if isinstance(value, list) and len(value) == 1: value = value[0]
+        except:
+            print(f"get_output_value value={value} type={type(value)}")
+        return value
+    # comic name
+    def get_name(self): return self.get_output_value("name")
+    # comic chapter
+    def get_chapter(self): return self.get_output_value("chapter")
+    # project name
+    def get_project_name(self): return self.get_output_value(PROJECT_KEY)
+    # chapter link
+    def get_chapter_href(self): return self.get_output_value("chapter_href")
+    # all chapters
+    def get_chapters(self): return self.get_output_value("chapters")
+    def get_chapter_api(self): return self.get_output_value("chapter_api")
+    def get_image_urls(self): return self.get_output_value("image_urls")
 class ComicEntity:
     ENTITY = None
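For orientation, get_exec above walks a dotted key path through a JSON payload (split on ".", then dict.get per segment, with None short-circuiting), which is how the spiders address Next.js __NEXT_DATA__ blobs via strings like props.pageProps.books. A standalone sketch of the same traversal, with hypothetical names:

    import json

    def walk(data, path):
        # parse the JSON once, then follow each dotted segment; None short-circuits
        if not isinstance(data, dict): data = json.loads(data)
        for dot in path.split("."):
            if data is not None: data = data.get(dot)
        return data

    walk('{"props": {"pageProps": {"bookName": "X"}}}', "props.pageProps.bookName")  # -> 'X'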

Comics/pipelines.py

@@ -9,11 +9,11 @@ import os,scrapy,logging
 from Comics import settings
 from Comics.items import ComicItem
 from Comics.settings import OUTPUT_DIR
-from Comics.loader import ComicEntity
+from Comics.loader import ComicEntity,ComicLoader
 from Comics.exporters import ComicInfoXmlItemExporter
-from Comics.utils.FileUtils import CBZUtils,fileUtils as fu
-from Comics.utils.Constant import ComicPath
-from Comics.utils.ComicUtils import checkUtils
+from Comics.utils import CBZUtils,fileUtils as fu
+from Comics.utils import ComicPath
+from Comics.utils import checkUtils
 from Comics.exporters import JsonExport,ItemExporter
 from scrapy.pipelines.images import ImagesPipeline
@@ -24,12 +24,14 @@ class ComicsPipeline():
     # item is the object the spider yields
     def process_item(self, item, spider):
         if isinstance(item, ComicItem):
+            # item = ComicEntity(item).item()
             # 'output/rm_comic/json/壞X/第1話 壞X'
-            if fu.exists(ComicPath.path_cbz(item=item)):
-                return ItemExporter().export_obj(item)
+            # the comic's CBZ already exists: hand the item to the converter
+            if fu.exists(ComicPath.path_cbz(item=item)): return ItemExporter().export_obj(item)
             else:
-                file = os.path.join(OUTPUT_DIR, spider.name, "json", item['name'], item['chapter'])
-                return JsonExport(file=file).export_json(ComicEntity(item).item(), if_return=True)
+                # no CBZ for this chapter yet
+                #file = os.path.join(OUTPUT_DIR, spider.name, "json", item['name'], item['chapter'])
+                return JsonExport(file=ComicPath.getDirJosnComicChapter(item)).export_json(ComicEntity(item).item(), if_return=True)
     # image parsing
     def close_spider(self, spider):
@@ -102,6 +104,11 @@ class ImgDownloadPipeline(ImagesPipeline):
         # return item
         # pack the CBZ
         cbz_path = self.get_file_path(item, result_type="cbz")
+        success_data = []
+        for result in results:
+            if result[0]: success_data.append(result[1])
+        image_urls = ComicLoader(item=item).get_image_urls()
+        if len(success_data) != len(image_urls): return
         if fu.exists(cbz_path):
             self.update_icon(item)
             self.pack_icon(item)
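For context on the new guard: in Scrapy's ImagesPipeline, item_completed receives results as a list of (success, info) 2-tuples, where info on a success is a dict with 'url', 'path', and 'checksum' keys. The added lines keep only the successful downloads and bail out of packing when their count falls short of the chapter's image_urls, so an incomplete chapter is never zipped. A reduced sketch of that check (method shape per Scrapy's API, ComicLoader.get_image_urls as defined in loader.py):

    def item_completed(self, results, item, info):
        # results: [(True, {'url': ..., 'path': ..., 'checksum': ...}), (False, failure), ...]
        success_data = [ok_info for ok, ok_info in results if ok]
        if len(success_data) != len(ComicLoader(item=item).get_image_urls()):
            return  # at least one image failed: skip CBZ packing for now
        # ... packing continues here ...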

Comics/settings.py

@@ -26,7 +26,7 @@ ROBOTSTXT_OBEY = False
 HTTPERROR_ALLOWED_CODES = [ 200 , 403]
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
-CONCURRENT_REQUESTS = 16
+CONCURRENT_REQUESTS = 8
 # Configure a delay for requests for the same website (default: 0)
 # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
@@ -45,7 +45,7 @@ RETRY_HTTP_CODES = [408, 401]
 CONCURRENT_REQUESTS_PER_DOMAIN = 16
 CONCURRENT_REQUESTS_PER_IP = 16
 PROXY_LIST = [
-    "http://127.0.0.1:7890",
+    # "http://127.0.0.1:7890",
     # "http://10.0.10.117:8123",
 ]
 # Disable cookies (enabled by default)

Comics/spiders/rm_comic.py

@@ -1,15 +1,17 @@
 import scrapy,logging,time,os,skip
 from Comics.items import ComicItem
 from Comics.loader import ComicLoader
-from Comics.utils.Constant import ComicPath
-from Comics.utils.ComicUtils import checkUtils
+from Comics.utils import ComicPath
+from Comics.utils import checkUtils
+from Comics.utils import Conf
 class RmComicSpider(scrapy.Spider):
     name = 'rm_comic'
-    allowed_domains = ['roum1.xyz']
+    allowed_domains = ['roum12.xyz']
     main_url = 'https://'+allowed_domains[0]
     start_urls = main_url+'/books'
+    # walk the site's listing pages
     def start_requests(self):
         for x in range(0,60):
             yield scrapy.Request(self.start_urls+"?&page="+str(x), callback=self.books_comic)
@@ -17,66 +19,45 @@ class RmComicSpider(scrapy.Spider):
     # fetch info for a page of comics
     def books_comic(self, response):
         comics = ComicLoader(item=ComicItem(), response=response)
-        data = comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
-        for book in comics.get_exec(data, str_exec="props.pageProps.books"):
-            comics.add_value('link', self.start_urls+"/"+book['id'])
+        for book in comics.get_exec(comics.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0], str_exec="props.pageProps.books"):
             if book['name'] not in skip.skip_comic:
                 yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)
     # fetch one comic's related data,
     # then move on once all chapter links are collected
     def parse_comic(self, response):
-        comic_item = ComicLoader(item=ComicItem(), response=response)
-        comic_item.project_name(self.name)
-        comic_item.name(xpath='//div[@class="col"]/h5/text()')
-        comic_item.icon(xpath='//img[@class="img-thumbnail"]/@src')
-        comic_item.author(xpath='//div[contains(@class,"bookid_bookInfo")]/p[1]/text()', index=1)
-        comic_item.tags(xpath='//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()')
-        comic_item.dep(xpath='//div[contains(@class,"bookid_bookInfo")]/p[4]/text()', index=1)
-        comic_item.date(xpath='//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()', index=1)
-        comic_item.genre(value="韩漫")
-        comic_item.age_rating(value="R18+")
-        chapter_href = comic_item.get_xpath('//div[contains(@class,"bookid_chapterBox")]'
-                                            '//div[contains(@class,"bookid_chapter")]/a/@href')
-        chapters = comic_item.get_xpath('//div[contains(@class,"bookid_chapterBox")]'
-                                        '//div[contains(@class,"bookid_chapter")]/a/text()')
-        for chapter, link in zip(chapters, chapter_href):
-            comic_item.chapters(value=chapters)
-            comic_item.chapter(value=chapter)
+        comic_item = Conf().comic(self.name, ComicLoader(ComicItem(), response))
+        for chapter, link in zip(comic_item.get_chapters(), comic_item.get_chapter_href()):
             item = comic_item.load_item()
-            cbz_path = ComicPath.get_file_path(item=item, result_type="cbz", convert=True)
-            if not checkUtils().is_error(item):
-                if os.path.exists(cbz_path):
-                    logging.info(f"comic {cbz_path} already exists, skipping...")
-                    yield item
-                else:
-                    yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
+            cbz_path = ComicPath.get_file_path(item=item, result_type="cbz", convert=True, chapter=chapter)
+            if not checkUtils().is_error(item) and os.path.exists(cbz_path):
+                logging.info(f"comic {cbz_path} already exists, skipping...")
+                yield item
+            else:
+                # request the chapter link and continue in self.parse_chapter
+                yield scrapy.Request(self.main_url+link, meta={'item': item}, callback=self.parse_chapter)
     # read all images under one chapter
     def parse_chapter(self, response):
         comic_item = ComicLoader(item=response.meta['item'], response=response)
         data = comic_item.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
-        str_exec = "props.pageProps."
-        comic_item.name(value=data, sexec=str_exec+"bookName")
-        comic_item.dep(value=data, sexec=str_exec+"description")
-        comic_item.chapter(value=data, sexec=str_exec+"chapterName")
-        comic_item.image_urls(value=data, sexec=str_exec+"images")
-        comic_item.images(value=data, sexec=str_exec+"images")
-        comic = comic_item.load_item()
-        chapter_api_url = comic_item.get_exec(data, str_exec+"chapterAPIPath")
-        if chapter_api_url is not None:
-            yield scrapy.Request(self.main_url + chapter_api_url, meta={'item': comic}, callback=self.parse_chapter_api)
+        item: ComicLoader = Conf().parse_chapter(item=comic_item, value=data)
+        comic = item.load_item()
+        chapter_api_url = item.get_chapter_api()
+        if chapter_api_url is not None and len(chapter_api_url) != 0:
+            try:
+                yield scrapy.Request(self.main_url + chapter_api_url, meta={'item': comic}, callback=self.parse_chapter_api)
+            except:
+                logging.warning(f"yield scrapy.Request({self.main_url} + {chapter_api_url}, meta={comic}, callback=self.parse_chapter_api)")
         else:
             yield comic
     # handle the encrypted-data API
     def parse_chapter_api(self, response):
         comic_item = ComicLoader(item=response.meta['item'], response=response)
-        comic_item.chapter(value=response.text, sexec='chapter.name')
-        comic_item.image_urls(value=response.text, sexec='chapter.images')
-        comic_item.images(value=response.text, sexec='chapter.images')
-        yield comic_item.load_item()
+        item: ComicLoader = Conf().parse_chapter(item=comic_item, value=response.text)
+        yield item.load_item()
     def parse(self, response):

Comics/spiders/rm_comic.yml

@@ -0,0 +1,41 @@
+data:
+  name: '//div[@class="col"]/h5/text()'
+  icon: '//img[@class="img-thumbnail"]/@src'
+  author:
+    xpath: '//div[contains(@class,"bookid_bookInfo")]/p[1]/text()'
+    index: 1
+  tags: '//div[contains(@class,"bookid_bookInfo")]/p[3]/b/text()'
+  dep:
+    xpath: '//div[contains(@class,"bookid_bookInfo")]/p[4]/text()'
+    index: 1
+  date:
+    xpath: '//div[contains(@class,"bookid_bookInfo")]/p[5]/small/text()'
+    index: 1
+  genre:
+    value: "韩漫"
+  age_rating:
+    value: "R18+"
+  chapter_href: '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/@href'
+  chapters: '//div[contains(@class,"bookid_chapterBox")]//div[contains(@class,"bookid_chapter")]/a/text()'
+parse_chapter:
+  name:
+    sexec: props.pageProps.bookName
+  dep:
+    sexec: props.pageProps.description
+  chapter:
+    sexec: props.pageProps.chapterName
+  image_urls:
+    sexec: props.pageProps.images
+  images:
+    sexec: props.pageProps.images
+  chapter_api:
+    sexec: props.pageProps.chapterAPIPath
+parse_chapter_api:
+  chapter:
+    sexec: chapter.name
+  image_urls:
+    sexec: chapter.images
+  images:
+    sexec: chapter.images
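Each top-level key of this file drives one loader pass in the new Conf class: 'data' holds XPath rules (with optional index/value) applied to the comic page, while 'parse_chapter' and 'parse_chapter_api' hold dotted sexec paths for JSON payloads. A minimal sketch of how rm_comic.py consumes it in this commit:

    # parse_comic: populate the loader from the 'data' section
    comic_item = Conf().comic('rm_comic', ComicLoader(ComicItem(), response))
    # parse_chapter: populate from the 'parse_chapter' section,
    # with the __NEXT_DATA__ JSON passed as the value
    item = Conf().parse_chapter(item=comic_item, value=data)

Note that both chapter callbacks route through Conf().parse_chapter, which reads the 'parse_chapter' section; nothing references the 'parse_chapter_api' section yet.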


@@ -1,7 +1,7 @@
 import scrapy,logging,time,os
 from Comics.items import ComicItem
 from Comics.loader import ComicLoader
-from Comics.utils.Constant import ComicPath
+from Comics.utils import ComicPath
 from Comics.settings import PROJECT_KEY
 import skip

Comics/utils/__init__.py

@@ -1,74 +1,166 @@
-import base64,hashlib,os,shutil
+import base64,hashlib,os,shutil,os.path
 import math,time,json,datetime,logging
+import re,requests,time,xmlschema
+from datetime import date
+from Comics import settings
+from opencc import OpenCC
 from PIL import Image
-from Comics.utils.Constant import ComicPath
 from pathlib import Path
 from zipfile import ZipFile
-from Comics.settings import COMIC_INFO_XML_FILE,CBZ_EXPORT_PATH,IMAGES_STORE
-from Comics.utils.Constant import ntfy
+from Comics.settings import COMIC_INFO_XML_FILE,OUTPUT_DIR,PROJECT_KEY
+import yaml
+from Comics.loader import ComicLoader
+# configuration loader
+class Conf():
+    # read the yml config file
+    # @project  read the config <project>.yml for the given project name
+    # @key      return the dict stored under this key (default None)
+    #def init(self, project, key=None):
+    #    data = None
+    #    if project == None: project = "config"
+    #    with open(os.path.join("Comics","spiders", project)+".yml") as f:
+    #        data = yaml.load(f, Loader=yaml.FullLoader)
+    #    if key != None and data != None:
+    #        return data[key]
+    def get_config_value(self, project, key=None):
+        # use Path to handle the file path
+        config_path = Path(os.path.join("Comics","spiders", project)+".yml")
+        #Path("Comics") / "spiders" / project / (project + ".yml")
+        # check that the project config exists
+        if not config_path.is_file():
+            return None
+        # open the file and load the config data
+        try:
+            with config_path.open('r') as f:
+                data = yaml.safe_load(f)
+        except yaml.YAMLError as e:
+            print(f"Error loading YAML file: {e}")
+            return None
+        # check whether the key exists
+        if key is not None and key in data:
+            return data[key]
+        else:
+            return None
+    # feed the loaded config data into a ComicLoader
+    def comic(self, project, item: ComicLoader, child_data='data', val=None):
+        item.project_name(project)
+        data = self.get_config_value(project, child_data)
+        for key, xpath_data in data.items():
+            if isinstance(xpath_data, str): xpath_data = {'xpath': xpath_data}
+            xpath = xpath_data.get('xpath', None)
+            index = xpath_data.get('index', None)
+            value = xpath_data.get('value', None) if val is None else val
+            sexec = xpath_data.get('sexec', None)
+            item.set_properties(name=key, value=value, xpath=xpath, index=index, sexec=sexec)
+        return item
+    def parse_chapter(self, item: ComicLoader, value):
+        return self.comic(item.get_project_name(), item, "parse_chapter", value)
+# file helpers
 class fileUtils:
+    # does the file exist
     @classmethod
     def exists(cls, path): return os.path.exists(path)
+    # join path segments
     @classmethod
     def join(cls, path, *paths): return os.path.join(path, *paths);
+    # directory name
     @classmethod
     def dirname(cls, path): return os.path.dirname(path);
+    # base file name
     @classmethod
     def basename(cls, path): return os.path.basename(path);
+    # save data to a file
     @classmethod
     def save_file(cls,path,data):
         root_dir = os.path.dirname(path)
-        if not os.path.exists(root_dir):
-            os.makedirs(root_dir)
+        if not os.path.exists(root_dir): os.makedirs(root_dir)
         with open(path,'w',encoding='utf-8') as fs:
             fs.write(str(data))
-            fs.close()
+    # return the path after ensuring its directory exists
     @classmethod
     def path(cls, file):
         base_dir = os.path.dirname(file)
         if not os.path.exists(base_dir): os.makedirs(base_dir)
         return file
+    # compare file sizes
     @classmethod
     def compare_size(cls, dst, file):
-        if os.path.exists(dst) and os.path.exists(file):
+        if cls.exists(dst) and cls.exists(file):
             return os.stat(dst).st_size == os.stat(file).st_size
         else:
-            return 0
+            return None
+    # read a file
+    @classmethod
+    def read(cls, file):
+        if os.path.exists(file):
+            with open(file, "r", encoding="utf-8") as fs: return fs.read()
+        else:
+            return []
     """
     image numbering: image-1.jpg
     if image.png exists, return image-1.png; otherwise image.png
     """
     @classmethod
-    def file_check(cls, file, result="file"):
-        temp_file_name = file
-        count = 1
-        files_size = []
-        name, suffix = temp_file_name.split(".")
-        while count:
-            if os.path.exists(temp_file_name):
-                files_size.append(os.stat(temp_file_name).st_size)
-                temp_file_name = name+"-"+str(count)+"."+suffix
+    def file_check(cls, file, result="file", count=0):
+        temp_file_name, files_size, files_name = [file, {}, []]
+        # by default the file name does not exist
+        if not cls.exists(temp_file_name) and temp_file_name == file: count = 1
+        while count or count == 0:
+            temp_file_name = ComicPath().images_icon(file=file, count=count)
+            if cls.exists(temp_file_name):
+                # record the existing file name
+                files_name.append(temp_file_name)
+                file_size = os.path.getsize(temp_file_name)
+                # record the name and size, keyed by size to spot duplicates
+                files_size[file_size] = {"name": temp_file_name, "size": file_size}
+                # format the file name
+                # temp_file_name = ComicPath().images_icon(file=file, count=count)
                 count += 1
             else:
+                # check for duplicate data:
+                # the names to keep, one per unique size
+                diff_names = {value["name"] for value in files_size.values()}
+                # nothing exists: return the original file name
+                if len(diff_names) == 0: return file
+                for file_name in files_name:
+                    if file_name not in diff_names:
+                        logging.info(f"removing file: {file_name}")
+                        os.remove(file_name)
+                # if the original file exists alongside numbered copies
+                if file in diff_names:
+                    move_file = ComicPath().images_icon(file=file, count=count)
+                    logging.info(f"moving file {file} -> {move_file}")
+                    shutil.move(file, move_file)
+                    cls.file_check(file=file,result=result,count=0)
+                # if the deduplicated and existing name counts differ, duplicates remain: run again
+                if len(set(diff_names)) != len(set(files_name)): cls.file_check(file, result=result,count=0)
                 if result == "size":
-                    return files_size
+                    return {value["size"] for value in files_size.values()}
                 else:
                     return temp_file_name
+    # has the file changed
     @classmethod
     def file_update(cls, old_file, new_file):
         is_update = False
-        if os.path.exists(old_file):
-            is_update = os.stat(old_file).st_size not in cls.file_check(new_file, result="size")
+        if os.path.exists(old_file): is_update = os.path.getsize(old_file) not in cls.file_check(new_file, result="size")
         return is_update
     # decide whether the cover needs updating
@@ -81,7 +173,7 @@ class fileUtils:
         logging.info(f"update icon ... {image_path} ===> {cls.file_check(save_path)}")
         shutil.copyfile(image_path, cls.file_check(save_path))
+# common utilities
 class CommonUtils:
     @classmethod
     def parseExec(cls,data,exec):
@@ -92,6 +184,28 @@ class CommonUtils:
             data = data.get(dot)
         return data
+    @classmethod
+    def _validate_xml(cls,xml_file, xsd_file):
+        # load the XSD schema
+        xsd = xmlschema.XMLSchema(xsd_file)
+        # validate the XML
+        is_valid = xsd.is_valid(xml_file)
+        if is_valid:
+            print("XML passed XSD validation")
+        else:
+            print("XML failed XSD validation; errors follow")
+            for error in xsd.iter_errors(xml_file):
+                print(error)
+    @classmethod
+    def validate_comicinfo_xml(cls, xml_file):
+        cls._validate_xml(xml_file, "ComicInfo.xsd")
+# image helpers
 class imageUtils:
     @classmethod
@@ -307,7 +421,7 @@ class imageUtils:
         logging.debug(f"remove {img_path}")
         return save_path
+# CBZ archive helpers
 class CBZUtils:
     @classmethod
@@ -418,4 +532,159 @@ class CBZUtils:
         else:
             os.remove(zip_path)
             logging.error(f"validating fail === {zip_path}")
             return False
+# error-tracking helpers
+class checkUtils:
+    def read(self, item):
+        file = os.path.join(OUTPUT_DIR, ComicLoader(item=item).get_project_name(), "error_comics.json")
+        return fileUtils.read(file)
+    #
+    # check whether a chapter keeps failing
+    def export_error(self, item):
+        if not self.is_error(item):
+            file = os.path.join(OUTPUT_DIR, ComicLoader(item=item).get_project_name(), "error_comics.json")
+            try:
+                error_comic = eval(self.read(item))
+            except:
+                error_comic = []
+            error_comic.append({ "name" : ComicPath.new_file_name(item['name']),
+                                 "chapter" : ComicPath.new_file_name(item['chapter']),
+                                 "date" : ComicPath().getYearMonthDay()})
+            fileUtils.save_file(file, json.dumps(error_comic))
+    def is_error(self, item):
+        try:
+            for error_c in eval(self.read(item)):
+                (name, chatper, date) = [error_c['name'], error_c['chapter'], error_c['date']]
+                if ComicPath.new_file_name(item['name']) == ComicPath.new_file_name(name) and ComicPath.new_file_name(item['chapter']) == ComicPath.new_file_name(chatper):
+                    return True
+            return False
+        except:
+            return False
+# comic path helpers
+class ComicPath:
+    PREFIX_SCRAMBLE = "scramble="
+    @classmethod
+    def getYearMonthDay(cls):
+        today = date.today()
+        # format as year-month-day
+        return today.strftime("%Y%m%d")
+    @classmethod
+    def getDirComicChapter(cls, item, categorize=""):
+        comic = ComicLoader(item=item)
+        return os.path.join(OUTPUT_DIR, comic.get_project_name(), categorize, comic.get_name(), comic.get_chapter())
+    @classmethod
+    def getDirJosnComicChapter(cls, item):
+        return cls.getDirComicChapter(item=item, categorize="json")
+    @classmethod
+    def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix
+    @classmethod
+    def getFileScrambleImageSave(cls,file,relative=False, is_prefix=True):
+        file_name = str(file).split("_")[-1]
+        if relative:
+            file_name = os.path.basename(file_name)
+            if relative == "fullpath":
+                file_name = os.path.join(os.path.dirname(file), file_name)
+        if not is_prefix:
+            return file_name.split(".")[0]
+        else:
+            return file_name
+    # Traditional-to-Simplified Chinese conversion
+    @classmethod
+    def chinese_convert(cls, text,convert='t2s'): return OpenCC(convert).convert(str(text))
+    # normalize to a legal file name
+    @classmethod
+    def fix_file_name(cls, filename, replace=None):
+        if not isinstance(filename, str):
+            return filename
+        in_tab = r'[?*/\|.:><]'
+        str_replace = ""
+        if replace is not None:
+            str_replace = replace
+        filename = re.sub(in_tab, str_replace, filename)
+        count = 1
+        while True:
+            str_file = filename[0-count]
+            if str_file == " ":
+                count += 1
+            else:
+                filename = filename[0:len(filename)+1-count]
+                break
+        return filename
+    @classmethod
+    def new_file_name(cls, name): return cls.fix_file_name(cls.chinese_convert(name))
+    @classmethod
+    def get_file_path(cls, item, result_type="image", file=None, convert=False, chapter=None):
+        PROJECT = ComicLoader(item=item).get_project_name()
+        if not convert:
+            name = item['name']
+            if chapter == None: chapter = item['chapter']
+        else:
+            name = cls.fix_file_name(cls.chinese_convert(item['name']))
+            if chapter == None: chapter = cls.fix_file_name(cls.chinese_convert(item['chapter']))
+        if result_type == "image":
+            if os.path.sep not in file:
+                file = os.path.join(PROJECT, "images", name, chapter, file)
+        elif result_type == "comic_info":
+            file = os.path.join(PROJECT, "images", name, chapter)
+        elif result_type == "cbz_icon":
+            file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".jpg")
+        elif result_type == "down_icon":
+            file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon"))
+        elif result_type == "down_cache_icon":
+            file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon_cache"))
+        elif result_type == "icon":
+            file = os.path.join(PROJECT, "icons", name, name+".jpg")
+        elif result_type == "icon_cache":
+            file = os.path.join(PROJECT, "icons", ".cache", name+".jpg")
+        elif result_type == "cbz":
+            file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".CBZ")
+        elif result_type == "images_dir":
+            file = os.path.join(settings.IMAGES_STORE, PROJECT, "images", name, chapter)
+        else:
+            raise ValueError(f"Unsupported result_type: {result_type}")
+        return file
+    @classmethod
+    def path_cbz(cls, item):
+        return cls.get_file_path(item, result_type="cbz", convert=True)
+    @classmethod
+    def images_icon(cls, file, count):
+        if count == 0: return file
+        name, suffix = os.path.splitext(file)
+        return name+"-"+str(count)+suffix
+# notification helper
+class ntfy:
+    @classmethod
+    def sendMsg(cls, msg,alert=False,sleep=None,error=None):
+        try:
+            print(f"#ntfy: {msg}")
+            if alert:
+                requests.post("https://ntfy.caiwenxiu.cn/PyComic",
+                              data=msg.encode(encoding='utf-8'))
+        except:
+            print(f"#ntfy error: {msg}")
+        if sleep != None:
+            logging.info(f'waiting {sleep} seconds before the next stage')
+            time.sleep(int(sleep))
+        if error != None:
+            print(f"#ntfy Error: {error}")
+            return False
+        else:
+            return True
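A quick sketch of the expected behavior of the new helpers, assuming opencc's standard t2s tables (outputs illustrative):

    ComicPath.chinese_convert('第1話')            # -> '第1话' (Traditional -> Simplified)
    ComicPath.new_file_name('第1話 壞X? ')         # t2s, strips ?*/\|.:>< and trailing spaces
    ComicPath().images_icon('icon.jpg', 2)        # -> 'icon-2.jpg'; count=0 returns 'icon.jpg'
    fileUtils.join('output', 'rm_comic', 'json')  # -> 'output/rm_comic/json'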

Comics/utils/ComicUtils.py (deleted)

@@ -1,40 +0,0 @@
-import os,json
-from Comics.settings import CBZ_EXPORT_PATH,OUTPUT_DIR,PROJECT_KEY
-from Comics.utils.Constant import ComicPath
-from Comics.exporters import ComicInfoXmlItemExporter,JsonExport,ItemExporter, ItemImport
-from Comics.utils.FileUtils import fileUtils as fu
-from Comics.loader import ComicEntity
-class checkUtils:
-    def read(self, item):
-        file = os.path.join(OUTPUT_DIR, item[PROJECT_KEY][0], "error_comics.json")
-        return ItemImport().import_obj(file)
-    #
-    # check whether a chapter keeps failing
-    def export_error(self, item):
-        if not self.is_error(item):
-            file = os.path.join(OUTPUT_DIR, item[PROJECT_KEY][0], "error_comics.json")
-            try:
-                error_comic = eval(self.read(item))
-            except:
-                error_comic = []
-            error_comic.append({ "name" : ComicPath.new_file_name(item['name']),
-                                 "chapter" : ComicPath.new_file_name(item['chapter']),
-                                 "date" : ComicPath().getYearMonthDay()})
-            fu.save_file(file, json.dumps(error_comic))
-    def is_error(self, item):
-        try:
-            for error_c in eval(self.read(item)):
-                (name, chatper, date) = [error_c['name'], error_c['chapter'], error_c['date']]
-                if ComicPath.new_file_name(item['name']) == ComicPath.new_file_name(name) and ComicPath.new_file_name(item['chapter']) == ComicPath.new_file_name(chatper):
-                    return True
-                else:
-                    return False
-        except:
-            return False

Comics/utils/Constant.py (deleted)

@@ -1,114 +0,0 @@
-import os.path,logging
-import re,requests,time
-from datetime import date
-from Comics import settings
-from opencc import OpenCC
-class ComicPath:
-    PREFIX_SCRAMBLE = "scramble="
-    @classmethod
-    def getYearMonthDay(cls):
-        today = date.today()
-        # format as year-month-day
-        return today.strftime("%Y%m%d")
-    @classmethod
-    def getDirComicChapter(cls):
-        return None
-    @classmethod
-    def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix
-    @classmethod
-    def getFileScrambleImageSave(cls,file,relative=False, is_prefix=True):
-        file_name = str(file).split("_")[-1]
-        if relative:
-            file_name = os.path.basename(file_name)
-            if relative == "fullpath":
-                file_name = os.path.join(os.path.dirname(file), file_name)
-        if not is_prefix:
-            return file_name.split(".")[0]
-        else:
-            return file_name
-    # Traditional-to-Simplified Chinese conversion
-    @classmethod
-    def chinese_convert(cls, text,convert='t2s'): return OpenCC(convert).convert(str(text))
-    # normalize to a legal file name
-    @classmethod
-    def fix_file_name(cls, filename, replace=None):
-        if not isinstance(filename, str):
-            return filename
-        in_tab = r'[?*/\|.:><]'
-        str_replace = ""
-        if replace is not None:
-            str_replace = replace
-        filename = re.sub(in_tab, str_replace, filename)
-        count = 1
-        while True:
-            str_file = filename[0-count]
-            if str_file == " ":
-                count += 1
-            else:
-                filename = filename[0:len(filename)+1-count]
-                break
-        return filename
-    @classmethod
-    def new_file_name(cls, name): return cls.fix_file_name(cls.chinese_convert(name))
-    @classmethod
-    def get_file_path(cls, item, result_type="image", file=None, convert=False):
-        PROJECT = item[settings.PROJECT_KEY][0]
-        if not convert:
-            name = item['name']
-            chapter = item['chapter']
-        else:
-            name = cls.fix_file_name(cls.chinese_convert(item['name']))
-            chapter = cls.fix_file_name(cls.chinese_convert(item['chapter']))
-        if result_type == "image":
-            if os.path.sep not in file:
-                file = os.path.join(PROJECT, "images", name, chapter, file)
-        elif result_type == "comic_info":
-            file = os.path.join(PROJECT, "images", name, chapter)
-        elif result_type == "cbz_icon":
-            file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".jpg")
-        elif result_type == "down_icon":
-            file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon"))
-        elif result_type == "down_cache_icon":
-            file = os.path.join(settings.IMAGES_STORE, cls.get_file_path(item=item, result_type="icon_cache"))
-        elif result_type == "icon":
-            file = os.path.join(PROJECT, "icons", name, name+".jpg")
-        elif result_type == "icon_cache":
-            file = os.path.join(PROJECT, "icons", ".cache", name+".jpg")
-        elif result_type == "cbz":
-            file = os.path.join(settings.CBZ_EXPORT_PATH, PROJECT, name, chapter+".CBZ")
-        elif result_type == "images_dir":
-            file = os.path.join(settings.IMAGES_STORE, PROJECT, "images", name, chapter)
-        return file
-    @classmethod
-    def path_cbz(cls, item):
-        return cls.get_file_path(item, result_type="cbz", convert=True)
-class ntfy:
-    @classmethod
-    def sendMsg(cls, msg,alert=False,sleep=None,error=None):
-        try:
-            print(f"#ntfy: {msg}")
-            if alert:
-                requests.post("https://ntfy.caiwenxiu.cn/PyComic",
-                              data=msg.encode(encoding='utf-8'))
-        except:
-            print(f"#ntfy error: {msg}")
-        if sleep != None:
-            logging.info(f'waiting {sleep} seconds before the next stage')
-            time.sleep(int(sleep))
-        if error != None:
-            print(f"#ntfy Error: {error}")
-            return False
-        else:
-            return True

run.py

@@ -2,4 +2,4 @@
 from scrapy import cmdline
 cmdline.execute("scrapy crawl rm_comic".split())