update

parent: af7812794f
commit: ac30f59a33

.gitignore (vendored, 4 changes)
@@ -1,5 +1,5 @@
 .scrapy/*
-images/*
-json/*
 .vscode/*
 CBZ/*
+output/*
+/**/__pycache__
.idea/.gitignore (vendored, 3 changes)
@@ -1,3 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
.idea/ComicScrapy.iml
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="stable_vscode" jdkType="Python SDK" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-</module>
@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>
@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="stable_vscode" project-jdk-type="Python SDK" />
-</project>
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/ComicScrapy.iml" filepath="$PROJECT_DIR$/.idea/ComicScrapy.iml" />
-    </modules>
-  </component>
-</project>
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="Git" />
-  </component>
-</project>
Comics/exporters.py
@@ -3,6 +3,7 @@ import os.path,json,ast
 from Comics.settings import COMIC_INFO_FIELDS_TO_EXPORT
 from scrapy.exporters import XmlItemExporter
 from scrapy.exporters import PythonItemExporter
+from scrapy.exporters import JsonItemExporter
 from Comics.items import ComicInfoItem
 from Comics.items import ComicItem
 from Comics.settings import COMIC_INFO_XML_STORE
@@ -10,6 +11,15 @@ from Comics.utils.Constant import ComicPath
 from scrapy.utils.python import is_listlike, to_bytes, to_unicode
 from itemadapter import ItemAdapter

+class CommonExporter():
+    def getPath(self, file, sufix=None):
+        sufix = "."+sufix
+        dirname = os.path.dirname(file)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        if sufix != None and sufix not in file:
+            file = file + sufix
+        return file

 class ItemExporter(PythonItemExporter):
     def convert(self, data):
@@ -25,6 +35,21 @@ class ItemExporter(PythonItemExporter):
         self.finish_exporting()
         return obj_item

+class JsonExport(JsonItemExporter):
+    def __init__(self, file, **kwargs):
+        file = CommonExporter().getPath(file=file, sufix="json")
+        self.file = open(file, "wb")
+        super(JsonExport, self).__init__(self.file, **kwargs)
+
+    def export_json(self, json_object, if_return=False):
+        self.start_exporting()
+        self.export_item(json_object)
+        self.finish_exporting()
+        self.file.close()
+        if if_return:
+            return ItemExporter().export_obj(json_object)
+
+
 class ComicInfoXmlItemExporter(XmlItemExporter):
     custom_root_element = "ComicInfo"
     def __init__(self, comic, chapter):
@@ -66,8 +91,8 @@ class ComicInfoXmlItemExporter(XmlItemExporter):
         self._beautify_indent(depth=1)
         self._beautify_newline()
         for name, value in self._get_serialized_fields(comic_info, default_value=""):
-            if name is "Pages":
-                value = str(value).split(',')
+            if name == "Pages":
+                value = ast.literal_eval(value)
             if value is not None or value != "":
                 self._export_xml_field(name, value, depth=2, child_element=child_element)
         #self._beautify_indent(depth=1)
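The new JsonExport wraps Scrapy's JsonItemExporter so a pipeline can write one chapter per JSON file in a single call, with CommonExporter.getPath creating the parent directory and appending the suffix. Note that, as committed, getPath concatenates "."+sufix before its None check, so the sufix argument is effectively mandatory. A minimal sketch of the intended call pattern, assuming a dict-like item (the file name is illustrative):

    from Comics.exporters import JsonExport

    item = {"name": "Demo", "chapter": "ch-001", "images": ["001.jpg"]}
    # Writes output/json/Demo/ch-001.json and, with if_return=True,
    # hands back the item re-exported as a plain Python object.
    data = JsonExport(file="output/json/Demo/ch-001").export_json(item, if_return=True)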
Comics/items.py
@@ -2,8 +2,10 @@
 #
 # See documentation in:
 # https://docs.scrapy.org/en/latest/topics/items.html
+import os,Comics.settings as settings,logging
 from scrapy.item import Item, Field
 from Comics.utils.Constant import ComicPath
+from Comics.utils.FileUtils import imageUtils
 from scrapy.loader.processors import TakeFirst, MapCompose, Join

 def serialize_to_chinese(value):
@@ -13,9 +15,37 @@ def serialize_to_fix_file(value):
     file = ComicPath.chinese_convert(value)
     return ComicPath.fix_file_name(file)

-class ComicOItem(Item):
+def _serialize_to_images(value, result_type=None):
+    count = 1
+    images_item = []
+    image_urls = []
+    for image in value:
+        (image_src, scramble) = [image.get("src"), image.get("scramble")]
+        count_image = settings.IMAGES_NAME_FORMAT.format(count)
+        suffix = "."+str(image_src).split(".")[-1]
+        image_name = count_image + suffix
+        if scramble:
+            de_str = str(image_src).split("/")[-1].replace(suffix, "==")
+            blocks_num = imageUtils.encodeImage(de_str)
+            image_name = ComicPath.getFileScrambleImageName(count=count_image, block=blocks_num, suffix=suffix)
+        #images_item.append(ImagesItem(image_name=count_image + suffix, image_url=image_src, image_path=image_name))
+        images_item.append(image_name)
+        image_urls.append(image_src)
+        count += 1
+    logging.info(f"images_len: {len(images_item)}")
+    if result_type == "image_urls": return image_urls
+    else: return images_item
+
+def serialize_to_images(value): return _serialize_to_images(value)
+
+
+def serialize_to_image_urls(value): return _serialize_to_images(value, result_type="image_urls")
+
+
+class ListComicItem(Item):
     name = Field()
     chapterItem = Field()
     link = Field()


 class ComicItem(Item):
     # index
@@ -25,7 +55,7 @@ class ComicItem(Item):
     # chapter name
     chapter = Field(serializer=serialize_to_fix_file, output_processor=TakeFirst())
     # image links
-    list_img = Field()
+    list_img = Field(serializer=serialize_to_images)
     # author
     author = Field(serialize_to_chinese=serialize_to_chinese, output_processor=TakeFirst())
     # cover link
@@ -41,13 +71,18 @@ class ComicItem(Item):
     # age rating
     age_rating = Field(output_processor=TakeFirst())

-    images = Field()
+    images_old = Field(serializer=serialize_to_images)
+    images = Field(serializer=serialize_to_images)
+    image_urls = Field(serializer=serialize_to_image_urls)
     images_name = Field()

-class ImageItem(Item):
+class ImagesItem(Item):
     image_name = Field()
     image_url = Field()
     image_path = Field()
+    images = Field()
+    image_urls = Field()
+    comic = Field()

 def serializer_info_writer(value):
     list_value = []
@@ -56,6 +91,42 @@ def serializer_info_writer(value):
         list_value.append(v)
     return ",".join(list_value)

+# Result_type name
+def _serializer_info_imagesa(value, result_type=None):
+    info = []
+    for success, img in value:
+        img_path = os.path.join(settings.IMAGES_STORE, img['path'])
+        if result_type == 'name':
+            info.append(ComicPath().getFileScrambleImageSave(img_path,True,False))
+        else:
+            info.append(img_path)
+    if result_type == "len":
+        value = len(info)
+    else:
+        value = info
+    return value
+
+def _serialize_info_images(value, result_type=None):
+    images = []
+    for image in value:
+        images.append(ComicPath().getFileScrambleImageSave(image,True,False))
+    if result_type == "count":
+        return len(images)
+    else:
+        return images
+
+
+def serializer_info_images(value): return _serialize_info_images(value)
+
+def serializer_info_images_count(value): return _serialize_info_images(value, "count")
+
+def serializer_info_images_completed(value):
+    return _serialize_info_images(value, result_type='name')
+
+def serializer_info_images_count(value):
+    return _serialize_info_images(value, result_type='len')
+
+
 class ComicInfoItem(Item):
     Title = Field(info='chapter')#"chapter name",True]
     Series = Field(info='name')# ","series name",True]
@@ -70,9 +141,11 @@ class ComicInfoItem(Item):
     Genre = Field(info='genre')# ","genre",True]
     Tags = Field(info='tags')# ","tags",True]
     Web = Field()# ","homepage",False]
-    PageCount = Field()# ","total pages",True]
+    #PageCount = Field()# ","total pages",True]
+    PageCount = Field(info='images',serializer=serializer_info_images_count)# ","total pages",True]
     LanguageISO = Field()#","language",True]
     AgeRating = Field(info='age_rating')#","age rating",False]
-    Pages = Field(info='images_name')#","page list",True]
+    #Pages = Field(info='images_name', serializer=serializer_info_images_completed)#","page list",True]
+    Pages = Field(info='images', serializer=serializer_info_images)#","page list",True]
     # ComicInfo.xml and ComicChapter.json end
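ComicInfoItem leans on the fact that Scrapy's Field is a plain dict: the custom info= key records which ComicItem field feeds each ComicInfo.xml tag, and serializer= is applied later by the exporter, not when the field is set. A self-contained sketch of how such metadata can be consumed (map_comic_to_info is hypothetical, not part of the commit):

    from scrapy.item import Item, Field

    class ComicInfo(Item):
        # Field() is just a dict, so arbitrary metadata like info= rides along.
        Title = Field(info='chapter')
        PageCount = Field(info='images', serializer=len)  # serializers run at export time

    def map_comic_to_info(comic):
        info_item = ComicInfo()
        for name, meta in ComicInfo.fields.items():
            source = meta.get('info')
            if source and source in comic:
                info_item[name] = comic[source]
        return info_item

    print(map_comic_to_info({'chapter': 'Ch. 1', 'images': ['001.jpg', '002.jpg']}))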
Comics/loader.py
@@ -1,5 +1,6 @@
+import json
 from scrapy.loader import ItemLoader

 class ComicLoader(ItemLoader):
     def parseExec(cls,data,exec):
         if data !=None and exec != None:
@@ -42,3 +43,14 @@ class ComicLoader(ItemLoader):

     def get_exec(self, value, str_exec):
         return self.parseExec(value, str_exec)
+
+    def add_value(self, field_name, value, *processors, re=None, **kw):
+        if self.auto_replace_value(field_name, value):
+            return super().add_value(field_name, value, *processors, re=re, **kw)
+
+
+    def auto_replace_value(self, field_name, value):
+        if self.get_output_value(field_name) != None:
+            self._replace_value(field_name, value)
+            return False
+        else: return True
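The add_value override flips ItemLoader's default accumulate-on-repeat behavior to last-write-wins: when a field already has an output value, auto_replace_value swaps the new value in via _replace_value and skips the normal add. For contrast, the stock behavior, shown with only Scrapy's public API:

    from scrapy.item import Item, Field
    from scrapy.loader import ItemLoader
    from scrapy.loader.processors import TakeFirst

    class Demo(Item):
        chapter = Field(output_processor=TakeFirst())

    loader = ItemLoader(item=Demo())
    loader.add_value('chapter', 'old name')
    loader.add_value('chapter', 'new name')  # a stock loader appends, it does not replace
    print(loader.load_item())                # {'chapter': 'old name'} because of TakeFirst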
Comics/pipelines.py
@@ -5,16 +5,17 @@


 # useful for handling different item types with a single interface
-import os, scrapy
+import os, scrapy,logging,time,random
 from Comics import settings
 from Comics.utils.FileUtils import imageUtils
 from Comics.utils.FileUtils import fileUtils
 from Comics.utils.Constant import ComicPath
 from Comics.items import ComicItem
-from Comics.items import ImageItem
+from Comics.items import ImagesItem
 from scrapy.pipelines.images import ImagesPipeline
 from Comics.exporters import ComicInfoXmlItemExporter
 from Comics.exporters import ItemExporter
+from Comics.exporters import JsonExport
 from Comics.utils.FileUtils import CBZUtils

 class ComicsPipeline:
@@ -23,59 +24,40 @@ class ComicsPipeline:
     # item is the object yielded by the spider
     def process_item(self, item, spider):
         if isinstance(item, ComicItem):
             item = ComicItem(ItemExporter().export_obj(item))
-            file = os.path.join("json", item['name'], item['chapter'])
-            fileUtils.save_file(f"{file}.json", item)
-            return item
+            file = os.path.join(settings.OUTPUT_DIR,"json", item['name'], item['chapter'])
+            data = JsonExport(file=file).export_json(item, if_return=True)
+            #item['images'] = data['images']
+            return data
     # image parsing

     def close_spider(self,spider):
         pass

-class ImageParsePipeline:
-    def process_item(self, item, spider):
-        if isinstance(item, ComicItem):
-            count = 1
-            images_item = []
-            for image in item['list_img']:
-                (image_src, scramble) = [image.get("src"), image.get("scramble")]
-                count_image = "{:0>3d}".format(count)
-                suffix = "."+str(image_src).split(".")[-1]
-                image_name = count_image + suffix
-                if scramble:
-                    de_str = str(image_src).split("/")[-1].replace(suffix, "==")
-                    blocks_num = imageUtils.encodeImage(de_str)
-                    image_name = ComicPath.getFileScrambleImageName(count=count_image, block=blocks_num, suffix=suffix)
-                image_path = os.path.join(item['name'], item['chapter'], image_name)
-                images_item.append(ImageItem(image_name=count_image + suffix, image_url=image_src, image_path=image_path))
-                count += 1
-            item['images'] = images_item
-        return item

 class ImgDownloadPipeline(ImagesPipeline):
-    def file_path(self, request, response=None, info=None, *, item=None):
-        image = request.meta['item']
-        image_path = image['image_path']
-        en_image_path = os.path.join(os.path.dirname(image_path), image['image_name'])
-        if os.path.exists(os.path.join(settings.IMAGES_STORE, en_image_path)):
-            return en_image_path
-        else:
-            return image_path
+    def file_exits(self, image_path):
+        en_image_path = ComicPath().getFileScrambleImageSave(image_path, relative="fullpath")
+        return os.path.exists(os.path.join(settings.IMAGES_STORE, en_image_path))
+
+    def file_full_path(self, item, image): return os.path.join(item['name'], item['chapter'], image)
+
+    def file_path(self, request, response=None, info=None, *, item=None): return request.meta['path']

     def get_media_requests(self, item, info):
-        for image in item['images']:
-            yield scrapy.Request(url=image['image_url'], meta={'item': image})
+        for image_url,image_path in zip(item['image_urls'],item['images']):
+            image_path = self.file_full_path(item, image_path)
+            if self.file_exits(image_path):
+                logging.info(f"file exists: {image_path}")
+            else:
+                logging.info(f"downloading {image_url} --> {image_path}")
+                yield scrapy.Request(url=image_url, meta={'path': image_path})

     def item_completed(self, results, item, info):
-        info_img = []
-        for success, img in results:
-            img_path = os.path.join(settings.IMAGES_STORE, img['path'])
-            # descramble the image
-            img_path = imageUtils.deScrambleImagesByPath(img_path)
-            info_img.append(os.path.basename(img_path).split('.')[0])
-        item['images_name'] = ",".join(info_img)
+        item['images_name'] = results
         # return item
         # generate ComicInfo.xml
-        ComicInfoXmlItemExporter(comic=item['name'], chapter=item['chapter']).export_xml(item)
+        comic_info = ComicInfoXmlItemExporter(comic=item['name'], chapter=item['chapter']).export_xml(item)
        # pack into CBZ
-        CBZUtils.packComicChapterCBZ(comic=item['name'], chapter=item['chapter'], remove=False)
+        CBZUtils.packComicChapterCBZ(comic=item['name'], chapter=item['chapter'],
+                                     comic_info_images= comic_info["Pages"], remove=False)
+        time.sleep(random.randint(5,10))
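The reworked ImgDownloadPipeline hangs off two standard ImagesPipeline hooks: get_media_requests decides what to fetch (skipping pages whose descrambled file already exists) and file_path returns the target path carried in request.meta, instead of Scrapy's default URL-hash name. A condensed sketch of that pattern (the class name and meta key are illustrative):

    import os
    import scrapy
    from scrapy.pipelines.images import ImagesPipeline

    class MetaPathImagesPipeline(ImagesPipeline):
        def get_media_requests(self, item, info):
            for url, path in zip(item['image_urls'], item['images']):
                # self.store.basedir is the pipeline's storage root
                if os.path.exists(os.path.join(self.store.basedir, path)):
                    continue  # skip files that are already on disk
                yield scrapy.Request(url, meta={'path': path})

        def file_path(self, request, response=None, info=None, *, item=None):
            # Store exactly where the request said, not at a URL-hash name.
            return request.meta['path']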
Comics/settings.py
@@ -7,13 +7,14 @@
 # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
 # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
 from fake_useragent import UserAgent
+import os

 BOT_NAME = 'Comics'

 SPIDER_MODULES = ['Comics.spiders']
 NEWSPIDER_MODULE = 'Comics.spiders'


+OUTPUT_DIR = "output"
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
 #USER_AGENT = 'Comics (+http://www.yourdomain.com)'
 USER_AGENT = UserAgent().random
@@ -22,22 +23,23 @@ ROBOTSTXT_OBEY = False

 HTTPERROR_ALLOWED_CODES = [ 200 , 403]
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
-#CONCURRENT_REQUESTS = 32
+CONCURRENT_REQUESTS = 16

 # Configure a delay for requests for the same website (default: 0)
 # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
 # See also autothrottle settings and docs
-IMAGES_STORE = 'images'
-COMIC_INFO_XML_STORE = 'images'
-DOWNLOAD_DELAY = 20
+IMAGES_STORE = os.path.join(OUTPUT_DIR, 'images')
+IMAGES_NAME_FORMAT = "{:0>3d}"
+COMIC_INFO_XML_STORE = IMAGES_STORE
+DOWNLOAD_DELAY = 0
 # retry settings
 RETRY_ENABLED = True
 RETRY_TIMES = 10  # set to however many retries you want
 # the next line is optional
 RETRY_HTTP_CODES = [500, 502, 503, 504, 408, 401]
 # The download delay setting will honor only one of:
-#CONCURRENT_REQUESTS_PER_DOMAIN = 16
-#CONCURRENT_REQUESTS_PER_IP = 16
+CONCURRENT_REQUESTS_PER_DOMAIN = 16
+CONCURRENT_REQUESTS_PER_IP = 16
 PROXY_LIST = [
     "http://127.0.0.1:7890",
 ]
@@ -79,8 +81,9 @@ DOWNLOADER_MIDDLEWARES = {
 # Configure item pipelines
 # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 ITEM_PIPELINES = {
     # 'scrapy.pipelines.images.ImagesPipeline' : 1,
     'Comics.pipelines.ComicsPipeline': 300,
-    'Comics.pipelines.ImageParsePipeline': 400,
+    # 'Comics.pipelines.ImageParsePipeline': 400,
     'Comics.pipelines.ImgDownloadPipeline': 500,
 }

@@ -102,10 +105,14 @@ AUTOTHROTTLE_DEBUG = False
 HTTPCACHE_ENABLED = True
 HTTPCACHE_EXPIRATION_SECS = 0
 HTTPCACHE_DIR = 'httpcache'
-HTTPCACHE_IGNORE_HTTP_CODES = [500, 502, 404, 403, 401]
+HTTPCACHE_IGNORE_HTTP_CODES = [500, 502, 404]
 #HTTPCACHE_STORAGE = 'Comics.middlewares.MyFilesystemCacheStorage'
+HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

+# Logging configuration
+LOG_LEVEL = "INFO"  # log level
+LOG_STDOUT = True  # mirror stdout into the log

 CBZ_EXPORT_PATH = "CBZ"
 # exporter field ordering
 COMIC_INFO_XML_FILE = "ComicInfo.xml"
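The new IMAGES_NAME_FORMAT zero-pads the page counter so image names sort lexicographically inside the CBZ, which is how most readers order pages. For example:

    IMAGES_NAME_FORMAT = "{:0>3d}"
    print(IMAGES_NAME_FORMAT.format(7))    # -> 007
    print(IMAGES_NAME_FORMAT.format(123))  # -> 123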
Comics/spiders/rm_comic.py
@@ -1,18 +1,28 @@
-import scrapy
+import scrapy,logging,time
 from Comics.items import ComicItem
 from Comics.loader import ComicLoader
 from itemadapter import ItemAdapter
 from Comics.items import ComicInfoItem
+from Comics.items import ListComicItem

 class RmComicSpider(scrapy.Spider):
     name = 'rm_comic'
     allowed_domains = ['rm01.xyz']
     main_url = 'https://rm01.xyz'
     #start_urls = ['https://rm01.xyz/books/63b65185-f798-4c8f-a0b0-8811615908fd/0']
+    start_urls = 'https://rm01.xyz/books'

     def start_requests(self):
-        yield scrapy.Request('https://rm01.xyz'
-                             '/books/306ec1e2-f701-4fda-bb78-041ad6ec4020', callback=self.parse_comic)
+        yield scrapy.Request(self.start_urls, callback=self.books_comic)

+    def books_comic(self, response):
+        books_comic = ComicLoader(item=ListComicItem(), response=response)
+        data = books_comic.get_xpath('//script[@id="__NEXT_DATA__"]/text()')[0]
+        str_exec = "props.pageProps.books"
+        books = books_comic.get_exec(data, str_exec=str_exec)
+        for book in books:
+            books_comic.add_value('link', book['id'])
+            logging.info(f"downloading books %s" % book['name'])
+            time.sleep(3)
+            yield scrapy.Request(url=self.start_urls+"/"+book['id'], callback=self.parse_comic)


 # fetch one comic's metadata
 # after collecting the chapter links, move on to the next stage
@@ -43,7 +53,8 @@ class RmComicSpider(scrapy.Spider):
         #comic_item.add_exec('dep', data, str_exec=str_exec+"description")
         comic_item.add_value('index', response.meta['num'])
         comic_item.add_exec('chapter', data, str_exec=str_exec + "chapterName")
-        comic_item.add_exec('list_img', data, str_exec+"images")
+        comic_item.add_exec('image_urls', data, str_exec+"images")
+        comic_item.add_exec('images', data, str_exec+"images")
         comic = comic_item.load_item()
         chapter_api_url = comic_item.get_exec(data, str_exec+"chapterAPIPath")
         if chapter_api_url is not None:
@@ -55,8 +66,10 @@ class RmComicSpider(scrapy.Spider):
     def parse_chapter_api(self, response):
         comic_item = ComicLoader(item=response.meta['item'], response=response)
         comic_item.add_exec('chapter', response.text, str_exec='chapter.name')
-        comic_item.add_exec('list_img', response.text, str_exec='chapter.images')
+        comic_item.add_exec('image_urls', response.text, str_exec='chapter.images')
+        comic_item.add_exec('images', response.text, str_exec='chapter.images')
         yield comic_item.load_item()


     def parse(self, response):
         raise NotImplementedError
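books_comic works because the target site is a Next.js app: the full page state ships as JSON in a <script id="__NEXT_DATA__"> tag, and get_exec walks a dotted path such as props.pageProps.books through it. A plain-dict sketch of that lookup (the committed parseExec may differ in its details):

    import json

    def get_by_dotted_path(data, path):
        # Walk "props.pageProps.books" through the __NEXT_DATA__ JSON blob.
        node = json.loads(data) if isinstance(data, str) else data
        for key in path.split("."):
            node = node[key]
        return node

    next_data = '{"props": {"pageProps": {"books": [{"id": "abc", "name": "Demo"}]}}}'
    print(get_by_dotted_path(next_data, "props.pageProps.books"))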
Comics/utils/Constant.py
@@ -12,7 +12,16 @@ class ComicPath:
     def getFileScrambleImageName(cls,count,block,suffix=".jpg"): return cls.PREFIX_SCRAMBLE+str(block)+"_"+str(count)+suffix

     @classmethod
-    def getFileScrambleImageSave(cls,file): return str(file).split("_")[-1]
+    def getFileScrambleImageSave(cls,file,relative=False, is_prefix=True):
+        file_name = str(file).split("_")[-1]
+        if relative:
+            file_name = os.path.basename(file_name)
+        if relative == "fullpath":
+            file_name = os.path.join(os.path.dirname(file), file_name)
+        if not is_prefix:
+            return file_name.split(".")[0]
+        else:
+            return file_name

     # convert Traditional Chinese to Simplified Chinese
     @classmethod
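The scramble naming scheme smuggles the descrambling key through the filename: getFileScrambleImageName prepends PREFIX_SCRAMBLE and the block count, and getFileScrambleImageSave recovers the final page name by taking everything after the last underscore. A sketch with an illustrative prefix (PREFIX_SCRAMBLE's real value is defined elsewhere in Constant.py):

    PREFIX_SCRAMBLE = "scramble_"  # illustrative only

    def scramble_name(count, block, suffix=".jpg"):
        # e.g. block=10, count="001" -> "scramble_10_001.jpg"
        return PREFIX_SCRAMBLE + str(block) + "_" + str(count) + suffix

    def saved_name(file):
        # everything after the last "_" is the final page name
        return str(file).split("_")[-1]

    print(saved_name(scramble_name("001", 10)))  # -> 001.jpg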
Comics/utils/FileUtils.py
@@ -16,6 +16,12 @@ class fileUtils:
         fs.write(str(data))
         fs.close()

+    @classmethod
+    def path(cls, file):
+        base_dir = os.path.dirname(file)
+        if not os.path.exists(base_dir): os.makedirs(base_dir)
+        return file
+
 class CommonUtils:
     @classmethod
     def parseExec(cls,data,exec):
@@ -29,11 +35,13 @@ class CommonUtils:
 class imageUtils:

     @classmethod
-    def deScrambleImagesByDir(cls,chapter_dir):
+    def descramble_images_by_dir(cls, chapter_dir):
+        if os.path.isfile(chapter_dir):
+            chapter_dir = os.path.dirname(chapter_dir)
         scramble_count = 0
         if os.path.exists(chapter_dir):  # locate the chapter's image directory
-            dirs = os.listdir(chapter_dir)
-            for img in dirs:
+            while ComicPath.PREFIX_SCRAMBLE in os.listdir(chapter_dir):
+                for img in os.listdir(chapter_dir):
                     if img.startswith(ComicPath.PREFIX_SCRAMBLE):
                         imageUtils.encode_scramble_image(os.path.join(chapter_dir, img))
                         scramble_count += 1
@@ -42,7 +50,8 @@ class imageUtils:

     @classmethod
     def deScrambleImagesByPath(cls, img_path, img_save=None):
-        if os.path.basename(img_path).startswith(ComicPath.PREFIX_SCRAMBLE):
+        if os.path.basename(img_path).\
+                startswith(ComicPath.PREFIX_SCRAMBLE) and os.path.exists(img_path):
             img_path = imageUtils.encode_scramble_image(img_path, img_save)
         return img_path

@@ -186,16 +195,18 @@ class imageUtils:
         if scramble_file_cache != None and os.path.exists(scramble_file_cache): os.remove(scramble_file_cache)

     @classmethod
-    def encode_scramble_image(cls,imgpath,img_save=None):
-        image = Image.open(imgpath)
+    def encode_scramble_image(cls, img_path, img_save=None):
+        if not os.path.exists(img_path):
+            return
+        image = Image.open(img_path)
         w, h = image.size
         #image.show()
-        file_str = str(imgpath).split("=")
+        file_str = str(img_path).split("=")
         #10_29.jpg
         base_fn = file_str[-1].split("_")
         blocks = int(base_fn[0])
         if img_save == None:
-            save_path = os.path.join(os.path.dirname(imgpath),ComicPath.getFileScrambleImageSave(imgpath))
+            save_path = os.path.join(os.path.dirname(img_path),ComicPath.getFileScrambleImageSave(img_path))
         else: save_path = img_save
         # print(type(aid),type(img_name))
         if blocks:
@@ -230,10 +241,10 @@ class imageUtils:

                 newh += b_h
             newimage.save(save_path)
-            print("descrambled=", save_path)
-            if os.path.exists(imgpath):
-                os.remove(imgpath)
-                print("remove=", imgpath)
+            logging.info(f"descrambled {save_path}")
+            if os.path.exists(img_path):
+                os.remove(img_path)
+                logging.debug(f"remove {img_path}")
         return save_path


@@ -270,7 +281,7 @@ class CBZUtils:
         logging.info(f"packing finished: {target_file}")

     @classmethod
-    def packComicChapterCBZ(cls, comic, chapter, remove=True):
+    def packComicChapterCBZ(cls, comic, chapter, comic_info_images, remove=True):
         images_chapter_path = os.path.join(IMAGES_STORE, comic, chapter)
         cbz_chapter_path = os.path.join(CBZ_EXPORT_PATH, comic, chapter) + ".CBZ"
         if os.path.exists(images_chapter_path):
@@ -278,13 +289,15 @@ class CBZUtils:
             for file in dirs:
                 if file.startswith(ComicPath.PREFIX_SCRAMBLE):
                     try:
-                        os.remove(file)
+                        imageUtils.deScrambleImagesByPath(os.path.join(images_chapter_path,file))
                     except Exception as e:
                         print(f"error while handling {file}: {e}, skipped")
                         return False
             cls.zip_compression(images_chapter_path, cbz_chapter_path)
             time.sleep(0.1)
             if remove: shutil.rmtree(images_chapter_path)
+            # validation
+            cls.cbz_validate(cbz_chapter_path, comic_info_images)
             return True

     @classmethod
@@ -338,3 +351,11 @@ class CBZUtils:
         except Exception as e:
             print(e)
         return result
+
+    @classmethod
+    def cbz_validate(cls, zip_path, comic_info_images):
+        if len(cls.zip_info(zip_path)) == len(comic_info_images):
+            logging.info(f"validating successfully === {zip_path}")
+        else:
+            os.remove(zip_path)
+            logging.error(f"validating fail === {zip_path}")
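cbz_validate's integrity check is simply entry counting: a CBZ is a ZIP of page images, so comparing the archive's entry count with the expected page list catches truncated packs. A standalone sketch using the stdlib zipfile module, which the project's zip_info presumably wraps:

    import zipfile

    def cbz_page_count(cbz_path):
        # Count entries in the archive; a cheap proxy for "all pages present".
        with zipfile.ZipFile(cbz_path) as zf:
            return len(zf.namelist())

    def validate(cbz_path, expected_pages):
        return cbz_page_count(cbz_path) == len(expected_pages)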