This commit is contained in:
caiwx86 2024-07-22 00:52:50 +08:00
parent 76a1d75038
commit e45f173bbb
4 changed files with 64 additions and 8 deletions

View File

@ -103,10 +103,12 @@ class ComicLoader(ItemLoader):
# Image links
def image_urls(self, value=None, xpath=None, index=None, sexec=None):
    """Forward the given arguments to ``set_properties`` for the 'image_urls' field.

    Args:
        value: Passed through unchanged as the second argument.
        xpath: Passed through unchanged as the third argument.
        index: Passed through unchanged as the fourth argument.
        sexec: Passed through unchanged as the fifth argument.
    """
    field_name = 'image_urls'
    self.set_properties(field_name, value, xpath, index, sexec)
def get_output_value(self, field_name):
def get_output_value(self, field_name, skip_field=["chapter"]):
value = super().get_output_value(field_name)
try:
if isinstance(value, list) and len(value) == 1 : value = value[0]
if isinstance(value, list) and len(value) == 1:
if field_name not in skip_field: value = value[0]
else: value = "".join(value)
except:
print(f"get_output_value value={value} type={type(value)}")
return value

View File

@ -11,7 +11,7 @@ from Comics.items import ComicItem
from Comics.loader import ComicLoader
from Comics.utils import CBZUtils,fileUtils as fu
from Comics.utils import ComicPath
from Comics.utils import checkUtils
from Comics.utils import checkUtils,oldUtils
from Comics.exporters import JsonExport,ItemExporter
from scrapy.pipelines.images import ImagesPipeline
from Comics._utils.ComicInfo import ComicInfoXml
@ -23,9 +23,13 @@ class ComicsPipeline():
if isinstance(item, ComicItem):
# 'output/rm_comic/json/壞X/第1話 壞X'
# 已存在漫画CBZ文件 调用转换
if fu.exists(ComicPath(item).PATH_CBZ()): return ItemExporter().export_obj(item)
result_item = None
if fu.exists(ComicPath(item).PATH_CBZ()): result_item = ItemExporter().export_obj(item)
# 不存在漫画CBZ文件
else: return JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True)
else: result_item = JsonExport(file=ComicPath(item).getDirJosnComicChapter()).export_json(ComicLoader(item).load_item(), if_return=True)
c_chapter = ComicPath.chinese_convert(result_item["chapters"])
oldUtils().clean_old_files(files=c_chapter, folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_CBZ_DIR), move_folder=ComicPath(item).file_path(result_type=ComicPath.MAPPING_OLD_CBZ_DIR))
return result_item
class BaseImagesPipeline(ImagesPipeline):

View File

@ -124,5 +124,6 @@ LOG_LEVEL = "INFO" # 日志等级
LOG_STDOUT = True # standardized output
CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"CBZ")
# Sibling of CBZ_EXPORT_PATH under BASE_OUTPUT; ComicPath maps it to
# MAPPING_OLD_CBZ_DIR, which the pipeline passes as the move target for
# CBZ files that are no longer in the chapter list.
OLD_CBZ_EXPORT_PATH = os.path.join(BASE_OUTPUT,"Old_CBZ")
# Data export classes / ordering
COMIC_INFO_XML_FILE = "ComicInfo.xml"

View File

@ -1,4 +1,4 @@
import base64,hashlib,os,shutil,os.path
import base64,hashlib,os,shutil,os.path,pathlib
import math,time,json,datetime,logging
import re,requests,time,xmlschema
from datetime import date
@ -629,6 +629,8 @@ class ComicPath:
MAPPING_ICON = "icon"
MAPPING_ICON_CACHE = "icon_cache"
MAPPING_CBZ = "cbz"
MAPPING_CBZ_DIR = "cbz_dir"
MAPPING_OLD_CBZ_DIR = "old_cbz_dir"
MAPPING_IMAGES_DIR = "images_dir"
MAPPING_COMIC_JSON = "comic_json"
@ -644,7 +646,9 @@ class ComicPath:
self.MAPPING_ICON_CACHE: os.path.join(self.project, "icons", ".cache", self.name+".jpg"),
self.MAPPING_CBZ: os.path.join(settings.CBZ_EXPORT_PATH, self.project, self.name, self.chapter+".CBZ"),
self.MAPPING_IMAGES_DIR: os.path.join(settings.IMAGES_STORE, self.project, "images", self.name, self.chapter),
self.MAPPING_COMIC_JSON: os.path.join(settings.IMAGES_STORE, self.project, "json", self.name, self.chapter+".json")
self.MAPPING_COMIC_JSON: os.path.join(settings.IMAGES_STORE, self.project, "json", self.name, self.chapter+".json"),
self.MAPPING_CBZ_DIR: os.path.join(settings.CBZ_EXPORT_PATH, self.project, self.name),
self.MAPPING_OLD_CBZ_DIR: os.path.join(settings.OLD_CBZ_EXPORT_PATH, self.project, self.name)
}
def PATH_CBZ(self, result_type=MAPPING_CBZ): return self.file_path(result_type=result_type)
@ -691,6 +695,8 @@ class ComicPath:
#处理成符合规定的文件名
@classmethod
def fix_file_name(cls, filename, replace=None):
if isinstance(filename, list):
for file in filename: cls.fix_file_name(file)
if not isinstance(filename, str):
return filename
in_tab = r'[?*/\|.:><]'
@ -855,4 +861,47 @@ class DBUtils:
@classmethod
def remove(cls,name,db_name=None):
db = cls.init_db(db_name)
db.remove(Query().name == name)
db.remove(Query().name == name)
class oldUtils:
    """Archive chapter files that are no longer part of a comic's chapter list."""

    def clean_old_files(self, files, folder, move_folder, suffix="CBZ"):
        """Move archives in ``folder`` that are not listed in ``files``.

        Every regular file in ``folder`` whose extension equals ``suffix`` is
        compared, by its name without the extension, against the sanitised
        entries of ``files``. Archives with no matching entry are moved to
        ``move_folder`` together with a same-named ``.jpg`` file if one exists
        (presumably the chapter cover -- confirm against the exporter).

        Args:
            files: Current chapter names. A single ``str`` is treated as a
                one-element list so that it is never iterated character by
                character. ``None`` means "unknown" and nothing is moved.
            folder: Directory holding the existing archives. If it is not an
                existing directory the call is a no-op.
            move_folder: Destination directory; created when missing.
            suffix: Archive extension without the leading dot. The comparison
                is case-sensitive.

        Returns:
            Always ``None``; the method works through side effects only.
        """
        if files is None or not os.path.isdir(folder):
            return None
        if isinstance(files, str):
            files = [files]

        # Use pathlib's stem/suffix so that a dot inside the chapter name does
        # not truncate the stem (``"a.b.CBZ"`` -> ``"a.b"``, not ``"a"``).
        wanted_ext = "." + suffix
        old_item = [
            entry.stem
            for entry in pathlib.Path(folder).iterdir()
            if entry.is_file() and entry.suffix == wanted_ext
        ]

        # Sanitise names one by one: handing the whole list to fix_file_name
        # returns the list object unchanged, leaving the names unsanitised.
        new_item = [ComicPath.fix_file_name(name) for name in files]

        # Sets give O(1) membership; the lists keep the original ordering for
        # the report below.
        old_lookup = set(old_item)
        new_lookup = set(new_item)
        only_in_new_item = [item for item in new_item if item not in old_lookup]
        only_in_old_item = [item for item in old_item if item not in new_lookup]
        in_new_item_and_old_item = [item for item in new_item if item in old_lookup]
        print(f"只在new_item中: {only_in_new_item}")
        print(f"只在old_item中: {only_in_old_item}")
        print(f"在new_item和old_item中都有: {in_new_item_and_old_item}")

        self._move_stale_files(only_in_old_item, folder, move_folder, suffix)
        return None

    def _move_stale_files(self, names, folder, move_folder, suffix):
        """Move ``<name>.<suffix>`` and ``<name>.jpg`` from folder to move_folder."""
        os.makedirs(move_folder, exist_ok=True)
        for name in names:
            for ext in (suffix, "jpg"):
                source = os.path.join(folder, name) + "." + ext
                target = os.path.join(move_folder, name) + "." + ext
                if not os.path.exists(source):
                    continue
                # Guard each file separately so one failed move does not skip
                # the remaining files; only filesystem errors are tolerated.
                try:
                    shutil.move(source, target)
                except OSError:
                    print(f"Error: move old_file={source} --> {target}")
                else:
                    print(f"move old_file={source} --> {target}")