This commit is contained in:
caiwx86 2024-07-15 05:07:01 +08:00
parent eb56830a5a
commit cf5bde7e08
2 changed files with 3 additions and 2 deletions

View File

@ -14,7 +14,7 @@ from scrapy.spiders import Spider
from scrapy.utils.python import to_bytes, to_unicode
from w3lib.http import headers_dict_to_raw, headers_raw_to_dict
from scrapy.extensions.httpcache import FilesystemCacheStorage
from Comics.settings import HTTPCACHE_ALLOW_PREFIXS
from Comics.settings import HTTPCACHE_ALLOW_PREFIXS, HTTPCACHE_PROXY_DOMAINS
# File cache settings end
# useful for handling different item types with a single interface
@ -24,7 +24,7 @@ class ProxyMiddleware(object):
def process_request(self, request, spider):
url = request.url
logging.debug(f"proxy url=== {url} {str(url).split('.')[-1]}")
if str(url).split('.')[-1] not in HTTPCACHE_ALLOW_PREFIXS:
if str(url).split('.')[-1] not in HTTPCACHE_ALLOW_PREFIXS or str(url).replace("https://", "").replace("http://", "").split("/") in HTTPCACHE_PROXY_DOMAINS:
if len(PROXY_LIST) != 0:
request.meta["proxy"] = random.choice(PROXY_LIST)
else:

View File

@ -116,6 +116,7 @@ HTTPCACHE_ENABLED = True
# Expiration value for the HTTP cache. NOTE(review): in Scrapy's documented
# semantics 0 means cached responses never expire — confirm for the Scrapy
# version this project pins.
HTTPCACHE_EXPIRATION_SECS = 0
# Cache data lives in the 'httpcache' directory under BASE_OUTPUT.
HTTPCACHE_DIR = os.path.join(BASE_OUTPUT,'httpcache')
# Despite the "PREFIXS" name, ProxyMiddleware.process_request compares these
# against the text after the last '.' in the request URL, i.e. they act as
# file extensions. The comparison is case-sensitive, so only the exact
# spellings listed here match ('jpeg' and 'GIF', for instance, do not).
HTTPCACHE_ALLOW_PREFIXS = [ 'jpg', 'png', 'gif', 'JPG', "PNG", "JPEG"]
# Host names that ProxyMiddleware.process_request checks the request URL
# against when deciding whether to assign a proxy.
# NOTE(review): that check tests the whole list returned by `.split("/")`
# for membership in this list, not the host component itself; a list never
# equals one of these strings, so the domain condition looks like it can
# never be true. Confirm, and compare `.split("/")[0]` there if a host match
# is what was intended.
HTTPCACHE_PROXY_DOMAINS = [ 'r5.rmcdn3.xyz' ]
# Dotted path naming the cache storage class (MyFilesystemCacheStorage in
# Comics.middlewares).
HTTPCACHE_STORAGE = 'Comics.middlewares.MyFilesystemCacheStorage'
# Logging configuration