Add browser-like request headers and a timeout to image downloads

This commit is contained in:
glowzz 2025-01-26 17:23:32 +08:00
parent 3df14930e4
commit 5737b297ad
2 changed files with 15 additions and 4 deletions

View File

@ -61,10 +61,21 @@ def download_images():
success_count = 0
url_mapping = {} # 存储URL到本地路径的映射
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
"Referer": "https://www.soujianzhu.cn/",
"Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": "Windows"
}
for url in image_urls:
if not url.startswith(('http://', 'https://')):
if 'www.soujianzhu.cn' not in url:
url = 'https://www.soujianzhu.cn' + url
try:
response = requests.get(url)
response = requests.get(url, headers=headers, timeout=30)
if response.status_code == 200:
image_name = clean_filename(url)
image_path = os.path.join(local_image_folder, image_name)

View File

@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import random
import time
#import random
#import time
from bs4 import BeautifulSoup
import os
import re
import html2text
import requests
import base64
#import base64
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
class CustomHTML2Text(html2text.HTML2Text):