Add request headers for image download

This commit is contained in:
glowzz 2025-01-26 17:23:32 +08:00
parent 3df14930e4
commit 5737b297ad
2 changed files with 15 additions and 4 deletions

View File

@@ -61,10 +61,21 @@ def download_images():
success_count = 0 success_count = 0
url_mapping = {} # 存储URL到本地路径的映射 url_mapping = {} # 存储URL到本地路径的映射
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
"Referer": "https://www.soujianzhu.cn/",
"Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": "Windows"
}
for url in image_urls: for url in image_urls:
if not url.startswith(('http://', 'https://')):
if 'www.soujianzhu.cn' not in url:
url = 'https://www.soujianzhu.cn' + url
try: try:
response = requests.get(url) response = requests.get(url, headers=headers, timeout=30)
if response.status_code == 200: if response.status_code == 200:
image_name = clean_filename(url) image_name = clean_filename(url)
image_path = os.path.join(local_image_folder, image_name) image_path = os.path.join(local_image_folder, image_name)

View File

@@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
import random #import random
import time #import time
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import os import os
import re import re
import html2text import html2text
import requests import requests
import base64 #import base64
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text # 自定义 HTML 转换器类,继承自 html2text.HTML2Text
class CustomHTML2Text(html2text.HTML2Text): class CustomHTML2Text(html2text.HTML2Text):