Add request headers and a timeout for image downloads
This commit is contained in:
parent
3df14930e4
commit
5737b297ad
@ -61,10 +61,21 @@ def download_images():
|
||||
success_count = 0
|
||||
url_mapping = {} # 存储URL到本地路径的映射
|
||||
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
|
||||
"Referer": "https://www.soujianzhu.cn/",
|
||||
"Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
|
||||
"Sec-Ch-Ua-Mobile": "?0",
|
||||
"Sec-Ch-Ua-Platform": "Windows"
|
||||
}
|
||||
|
||||
for url in image_urls:
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
if 'www.soujianzhu.cn' not in url:
|
||||
url = 'https://www.soujianzhu.cn' + url
|
||||
|
||||
try:
|
||||
response = requests.get(url)
|
||||
response = requests.get(url, headers=headers, timeout=30)
|
||||
if response.status_code == 200:
|
||||
image_name = clean_filename(url)
|
||||
image_path = os.path.join(local_image_folder, image_name)
|
||||
|
@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
import random
|
||||
import time
|
||||
#import random
|
||||
#import time
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
import re
|
||||
import html2text
|
||||
import requests
|
||||
import base64
|
||||
#import base64
|
||||
|
||||
# Custom HTML converter class, inheriting from html2text.HTML2Text
|
||||
class CustomHTML2Text(html2text.HTML2Text):
|
||||
|
Loading…
Reference in New Issue
Block a user