Add browser-like request headers and a 30-second timeout to image downloads
This commit is contained in:
parent
3df14930e4
commit
5737b297ad
@ -61,10 +61,21 @@ def download_images():
|
|||||||
success_count = 0
|
success_count = 0
|
||||||
url_mapping = {} # 存储URL到本地路径的映射
|
url_mapping = {} # 存储URL到本地路径的映射
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
|
||||||
|
"Referer": "https://www.soujianzhu.cn/",
|
||||||
|
"Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
|
||||||
|
"Sec-Ch-Ua-Mobile": "?0",
|
||||||
|
"Sec-Ch-Ua-Platform": "Windows"
|
||||||
|
}
|
||||||
|
|
||||||
for url in image_urls:
|
for url in image_urls:
|
||||||
|
if not url.startswith(('http://', 'https://')):
|
||||||
|
if 'www.soujianzhu.cn' not in url:
|
||||||
|
url = 'https://www.soujianzhu.cn' + url
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.get(url)
|
response = requests.get(url, headers=headers, timeout=30)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
image_name = clean_filename(url)
|
image_name = clean_filename(url)
|
||||||
image_path = os.path.join(local_image_folder, image_name)
|
image_path = os.path.join(local_image_folder, image_name)
|
||||||
|
@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By
|
|||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
import random
|
#import random
|
||||||
import time
|
#import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import html2text
|
import html2text
|
||||||
import requests
|
import requests
|
||||||
import base64
|
#import base64
|
||||||
|
|
||||||
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
|
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
|
||||||
class CustomHTML2Text(html2text.HTML2Text):
|
class CustomHTML2Text(html2text.HTML2Text):
|
||||||
|
Loading…
Reference in New Issue
Block a user