Add browser-like request headers and a 30-second timeout to image downloads
This commit is contained in:
		| @@ -61,10 +61,21 @@ def download_images(): | ||||
|     success_count = 0 | ||||
|     url_mapping = {}  # 存储URL到本地路径的映射 | ||||
|  | ||||
|     headers = { | ||||
|                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", | ||||
|                 "Referer": "https://www.soujianzhu.cn/", | ||||
|                 "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', | ||||
|                 "Sec-Ch-Ua-Mobile": "?0", | ||||
|                 "Sec-Ch-Ua-Platform": "Windows" | ||||
|                 }     | ||||
|  | ||||
|     for url in image_urls: | ||||
|         if not url.startswith(('http://', 'https://')): | ||||
|             if 'www.soujianzhu.cn' not in url: | ||||
|                 url = 'https://www.soujianzhu.cn' + url | ||||
|  | ||||
|         try: | ||||
|             response = requests.get(url) | ||||
|             response = requests.get(url, headers=headers, timeout=30) | ||||
|             if response.status_code == 200: | ||||
|                 image_name = clean_filename(url) | ||||
|                 image_path = os.path.join(local_image_folder, image_name) | ||||
|   | ||||
| @@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By | ||||
| from selenium.webdriver.support.ui import WebDriverWait | ||||
| from selenium.webdriver.support import expected_conditions as EC | ||||
| from selenium.webdriver.common.action_chains import ActionChains | ||||
| import random | ||||
| import time | ||||
| #import random | ||||
| #import time | ||||
| from bs4 import BeautifulSoup | ||||
| import os | ||||
| import re | ||||
| import html2text | ||||
| import requests | ||||
| import base64 | ||||
| #import base64 | ||||
|  | ||||
| # 自定义 HTML 转换器类,继承自 html2text.HTML2Text | ||||
| class CustomHTML2Text(html2text.HTML2Text): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user