Add HTTP request headers for image downloads
This commit is contained in:
		| @@ -61,10 +61,21 @@ def download_images(): | |||||||
|     success_count = 0 |     success_count = 0 | ||||||
|     url_mapping = {}  # 存储URL到本地路径的映射 |     url_mapping = {}  # 存储URL到本地路径的映射 | ||||||
|  |  | ||||||
|  |     headers = { | ||||||
|  |                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", | ||||||
|  |                 "Referer": "https://www.soujianzhu.cn/", | ||||||
|  |                 "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', | ||||||
|  |                 "Sec-Ch-Ua-Mobile": "?0", | ||||||
|  |                 "Sec-Ch-Ua-Platform": "Windows" | ||||||
|  |                 }     | ||||||
|  |  | ||||||
|     for url in image_urls: |     for url in image_urls: | ||||||
|  |         if not url.startswith(('http://', 'https://')): | ||||||
|  |             if 'www.soujianzhu.cn' not in url: | ||||||
|  |                 url = 'https://www.soujianzhu.cn' + url | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             response = requests.get(url) |             response = requests.get(url, headers=headers, timeout=30) | ||||||
|             if response.status_code == 200: |             if response.status_code == 200: | ||||||
|                 image_name = clean_filename(url) |                 image_name = clean_filename(url) | ||||||
|                 image_path = os.path.join(local_image_folder, image_name) |                 image_path = os.path.join(local_image_folder, image_name) | ||||||
|   | |||||||
| @@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By | |||||||
| from selenium.webdriver.support.ui import WebDriverWait | from selenium.webdriver.support.ui import WebDriverWait | ||||||
| from selenium.webdriver.support import expected_conditions as EC | from selenium.webdriver.support import expected_conditions as EC | ||||||
| from selenium.webdriver.common.action_chains import ActionChains | from selenium.webdriver.common.action_chains import ActionChains | ||||||
| import random | #import random | ||||||
| import time | #import time | ||||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| import html2text | import html2text | ||||||
| import requests | import requests | ||||||
| import base64 | #import base64 | ||||||
|  |  | ||||||
| # 自定义 HTML 转换器类,继承自 html2text.HTML2Text | # 自定义 HTML 转换器类,继承自 html2text.HTML2Text | ||||||
| class CustomHTML2Text(html2text.HTML2Text): | class CustomHTML2Text(html2text.HTML2Text): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user