Add HTTP request headers and a timeout to image downloads
This commit is contained in:
		@@ -61,10 +61,21 @@ def download_images():
 | 
				
			|||||||
    success_count = 0
 | 
					    success_count = 0
 | 
				
			||||||
    url_mapping = {}  # 存储URL到本地路径的映射
 | 
					    url_mapping = {}  # 存储URL到本地路径的映射
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    headers = {
 | 
				
			||||||
 | 
					                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
 | 
				
			||||||
 | 
					                "Referer": "https://www.soujianzhu.cn/",
 | 
				
			||||||
 | 
					                "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
 | 
				
			||||||
 | 
					                "Sec-Ch-Ua-Mobile": "?0",
 | 
				
			||||||
 | 
					                "Sec-Ch-Ua-Platform": "Windows"
 | 
				
			||||||
 | 
					                }    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for url in image_urls:
 | 
					    for url in image_urls:
 | 
				
			||||||
 | 
					        if not url.startswith(('http://', 'https://')):
 | 
				
			||||||
 | 
					            if 'www.soujianzhu.cn' not in url:
 | 
				
			||||||
 | 
					                url = 'https://www.soujianzhu.cn' + url
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            response = requests.get(url)
 | 
					            response = requests.get(url, headers=headers, timeout=30)
 | 
				
			||||||
            if response.status_code == 200:
 | 
					            if response.status_code == 200:
 | 
				
			||||||
                image_name = clean_filename(url)
 | 
					                image_name = clean_filename(url)
 | 
				
			||||||
                image_path = os.path.join(local_image_folder, image_name)
 | 
					                image_path = os.path.join(local_image_folder, image_name)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By
 | 
				
			|||||||
from selenium.webdriver.support.ui import WebDriverWait
 | 
					from selenium.webdriver.support.ui import WebDriverWait
 | 
				
			||||||
from selenium.webdriver.support import expected_conditions as EC
 | 
					from selenium.webdriver.support import expected_conditions as EC
 | 
				
			||||||
from selenium.webdriver.common.action_chains import ActionChains
 | 
					from selenium.webdriver.common.action_chains import ActionChains
 | 
				
			||||||
import random
 | 
					#import random
 | 
				
			||||||
import time
 | 
					#import time
 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import html2text
 | 
					import html2text
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
import base64
 | 
					#import base64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
 | 
					# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
 | 
				
			||||||
class CustomHTML2Text(html2text.HTML2Text):
 | 
					class CustomHTML2Text(html2text.HTML2Text):
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user