diff --git a/dialogue_download_change.py b/dialogue_download_change.py index 2992802..2ac5281 100644 --- a/dialogue_download_change.py +++ b/dialogue_download_change.py @@ -61,10 +61,21 @@ def download_images(): success_count = 0 url_mapping = {} # 存储URL到本地路径的映射 + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", + "Referer": "https://www.soujianzhu.cn/", + "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": "Windows" + } + for url in image_urls: + if not url.startswith(('http://', 'https://')): + if 'www.soujianzhu.cn' not in url: + url = 'https://www.soujianzhu.cn' + url try: - response = requests.get(url) + response = requests.get(url, headers=headers, timeout=30) if response.status_code == 200: image_name = clean_filename(url) image_path = os.path.join(local_image_folder, image_name) diff --git a/download2markdown - one.py b/download2markdown - one.py index ac4198e..60156c5 100644 --- a/download2markdown - one.py +++ b/download2markdown - one.py @@ -5,14 +5,14 @@ from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.action_chains import ActionChains -import random -import time +#import random +#import time from bs4 import BeautifulSoup import os import re import html2text import requests -import base64 +#import base64 # 自定义 HTML 转换器类,继承自 html2text.HTML2Text class CustomHTML2Text(html2text.HTML2Text):