diff --git a/dialogue_download_change.py b/dialogue_download_change.py index e3ab39e..2992802 100644 --- a/dialogue_download_change.py +++ b/dialogue_download_change.py @@ -62,6 +62,7 @@ def download_images(): url_mapping = {} # 存储URL到本地路径的映射 for url in image_urls: + try: response = requests.get(url) if response.status_code == 200: @@ -130,16 +131,18 @@ def upload_images(): for url in image_urls: try: - # 下载图片 + if not url.startswith(('http://', 'https://')): + if 'image.lqsjy.cn' not in url: + - # 上传到图床 - new_url = upload_image_to_lsky_pro(url) - if new_url: - url_mapping[url] = new_url - print(f"处理成功: {url} -> {new_url}") - else: - print(f"上传失败: {url}") + # 上传到图床 + new_url = upload_image_to_lsky_pro(url) + if new_url: + url_mapping[url] = new_url + print(f"处理成功: {url} -> {new_url}") + else: + print(f"上传失败: {url}") except Exception as e: print(f"处理出错 {url}: {str(e)}") @@ -151,8 +154,12 @@ def upload_images(): for old_url, new_url in url_mapping.items(): new_content = new_content.replace(old_url, new_url) - # 保存更新后的文件 - with open(markdown_file_path, 'w', encoding='utf-8') as file: + # 构造新的文件名 + file_name, file_ext = os.path.splitext(markdown_file_path) + new_file_path = f"{file_name}_lsky{file_ext}" + + # 保存为新文件 + with open(new_file_path, 'w', encoding='utf-8') as file: file.write(new_content) messagebox.showinfo("完成", f"成功处理 {len(url_mapping)} 张图片") diff --git a/download2markdown - one.py b/download2markdown - one.py index c510db9..ac4198e 100644 --- a/download2markdown - one.py +++ b/download2markdown - one.py @@ -12,7 +12,7 @@ import os import re import html2text import requests -#import base64 +import base64 # 自定义 HTML 转换器类,继承自 html2text.HTML2Text class CustomHTML2Text(html2text.HTML2Text): @@ -79,40 +79,32 @@ def process_strikethrough(element): def download_image(url, save_path): - # 设置请求头 + + + + # 处理普通URL图片 headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", - "Referer": "https://www.soujianzhu.cn/", - "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', - "Sec-Ch-Ua-Mobile": "?0", - "Sec-Ch-Ua-Platform": "Windows" - } + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", + "Referer": "https://www.soujianzhu.cn/", + "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": "Windows" + } + if not url.startswith(('http://', 'https://')): if 'www.soujianzhu.cn' not in url: url = 'https://www.soujianzhu.cn' + url + try: - # 发送HTTP请求下载图片 - response = requests.get(url, timeout=30) - - # 检查响应状态码 - if response.status_code != 200: - print(f"下载失败,状态码: {response.status_code}") - return False - - # 将图片内容写入文件 - with open(save_path, 'wb') as f: - f.write(response.content) - - # 验证文件是否成功保存 - if os.path.exists(save_path) and os.path.getsize(save_path) > 0: + response = requests.get(url, headers=headers, timeout=30) + if response.status_code == 200: + with open(save_path, 'wb') as f: + f.write(response.content) return True - else: - print("文件保存失败") - return False - except Exception as e: - print(f"下载图片时发生错误: {str(e)}") + print(f"下载图片失败: {url}\n错误: {str(e)}") return False + return False @@ -135,7 +127,12 @@ def save_lemma_content_as_markdown(driver, title_name): # 下载图片并更新图片地址 downloaded_images = [] for img in content_div.find_all('img'): - img_url = img['src'] + img_url = img.get('data-original') or img.get('src') + + # 确保 img_url 不为空 + if not img_url: + print(f"找不到图片URL: {img}") + continue img_name = os.path.basename(img_url) img_path = os.path.join('images', img_name) try: