处理lazy加载的图片,上传图片后将markdown更新保存为新文件
This commit is contained in:
		@@ -62,6 +62,7 @@ def download_images():
 | 
			
		||||
    url_mapping = {}  # 存储URL到本地路径的映射
 | 
			
		||||
 | 
			
		||||
    for url in image_urls:
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            response = requests.get(url)
 | 
			
		||||
            if response.status_code == 200:
 | 
			
		||||
@@ -130,16 +131,18 @@ def upload_images():
 | 
			
		||||
 | 
			
		||||
    for url in image_urls:
 | 
			
		||||
        try:
 | 
			
		||||
            # 下载图片
 | 
			
		||||
            if not url.startswith(('http://', 'https://')):
 | 
			
		||||
                if 'image.lqsjy.cn' not in url:            
 | 
			
		||||
            
 | 
			
		||||
 | 
			
		||||
                
 | 
			
		||||
                # 上传到图床
 | 
			
		||||
                new_url = upload_image_to_lsky_pro(url)
 | 
			
		||||
                if new_url:
 | 
			
		||||
                    url_mapping[url] = new_url
 | 
			
		||||
                    print(f"处理成功: {url} -> {new_url}")
 | 
			
		||||
                else:
 | 
			
		||||
                    print(f"上传失败: {url}")
 | 
			
		||||
                    # 上传到图床
 | 
			
		||||
                    new_url = upload_image_to_lsky_pro(url)
 | 
			
		||||
                    if new_url:
 | 
			
		||||
                        url_mapping[url] = new_url
 | 
			
		||||
                        print(f"处理成功: {url} -> {new_url}")
 | 
			
		||||
                    else:
 | 
			
		||||
                        print(f"上传失败: {url}")
 | 
			
		||||
            
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print(f"处理出错 {url}: {str(e)}")
 | 
			
		||||
@@ -151,8 +154,12 @@ def upload_images():
 | 
			
		||||
        for old_url, new_url in url_mapping.items():
 | 
			
		||||
            new_content = new_content.replace(old_url, new_url)
 | 
			
		||||
        
 | 
			
		||||
        # 保存更新后的文件
 | 
			
		||||
        with open(markdown_file_path, 'w', encoding='utf-8') as file:
 | 
			
		||||
        # 构造新的文件名
 | 
			
		||||
        file_name, file_ext = os.path.splitext(markdown_file_path)
 | 
			
		||||
        new_file_path = f"{file_name}_lsky{file_ext}"
 | 
			
		||||
        
 | 
			
		||||
        # 保存为新文件
 | 
			
		||||
        with open(new_file_path, 'w', encoding='utf-8') as file:
 | 
			
		||||
            file.write(new_content)
 | 
			
		||||
        
 | 
			
		||||
        messagebox.showinfo("完成", f"成功处理 {len(url_mapping)} 张图片")
 | 
			
		||||
 
 | 
			
		||||
@@ -12,7 +12,7 @@ import os
 | 
			
		||||
import re
 | 
			
		||||
import html2text
 | 
			
		||||
import requests
 | 
			
		||||
#import base64
 | 
			
		||||
import base64
 | 
			
		||||
 | 
			
		||||
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
 | 
			
		||||
class CustomHTML2Text(html2text.HTML2Text):
 | 
			
		||||
@@ -79,40 +79,32 @@ def process_strikethrough(element):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def download_image(url, save_path):
 | 
			
		||||
        # 设置请求头
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    # 处理普通URL图片
 | 
			
		||||
    headers = {
 | 
			
		||||
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
 | 
			
		||||
        "Referer": "https://www.soujianzhu.cn/",
 | 
			
		||||
        "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
 | 
			
		||||
        "Sec-Ch-Ua-Mobile": "?0",
 | 
			
		||||
        "Sec-Ch-Ua-Platform": "Windows"
 | 
			
		||||
    }
 | 
			
		||||
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
 | 
			
		||||
                "Referer": "https://www.soujianzhu.cn/",
 | 
			
		||||
                "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
 | 
			
		||||
                "Sec-Ch-Ua-Mobile": "?0",
 | 
			
		||||
                "Sec-Ch-Ua-Platform": "Windows"
 | 
			
		||||
                }
 | 
			
		||||
    
 | 
			
		||||
    if not url.startswith(('http://', 'https://')):
 | 
			
		||||
        if 'www.soujianzhu.cn' not in url:
 | 
			
		||||
            url = 'https://www.soujianzhu.cn' + url
 | 
			
		||||
            
 | 
			
		||||
    try:
 | 
			
		||||
        # 发送HTTP请求下载图片
 | 
			
		||||
        response = requests.get(url, timeout=30)
 | 
			
		||||
        
 | 
			
		||||
        # 检查响应状态码
 | 
			
		||||
        if response.status_code != 200:
 | 
			
		||||
            print(f"下载失败,状态码: {response.status_code}")
 | 
			
		||||
            return False
 | 
			
		||||
            
 | 
			
		||||
        # 将图片内容写入文件
 | 
			
		||||
        with open(save_path, 'wb') as f:
 | 
			
		||||
            f.write(response.content)
 | 
			
		||||
            
 | 
			
		||||
        # 验证文件是否成功保存
 | 
			
		||||
        if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
 | 
			
		||||
        response = requests.get(url, headers=headers, timeout=30)
 | 
			
		||||
        if response.status_code == 200:
 | 
			
		||||
            with open(save_path, 'wb') as f:
 | 
			
		||||
                f.write(response.content)
 | 
			
		||||
            return True
 | 
			
		||||
        else:
 | 
			
		||||
            print("文件保存失败")
 | 
			
		||||
            return False
 | 
			
		||||
            
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        print(f"下载图片时发生错误: {str(e)}")
 | 
			
		||||
        print(f"下载图片失败: {url}\n错误: {str(e)}")
 | 
			
		||||
        return False
 | 
			
		||||
    return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -135,7 +127,12 @@ def save_lemma_content_as_markdown(driver, title_name):
 | 
			
		||||
        # 下载图片并更新图片地址
 | 
			
		||||
        downloaded_images = []
 | 
			
		||||
        for img in content_div.find_all('img'):
 | 
			
		||||
            img_url = img['src']
 | 
			
		||||
            img_url = img.get('data-original') or img.get('src')
 | 
			
		||||
            
 | 
			
		||||
            # 确保 img_url 不为空
 | 
			
		||||
            if not img_url:
 | 
			
		||||
                print(f"找不到图片URL: {img}")
 | 
			
		||||
                continue
 | 
			
		||||
            img_name = os.path.basename(img_url)
 | 
			
		||||
            img_path = os.path.join('images', img_name)
 | 
			
		||||
            try:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user