处理lazy加载的图片,上传图片的markdown更新为新文件
This commit is contained in:
parent
4ffeb3ef50
commit
3df14930e4
@ -62,6 +62,7 @@ def download_images():
|
|||||||
url_mapping = {} # 存储URL到本地路径的映射
|
url_mapping = {} # 存储URL到本地路径的映射
|
||||||
|
|
||||||
for url in image_urls:
|
for url in image_urls:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
@ -130,16 +131,18 @@ def upload_images():
|
|||||||
|
|
||||||
for url in image_urls:
|
for url in image_urls:
|
||||||
try:
|
try:
|
||||||
# 下载图片
|
if not url.startswith(('http://', 'https://')):
|
||||||
|
if 'image.lqsjy.cn' not in url:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 上传到图床
|
# 上传到图床
|
||||||
new_url = upload_image_to_lsky_pro(url)
|
new_url = upload_image_to_lsky_pro(url)
|
||||||
if new_url:
|
if new_url:
|
||||||
url_mapping[url] = new_url
|
url_mapping[url] = new_url
|
||||||
print(f"处理成功: {url} -> {new_url}")
|
print(f"处理成功: {url} -> {new_url}")
|
||||||
else:
|
else:
|
||||||
print(f"上传失败: {url}")
|
print(f"上传失败: {url}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"处理出错 {url}: {str(e)}")
|
print(f"处理出错 {url}: {str(e)}")
|
||||||
@ -151,8 +154,12 @@ def upload_images():
|
|||||||
for old_url, new_url in url_mapping.items():
|
for old_url, new_url in url_mapping.items():
|
||||||
new_content = new_content.replace(old_url, new_url)
|
new_content = new_content.replace(old_url, new_url)
|
||||||
|
|
||||||
# 保存更新后的文件
|
# 构造新的文件名
|
||||||
with open(markdown_file_path, 'w', encoding='utf-8') as file:
|
file_name, file_ext = os.path.splitext(markdown_file_path)
|
||||||
|
new_file_path = f"{file_name}_lsky{file_ext}"
|
||||||
|
|
||||||
|
# 保存为新文件
|
||||||
|
with open(new_file_path, 'w', encoding='utf-8') as file:
|
||||||
file.write(new_content)
|
file.write(new_content)
|
||||||
|
|
||||||
messagebox.showinfo("完成", f"成功处理 {len(url_mapping)} 张图片")
|
messagebox.showinfo("完成", f"成功处理 {len(url_mapping)} 张图片")
|
||||||
|
@ -12,7 +12,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import html2text
|
import html2text
|
||||||
import requests
|
import requests
|
||||||
#import base64
|
import base64
|
||||||
|
|
||||||
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
|
# 自定义 HTML 转换器类,继承自 html2text.HTML2Text
|
||||||
class CustomHTML2Text(html2text.HTML2Text):
|
class CustomHTML2Text(html2text.HTML2Text):
|
||||||
@ -79,40 +79,32 @@ def process_strikethrough(element):
|
|||||||
|
|
||||||
|
|
||||||
def download_image(url, save_path):
|
def download_image(url, save_path):
|
||||||
# 设置请求头
|
|
||||||
|
|
||||||
|
|
||||||
|
# 处理普通URL图片
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
|
||||||
"Referer": "https://www.soujianzhu.cn/",
|
"Referer": "https://www.soujianzhu.cn/",
|
||||||
"Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
|
"Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
|
||||||
"Sec-Ch-Ua-Mobile": "?0",
|
"Sec-Ch-Ua-Mobile": "?0",
|
||||||
"Sec-Ch-Ua-Platform": "Windows"
|
"Sec-Ch-Ua-Platform": "Windows"
|
||||||
}
|
}
|
||||||
|
|
||||||
if not url.startswith(('http://', 'https://')):
|
if not url.startswith(('http://', 'https://')):
|
||||||
if 'www.soujianzhu.cn' not in url:
|
if 'www.soujianzhu.cn' not in url:
|
||||||
url = 'https://www.soujianzhu.cn' + url
|
url = 'https://www.soujianzhu.cn' + url
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 发送HTTP请求下载图片
|
response = requests.get(url, headers=headers, timeout=30)
|
||||||
response = requests.get(url, timeout=30)
|
if response.status_code == 200:
|
||||||
|
with open(save_path, 'wb') as f:
|
||||||
# 检查响应状态码
|
f.write(response.content)
|
||||||
if response.status_code != 200:
|
|
||||||
print(f"下载失败,状态码: {response.status_code}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# 将图片内容写入文件
|
|
||||||
with open(save_path, 'wb') as f:
|
|
||||||
f.write(response.content)
|
|
||||||
|
|
||||||
# 验证文件是否成功保存
|
|
||||||
if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
|
|
||||||
return True
|
return True
|
||||||
else:
|
|
||||||
print("文件保存失败")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"下载图片时发生错误: {str(e)}")
|
print(f"下载图片失败: {url}\n错误: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -135,7 +127,12 @@ def save_lemma_content_as_markdown(driver, title_name):
|
|||||||
# 下载图片并更新图片地址
|
# 下载图片并更新图片地址
|
||||||
downloaded_images = []
|
downloaded_images = []
|
||||||
for img in content_div.find_all('img'):
|
for img in content_div.find_all('img'):
|
||||||
img_url = img['src']
|
img_url = img.get('data-original') or img.get('src')
|
||||||
|
|
||||||
|
# 确保 img_url 不为空
|
||||||
|
if not img_url:
|
||||||
|
print(f"找不到图片URL: {img}")
|
||||||
|
continue
|
||||||
img_name = os.path.basename(img_url)
|
img_name = os.path.basename(img_url)
|
||||||
img_path = os.path.join('images', img_name)
|
img_path = os.path.join('images', img_name)
|
||||||
try:
|
try:
|
||||||
|
Loading…
Reference in New Issue
Block a user