diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..29cb188
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+markdown_files/
+
+# Ignore all .log files
+*.log
+
+# Ignore specific files
+debug.ini
+
+# Ignore directories
+tmp/
+build/
+
+# Do not ignore the important .log file
+!important.log
\ No newline at end of file
diff --git a/markdown_image2minio.py b/markdown_image2minio.py
new file mode 100644
index 0000000..9d3e5c4
--- /dev/null
+++ b/markdown_image2minio.py
@@ -0,0 +1,238 @@
+# Upload the local images referenced by Markdown files to MinIO and rewrite
+# the image links (written as ![](image.jpg)) to point at the uploaded copies.
+#
+# Objects are stored in the MINIO_BUCKET bucket under
+# "{markdown file name}/{order of appearance}{extension}", e.g. 0001.jpg, and
+# are linked as {MINIO_URL}/{MINIO_BUCKET}/{markdown file name}/0001.jpg.
+# Each rewritten file is saved as markdown_image_processed/{file name}.md next
+# to its source file. Progress information is printed along the way.
+import os
+import re
+import sys
+from tkinter import Tk, filedialog
+
+from minio import Minio
+from minio.error import S3Error
+
+MINIO_BUCKET = "md-img"
+PROCESSED_DIR = "markdown_image_processed"
+
+MINIO_HOST = os.getenv("MINIO_HOST", "127.0.0.1")
+MINIO_CONFIG = {
+    "endpoint": f"{MINIO_HOST}:{os.getenv('MINIO_PORT', '9000')}",
+    "access_key": os.getenv("MINIO_USER", "rag_flow"),
+    "secret_key": os.getenv("MINIO_PASSWORD", "infini_rag_flow"),
+    "secure": False
+}
+# Base URL written into the rewritten links. By default it follows the
+# configured endpoint so links stay valid when MINIO_HOST/MINIO_PORT change.
+MINIO_URL = os.getenv("MINIO_URL", f"http://{MINIO_CONFIG['endpoint']}")
+
+# Directory scanned when no path is passed to main() or on the command line.
+DEFAULT_MARKDOWN_DIR = os.getenv(
+    "MARKDOWN_DIR", "G:\\11\\ragflow_api_test\\markdown_files"
+)
+
+# Matches ![alt](target): group 1 is everything before the target, group 2 the
+# target itself and group 3 the closing parenthesis.
+IMAGE_LINK_RE = re.compile(r'(!\[.*?\]\()(.*?)(\))')
+# Targets that already point at a remote or inline resource, not a local file.
+REMOTE_PREFIXES = ("http://", "https://", "data:")
+
+
+def get_minio_client():
+    """Create a MinIO client from MINIO_CONFIG.
+
+    The configuration is printed for diagnostics with the secret key masked so
+    that the credential does not end up in console output or logs.
+    """
+    printable = {**MINIO_CONFIG, "secret_key": "***"}
+    print("当前MinIO配置:", printable)
+    return Minio(
+        endpoint=MINIO_CONFIG["endpoint"],
+        access_key=MINIO_CONFIG["access_key"],
+        secret_key=MINIO_CONFIG["secret_key"],
+        secure=MINIO_CONFIG["secure"]
+    )
+
+
+def upload_file_to_minio(client, bucket_name, object_name, file_path):
+    """Upload a local file to MinIO, creating the bucket when it is missing.
+
+    Args:
+        client: MinIO client used for the upload.
+        bucket_name: Target bucket.
+        object_name: Object key inside the bucket.
+        file_path: Path of the local file to upload.
+
+    Returns:
+        True when the upload succeeded, False otherwise. Failures are printed
+        instead of raised so that one bad file does not stop a batch.
+    """
+    try:
+        if not client.bucket_exists(bucket_name):
+            client.make_bucket(bucket_name)
+            print(f"Bucket '{bucket_name}' created")
+        client.fput_object(bucket_name, object_name, file_path)
+    except S3Error as exc:
+        print("MinIO错误:", exc)
+        return False
+    except Exception as e:
+        # Deliberately broad: connection and file errors are reported as well.
+        print("发生错误:", e)
+        return False
+    print(f"文件 '{file_path}' 成功上传到存储桶 '{bucket_name}' 为 '{object_name}'")
+    return True
+
+
+def select_markdown_file():
+    """Open a file dialog to pick a Markdown file and return its path.
+
+    The returned value is falsy when the dialog is cancelled.
+    """
+    root = Tk()
+    root.withdraw()  # hide the empty root window that Tk would show
+    try:
+        return filedialog.askopenfilename(
+            title="选择Markdown文件",
+            filetypes=[("Markdown files", "*.md")]
+        )
+    finally:
+        root.destroy()
+
+
+def return_markdown_files(md_path):
+    """Return the Markdown files located directly inside a directory.
+
+    Args:
+        md_path: Directory to scan; sub-directories are not searched.
+
+    Returns:
+        Sorted list of ".md" file paths, empty when md_path is not a directory
+        or holds no Markdown files.
+    """
+    if not os.path.isdir(md_path):
+        print(f"路径 {md_path} 不是一个有效的目录。")
+        return []
+
+    candidates = (os.path.join(md_path, f) for f in sorted(os.listdir(md_path)))
+    md_files = [
+        p for p in candidates
+        if p.lower().endswith(".md") and os.path.isfile(p)
+    ]
+    if not md_files:
+        print(f"在目录 {md_path} 中未找到Markdown文件。")
+    return md_files
+
+
+def _upload_image(img_path, seq, md_dir, md_name, minio_client):
+    """Upload one referenced image and return its MinIO URL, or None if skipped.
+
+    Args:
+        img_path: Link target exactly as written in the Markdown file.
+        seq: 1-based position of the image within the file.
+        md_dir: Directory of the Markdown file, used for relative targets.
+        md_name: Markdown file name without extension, used as key prefix.
+        minio_client: Client passed to upload_file_to_minio.
+    """
+    if img_path.lower().startswith(REMOTE_PREFIXES):
+        print(f"图片已是远程链接: {img_path},跳过。")
+        return None
+
+    img_ext = os.path.splitext(img_path)[1] or ".jpg"
+    img_seq = f"{seq:04d}{img_ext}"
+    print(f"处理图片: {img_path} -> {img_seq}")
+    object_name = f"{md_name}/{img_seq}"
+
+    # Relative targets are resolved against the Markdown file's directory.
+    img_abs_path = img_path if os.path.isabs(img_path) else os.path.join(md_dir, img_path)
+    print(f"图片绝对路径: {img_abs_path}")
+
+    if not os.path.isfile(img_abs_path):
+        print(f"图片未找到: {img_abs_path},跳过。")
+        return None
+    if not upload_file_to_minio(minio_client, MINIO_BUCKET, object_name, img_abs_path):
+        print(f"图片上传失败: {img_path},保留原链接。")
+        return None
+
+    print(f"图片 {img_path} 已上传为 {object_name}")
+    return f"{MINIO_URL}/{MINIO_BUCKET}/{object_name}"
+
+
+def process_markdown_images(md_path, minio_client):
+    """Upload the local images of a Markdown file and rewrite their links.
+
+    Images are numbered by order of appearance (0001, 0002, ...). A link is
+    rewritten only after its image was uploaded successfully; remote links,
+    missing files and failed uploads keep their original target.
+
+    Args:
+        md_path: Path of the Markdown file to process.
+        minio_client: Client passed to upload_file_to_minio.
+
+    Returns:
+        Path of the rewritten Markdown file, or None when md_path is empty.
+    """
+    if not md_path:
+        print("未选择文件,程序退出。")
+        return None
+
+    md_filename = os.path.basename(md_path)
+    md_dir = os.path.dirname(md_path)
+    md_name, _ = os.path.splitext(md_filename)
+    print(f"处理文件: {md_filename}")
+
+    with open(md_path, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    print(f"检测到{len(IMAGE_LINK_RE.findall(content))}张图片。")
+
+    seq = 0
+
+    def rewrite_link(match):
+        # Called once per image, in document order, by the single sub() pass
+        # below, so exactly the matched image link is rewritten.
+        nonlocal seq
+        seq += 1
+        prefix, img_path, suffix = match.groups()
+        minio_link = _upload_image(img_path, seq, md_dir, md_name, minio_client)
+        if minio_link is None:
+            return match.group(0)
+        return f"{prefix}{minio_link}{suffix}"
+
+    new_content = IMAGE_LINK_RE.sub(rewrite_link, content)
+
+    out_dir = os.path.join(md_dir, PROCESSED_DIR)
+    os.makedirs(out_dir, exist_ok=True)
+    processed_path = os.path.join(out_dir, md_filename)
+    with open(processed_path, "w", encoding="utf-8") as f:
+        f.write(new_content)
+    print(f"处理后的Markdown已保存到: {processed_path}")
+    return processed_path
+
+
+def main(md_path=None):
+    """Process one Markdown file, or every Markdown file of a directory.
+
+    Args:
+        md_path: File or directory to process. When omitted, the first
+            command-line argument is used, then DEFAULT_MARKDOWN_DIR.
+            select_markdown_file() can supply a path picked interactively.
+    """
+    if md_path is None:
+        md_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MARKDOWN_DIR
+
+    if os.path.isfile(md_path):
+        md_files = [md_path]
+    else:
+        md_files = return_markdown_files(md_path)
+    if not md_files:
+        return
+
+    minio_client = get_minio_client()
+    for md_file in md_files:
+        process_markdown_images(md_file, minio_client)
+
+
+if __name__ == "__main__":
+    main()