添加.gitignore文件以忽略特定文件和目录;新增markdown_image2minio.py脚本以处理Markdown文件中的图片并上传到MinIO
This commit is contained in:
		
							
								
								
									
										14
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| markdown_files/ | ||||
|  | ||||
| # 忽略所有 .log 文件 | ||||
| *.log | ||||
|  | ||||
| # 忽略特定文件 | ||||
| debug.ini | ||||
|  | ||||
| # 忽略目录 | ||||
| tmp/ | ||||
| build/ | ||||
|  | ||||
| # 不忽略重要的.log文件 | ||||
| !important.log | ||||
							
								
								
									
										130
									
								
								markdown_image2minio.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								markdown_image2minio.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,130 @@ | ||||
| # 引用minio_api.py中的需要的函数 | ||||
| # 对话框选择一个markdown文件,将其中的图片上传到MinIO,其中图片的链接格式为, | ||||
| # MinIO的bucket_name = "md-img",object_name="{markdown的文件名}/{图片出现的顺序}.jpg",图片出现的顺序号如0001.jpg | ||||
|  | ||||
| # 更新markdown文件中的图片链接为MinIO的链接, 链接格式为http://127.0.0.1:9000/md-img/{markdown的文件名}/{图片出现的顺序}.jpg | ||||
| # 将更新后的markdown文件保存/markdown_image_processed/{markdown的文件名}.md | ||||
| # 输出必要的处理信息 | ||||
| from minio import Minio | ||||
| import os | ||||
| import re | ||||
| from tkinter import Tk, filedialog | ||||
| from minio.error import S3Error | ||||
|  | ||||
# Bucket that receives every uploaded image.
MINIO_BUCKET = "md-img"
# Sub-directory (created next to each source file) for rewritten Markdown.
PROCESSED_DIR = "markdown_image_processed"

# Connection settings; host, port and credentials can each be overridden
# through the MINIO_HOST / MINIO_PORT / MINIO_USER / MINIO_PASSWORD
# environment variables.
MINIO_HOST = os.getenv("MINIO_HOST", "127.0.0.1")
MINIO_CONFIG = {
    "endpoint": f"{MINIO_HOST}:{os.getenv('MINIO_PORT', '9000')}",
    "access_key": os.getenv("MINIO_USER", "rag_flow"),
    "secret_key": os.getenv("MINIO_PASSWORD", "infini_rag_flow"),
    "secure": False,
}

# Base URL used in rewritten image links. It is derived from the client
# endpoint so that links keep pointing at the server the images were
# actually uploaded to when MINIO_HOST / MINIO_PORT are overridden.
# With no overrides this is still "http://127.0.0.1:9000".
MINIO_URL = f"{'https' if MINIO_CONFIG['secure'] else 'http'}://{MINIO_CONFIG['endpoint']}"
|  | ||||
def get_minio_client():
    """Create a MinIO client from the module-level ``MINIO_CONFIG``.

    The active configuration is printed for troubleshooting, with the
    secret key masked so credentials never end up in console output or
    captured logs.

    Returns:
        A ``Minio`` client built from the configured endpoint, access key,
        secret key and ``secure`` flag.
    """
    # Copy the config and mask the secret before echoing it.
    printable_config = {**MINIO_CONFIG, "secret_key": "***"}
    print("当前MinIO配置:", printable_config)
    return Minio(
        endpoint=MINIO_CONFIG["endpoint"],
        access_key=MINIO_CONFIG["access_key"],
        secret_key=MINIO_CONFIG["secret_key"],
        secure=MINIO_CONFIG["secure"],
    )
|  | ||||
def upload_file_to_minio(client, bucket_name, object_name, file_path):
    """Upload a local file to MinIO, creating the bucket if it is missing.

    Errors are reported on stdout rather than raised (best-effort upload),
    but the outcome is returned so callers can avoid referencing an object
    that was never stored.

    Args:
        client: Object exposing ``bucket_exists``, ``make_bucket`` and
            ``fput_object`` (a ``Minio`` client).
        bucket_name: Destination bucket.
        object_name: Key of the object inside the bucket.
        file_path: Path of the local file to upload.

    Returns:
        ``True`` when the upload succeeded, ``False`` when any error occurred.
    """
    try:
        if not client.bucket_exists(bucket_name):
            client.make_bucket(bucket_name)
            print(f"Bucket '{bucket_name}' created")
        client.fput_object(bucket_name, object_name, file_path)
    except S3Error as exc:
        print("MinIO错误:", exc)
        return False
    except Exception as e:
        # Deliberately broad: one bad file must not abort a whole batch.
        print("发生错误:", e)
        return False

    print(f"文件 '{file_path}' 成功上传到存储桶 '{bucket_name}' 为 '{object_name}'")
    return True
|  | ||||
def select_markdown_file():
    """Show a file-open dialog restricted to ``*.md`` files.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns for the user's
        choice (the selected path; an empty value if the dialog is
        cancelled).
    """
    # A root window is required by the dialog; keep it hidden and make sure
    # it is destroyed afterwards so no Tk instance is leaked.
    root = Tk()
    root.withdraw()
    try:
        return filedialog.askopenfilename(
            title="选择Markdown文件",
            filetypes=[("Markdown files", "*.md")],
        )
    finally:
        root.destroy()
|  | ||||
def return_markdown_files(md_path):
    """List the Markdown files directly inside a directory.

    Only regular files whose name ends in ``.md`` (case-insensitive) are
    returned; sub-directories are not searched. The result is sorted so
    processing order is reproducible regardless of ``os.listdir`` order.

    Args:
        md_path: Directory to scan.

    Returns:
        Sorted list of file paths (``md_path`` joined with each file name);
        an empty list when ``md_path`` is not a directory or holds no
        Markdown files. A message is printed in both of those cases.
    """
    if not os.path.isdir(md_path):
        print(f"路径 {md_path} 不是一个有效的目录。")
        return []

    candidates = (
        os.path.join(md_path, name)
        for name in os.listdir(md_path)
        if name.lower().endswith(".md")
    )
    # Skip directories that merely happen to be named "*.md".
    md_files = sorted(path for path in candidates if os.path.isfile(path))
    if not md_files:
        print(f"在目录 {md_path} 中未找到Markdown文件。")
    return md_files
|  | ||||
|  | ||||
|  | ||||
def process_markdown_images(md_path, minio_client):
    """Upload a Markdown file's local images to MinIO and rewrite their links.

    Every ``![alt](target)`` occurrence is numbered in order of appearance
    (``0001``, ``0002``, ...). Local images that exist are uploaded to
    ``MINIO_BUCKET`` as ``{markdown name}/{sequence}{extension}`` (``.jpg``
    when the target has no extension) and their link is replaced by
    ``{MINIO_URL}/{MINIO_BUCKET}/{object name}``. Remote targets, missing
    files and failed uploads keep their original link. The result is
    written to ``PROCESSED_DIR`` next to the source file, under the same
    file name.

    Args:
        md_path: Path of the Markdown file to process.
        minio_client: Client handed to ``upload_file_to_minio``.

    Returns:
        Path of the rewritten Markdown file, or ``None`` when ``md_path``
        is empty.
    """
    if not md_path:
        print("未选择文件,程序退出。")
        return None

    md_filename = os.path.basename(md_path)
    md_dir = os.path.dirname(md_path)
    md_name, _ = os.path.splitext(md_filename)
    print(f"处理文件: {md_filename}")

    with open(md_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Groups: "![alt](" prefix, link target, closing ")".
    img_pattern = re.compile(r'(!\[.*?\]\()(.*?)(\))')
    print(f"检测到{len(img_pattern.findall(content))}张图片。")

    seq = 0

    def _rewrite(match):
        """Return the replacement text for one image occurrence."""
        nonlocal seq
        seq += 1  # numbering counts every image, including skipped ones
        prefix, img_path, suffix = match.groups()

        img_ext = os.path.splitext(img_path)[1] or ".jpg"
        img_seq = f"{seq:04d}{img_ext}"
        print(f"处理图片: {img_path} -> {img_seq}")
        object_name = f"{md_name}/{img_seq}"

        # Remote or inline images cannot be read from disk; leave them alone.
        if img_path.lower().startswith(("http://", "https://", "data:")):
            print(f"跳过非本地图片: {img_path}")
            return match.group(0)

        # Relative targets are resolved against the Markdown file's folder.
        if os.path.isabs(img_path):
            img_abs_path = img_path
        else:
            img_abs_path = os.path.join(md_dir, img_path)
        print(f"图片绝对路径: {img_abs_path}")

        if not os.path.exists(img_abs_path):
            print(f"图片未找到: {img_abs_path},跳过。")
            return match.group(0)

        uploaded = upload_file_to_minio(
            minio_client, MINIO_BUCKET, object_name, img_abs_path
        )
        # Only an explicit False signals failure; an uploader that reports
        # nothing (returns None) is treated as having succeeded.
        if uploaded is False:
            print(f"图片上传失败: {img_abs_path},保留原链接。")
            return match.group(0)

        minio_link = f"{MINIO_URL}/{MINIO_BUCKET}/{object_name}"
        print(f"图片 {img_path} 已上传为 {object_name}")
        return f"{prefix}{minio_link}{suffix}"

    # Substituting per match rewrites exactly the image occurrence that was
    # found, never an unrelated "[text](same/path)" link elsewhere.
    new_content = img_pattern.sub(_rewrite, content)

    output_dir = os.path.join(md_dir, PROCESSED_DIR)
    os.makedirs(output_dir, exist_ok=True)
    processed_path = os.path.join(output_dir, md_filename)
    with open(processed_path, "w", encoding="utf-8") as f:
        f.write(new_content)
    print(f"处理后的Markdown已保存到: {processed_path}")
    return processed_path
|  | ||||
def main(md_dir=None):
    """Process every Markdown file found in a directory.

    The directory is taken from ``md_dir`` when given, otherwise from the
    first command-line argument, otherwise from the built-in default path.

    Args:
        md_dir: Optional directory containing the Markdown files.
    """
    import sys

    if md_dir is None:
        if len(sys.argv) > 1:
            md_dir = sys.argv[1]
        else:
            # Default kept for backward compatibility with existing usage.
            md_dir = "G:\\11\\ragflow_api_test\\markdown_files"

    md_files = return_markdown_files(md_dir)
    if not md_files:
        # Nothing to do: avoid creating a client for no work.
        return

    minio_client = get_minio_client()
    for md_file in md_files:
        process_markdown_images(md_file, minio_client)


if __name__ == "__main__":
    main()
		Reference in New Issue
	
	Block a user