From bc2aac4eea0b2ede132caf27cbfb47d5131b8319 Mon Sep 17 00:00:00 2001
From: glowz <24627181@qq.com>
Date: Sat, 5 Jul 2025 11:36:29 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=96=87=E6=A1=A3=E5=A4=84?=
 =?UTF-8?q?=E7=90=86=E9=80=BB=E8=BE=91=EF=BC=8C=E6=B7=BB=E5=8A=A0=E4=BB=8E?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E8=AF=BB=E5=8F=96=E6=96=87=E6=9C=AC=E5=9D=97?=
 =?UTF-8?q?=E5=B9=B6=E4=B8=8A=E4=BC=A0=E8=87=B3=E7=9F=A5=E8=AF=86=E5=BA=93?=
 =?UTF-8?q?=E7=9A=84=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (free-form area, not part of the diff):
* Subject, decoded and translated: "Update the document-processing logic;
  add reading text chunks from a file and uploading them to the knowledge
  base."
* This copy was rebuilt from a whitespace-collapsed version of the patch.
  The line structure matches the hunk header (28 old / 48 new lines) and the
  diffstat (30 insertions, 10 deletions), but indentation width and the exact
  position of blank context lines are a best-effort reconstruction.
  TODO: verify against the original commit before applying.
* NOTE(review): the payload contains hard-coded RAGFlow API keys (one on a
  removed line, one re-added as a comment, one new). They should be treated
  as leaked: rotate them and load the key from the environment instead.

 chunk.py | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/chunk.py b/chunk.py
index fed0ce3..1ee2dfd 100644
--- a/chunk.py
+++ b/chunk.py
@@ -1,28 +1,48 @@
 from ragflow_sdk import RAGFlow
 
-api_key = "ragflow-I5ZDNjMWNhNTdlMjExZjBiOTEwMzI0ZT"
-base_url = "http://192.168.107.165:8099"
+#api_key = "ragflow-I5ZDNjMWNhNTdlMjExZjBiOTEwMzI0ZT"
+#base_url = "http://192.168.107.165:8099"
+base_url = "http://localhost"
+api_key = "ragflow-MyMjM2ODE2NThlMTExZjBiMzJlNzY5Mj"
 rag_object = RAGFlow(api_key=api_key, base_url=base_url)
 
 #dataset = rag_object.create_dataset(name="kb_1")
+datasets = rag_object.list_datasets()
+#dataset = rag_object.list_datasets(name="kb_1")
+dataset = rag_object.list_datasets(name="制度")
 
-dataset = rag_object.list_datasets(name="kb_1")
 dataset = dataset[0]
 
 
 # filename1 = "ragflow.txt"
 # blob = open(filename1 , "rb").read()
 # dataset.upload_documents([{"display_name":filename1,"blob":blob}])
 
-for doc in dataset.list_documents( page=0, page_size=12):
-    print(doc)
-    print("=========================================")
+# for doc in dataset.list_documents( page=0, page_size=12):
+#     print(doc)
+#     print("=========================================")
 
-doc = dataset.list_documents(name= 'ragflow.txt')
+doc = dataset.list_documents(name= '科技创新管理办法(试行).pdf')
 doc = doc[0]
-doc.update({"parser_config": {"chunk_token_count": 256}})
-chunk = doc.add_chunk(content="xxxxxxx")
-print(doc)
+# doc.update({"parser_config": {"chunk_token_count": 256}})
+file_path ="G:\\11\\ragflow_api_test\\1.txt"
+with open(file_path, 'r', encoding='utf-8') as file:
+    file_content = file.read()
+for num,txt_chunk in enumerate(file_content.split('\n\n')):
+    print(f"处理文本块: {txt_chunk[:30]}...")  # 打印前30个字符以示例
+
+    if txt_chunk.strip():  # 确保不是空行
+        chunk = doc.add_chunk(content=txt_chunk)
+        print(f"第{num+1} Chunk添加成功! ID: {chunk.id}")
+
+
+
+# content = '''
+# 第二章 部门职责>第六条 【财务管理部】
+# (一) 配合投标相关费用的办理工作;
+# (二) 负责提供投标所需的相关财务资料。'''
+# chunk = doc.add_chunk(content=content)
+# print(f"Chunk添加成功! ID: {chunk.id}")
 #dataset.upload_documents([{"display_name": "1.txt", "blob": open('1.txt',"rb").read()}])
 
 # 查询所有知识库