"""Append the paragraphs of a local text file to a RAGFlow document as chunks.

The script connects to a RAGFlow server, looks up one dataset and one
document inside it by name, splits a UTF-8 text file on blank lines and adds
every non-blank segment to the document via ``Document.add_chunk``.

Configuration:
    RAGFLOW_BASE_URL  optional environment override for the server address.
    RAGFLOW_API_KEY   optional environment override for the API key.
"""
import os
from pathlib import Path

from ragflow_sdk import RAGFlow

# Connection settings. The environment takes precedence so credentials do not
# have to live in the source tree; the literals are kept only as
# backward-compatible fallbacks.
# NOTE(review): this key has been committed to version control -- rotate it
# and drop the fallback once every caller sets RAGFLOW_API_KEY.
base_url = os.environ.get("RAGFLOW_BASE_URL", "http://localhost")
api_key = os.environ.get(
    "RAGFLOW_API_KEY", "ragflow-MyMjM2ODE2NThlMTExZjBiMzJlNzY5Mj"
)

# Names of the dataset and document that receive the new chunks.
dataset_name = "制度"
document_name = "科技创新管理办法(试行).pdf"

# Text file whose blank-line-separated paragraphs become chunks.
file_path = Path(r"G:\11\ragflow_api_test\1.txt")


def _first_match(items, description):
    """Return the first element of *items*.

    Raises:
        IndexError: if *items* is empty. The same exception type a bare
            ``items[0]`` would raise, but with a message that says what
            could not be found.
    """
    if not items:
        raise IndexError(f"{description} not found")
    return items[0]


rag_object = RAGFlow(api_key=api_key, base_url=base_url)

dataset = _first_match(
    rag_object.list_datasets(name=dataset_name),
    f"dataset {dataset_name!r}",
)
doc = _first_match(
    dataset.list_documents(name=document_name),
    f"document {document_name!r}",
)

file_content = file_path.read_text(encoding="utf-8")

# A blank line (two consecutive newlines) separates chunks in the input file.
for num, txt_chunk in enumerate(file_content.split('\n\n')):
    # Skip whitespace-only segments before logging, so the progress output
    # only mentions segments that are actually uploaded.
    if not txt_chunk.strip():
        continue

    # Show the first 30 characters as a preview of the segment.
    print(f"处理文本块: {txt_chunk[:30]}...")
    chunk = doc.add_chunk(content=txt_chunk)
    # num counts every segment of the split, including skipped blank ones.
    print(f"第{num+1} Chunk添加成功! ID: {chunk.id}")