添加初始文件和功能，包括数据集和文档类的定义，以及添加文档块的用户界面

2025-07-04 18:26:36 +08:00
parent db6f3b865f
commit 587305f070
7 changed files with 133 additions and 0 deletions
--- a/1.txt
+++ b/1.txt
@@ -0,0 +1,2 @@
+ragflow api test!!
+hello world
--- a/add_chunk.py
+++ b/add_chunk.py
@@ -0,0 +1,20 @@
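+# 0. Provide a dialog window.
+# 1. Dropdown: choose one of the datasets returned by rag_object.list_datasets().
+# 2. Dropdown: based on the chosen dataset, choose one of the documents returned by dataset.list_documents().
+# 3. Button: pick a .txt file and add its content to the chosen document via doc.add_chunk().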
+
+
+
+
+
+# from ragflow_sdk import RAGFlow
+
+# api_key = "ragflow-I5ZDNjMWNhNTdlMjExZjBiOTEwMzI0ZT"
+# base_url = "http://192.168.107.165:8099"
+
+# rag_object = RAGFlow(api_key=api_key, base_url=base_url)
+# datasets = rag_object.list_datasets()
+# for ds in datasets:
+#     print(ds.name, ds.id)
+# dataset = datasets[0]
+
--- a/chunk.py
+++ b/chunk.py
@@ -0,0 +1,36 @@
+from ragflow_sdk import RAGFlow
+
+# Connection settings for the RAGFlow server this scratch script talks to.
+# NOTE(review): the API key is committed in plain text; consider loading it
+# from the environment and rotating this key.
+api_key = "ragflow-I5ZDNjMWNhNTdlMjExZjBiOTEwMzI0ZT"
+base_url = "http://192.168.107.165:8099"
+
+rag_object = RAGFlow(api_key=api_key, base_url=base_url)
+#dataset = rag_object.create_dataset(name="kb_1")
+
+
+# Look up the dataset by name. Guard the result so an empty list ends the
+# script with a readable message instead of an IndexError.
+matching_datasets = rag_object.list_datasets(name="kb_1")
+if not matching_datasets:
+    raise SystemExit('Dataset "kb_1" was not found on the RAGFlow server.')
+dataset = matching_datasets[0]
+
+# filename1 = "ragflow.txt"
+# blob = open(filename1 , "rb").read()
+# dataset.upload_documents([{"display_name":filename1,"blob":blob}])
+
+# Print the documents in the dataset, one separator line after each.
+# NOTE(review): the SDK's documented default for `page` is 1; confirm that
+# page=0 is intended here.
+for doc in dataset.list_documents( page=0, page_size=12):
+    print(doc)
+    print("=========================================")
+
+
+# Fetch the target document by name, again guarding against an empty result.
+matching_documents = dataset.list_documents(name= 'ragflow.txt')
+if not matching_documents:
+    raise SystemExit('Document "ragflow.txt" was not found in dataset "kb_1".')
+doc = matching_documents[0]
+
+# Adjust the parser configuration, then append one chunk of placeholder text.
+doc.update({"parser_config": {"chunk_token_count": 256}})
+chunk = doc.add_chunk(content="xxxxxxx")
+print(doc)
+#dataset.upload_documents([{"display_name": "1.txt", "blob": open('1.txt',"rb").read()}])
+
+# List all knowledge bases (datasets)
+# for dataset in rag_object.list_datasets():
+#     print(dataset)
+
+
+# dataset = rag_object.list_datasets(name = "制度")
+#print(dataset)    
+
+
--- a/ragflow.txt
+++ b/ragflow.txt
@@ -0,0 +1,2 @@
+ragflow api test!!
+hello world
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+ragflow_sdk
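+# tkinter ships with Python itself (python3-tk on some Linux distributions); it is not a PyPI package and cannot be installed by pip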
+pandas
--- a/src/add_chunk.py
+++ b/src/add_chunk.py
@@ -0,0 +1,45 @@
+from tkinter import Tk, StringVar, Label, OptionMenu, Button, filedialog
+from ragflow_sdk import RAGFlow
+
+# RAGFlow connection settings used to build the SDK client below.
+# NOTE(review): the API key is committed in plain text; consider loading it
+# from the environment and rotating this key.
+api_key = "ragflow-I5ZDNjMWNhNTdlMjExZjBiOTEwMzI0ZT"
+base_url = "http://192.168.107.165:8099"
+
+# Module-level SDK client; the callbacks defined in this file read it as a global.
+rag_object = RAGFlow(api_key=api_key, base_url=base_url)
+
+def _read_text_file(path):
+    """Return the text of *path*, trying UTF-8 first and the platform default second."""
+    try:
+        # utf-8-sig also strips a leading byte-order mark if the editor wrote one.
+        with open(path, 'r', encoding='utf-8-sig') as file:
+            return file.read()
+    except UnicodeDecodeError:
+        # Fall back to the locale encoding, which is what the plain open() used before.
+        with open(path, 'r') as file:
+            return file.read()
+
+
+def add_chunk_to_document():
+    """Ask for a .txt file and add its text as one chunk to the selected document.
+
+    The dropdowns hold dataset and document *names*, so both are resolved to
+    SDK objects by name before the chunk is added through the document's own
+    ``add_chunk`` method. If several documents share a name, the first match
+    is used. Problems are reported in a dialog; nothing is raised to Tk.
+    """
+    # Local import: the dialogs are only needed by this handler.
+    from tkinter import messagebox
+
+    dataset_name = dataset_var.get()
+    document_name = document_var.get()
+    if not dataset_name or not document_name:
+        messagebox.showwarning("Add Chunk", "Please select a dataset and a document first.")
+        return
+
+    file_path = filedialog.askopenfilename(filetypes=[("Text files", "*.txt")])
+    if not file_path:
+        return  # the user cancelled the file dialog
+
+    try:
+        content = _read_text_file(file_path)
+    except (OSError, UnicodeDecodeError) as exc:
+        messagebox.showerror("Add Chunk", f"Could not read the file: {exc}")
+        return
+    if not content.strip():
+        messagebox.showwarning("Add Chunk", "The selected file is empty.")
+        return
+
+    try:
+        matching_datasets = rag_object.list_datasets(name=dataset_name)
+        if not matching_datasets:
+            raise LookupError(f"dataset not found: {dataset_name}")
+        matching_documents = matching_datasets[0].list_documents(name=document_name)
+        if not matching_documents:
+            raise LookupError(f"document not found: {document_name}")
+        matching_documents[0].add_chunk(content=content)
+    except Exception as exc:
+        # UI boundary: show any SDK failure to the user instead of letting
+        # Tk print it to stderr unnoticed.
+        messagebox.showerror("Add Chunk", f"Failed to add chunk: {exc}")
+        return
+
+    messagebox.showinfo("Add Chunk", "Chunk added.")
+
+def update_documents(*args):
+    """Repopulate the document dropdown for the currently selected dataset.
+
+    Used as the dataset OptionMenu's command callback; the positional
+    arguments Tk passes in are ignored and the dataset name is read from
+    ``dataset_var`` instead. Only the documents returned by a default
+    ``list_documents()`` call are listed.
+    """
+    # Drop the previous dataset's entries and selection first, so a stale
+    # document name can never be submitted against the new dataset.
+    menu = document_menu['menu']
+    menu.delete(0, 'end')
+    document_var.set('')
+
+    dataset_name = dataset_var.get()
+    if not dataset_name:
+        return
+
+    matching_datasets = rag_object.list_datasets(name=dataset_name)
+    if not matching_datasets:
+        return  # nothing to list; leave the dropdown empty
+
+    for doc in matching_datasets[0].list_documents():
+        # Bind the name as a default argument so each entry sets its own value.
+        menu.add_command(label=doc.name, command=lambda value=doc.name: document_var.set(value))
+
+# Build the main window: a dataset dropdown, a document dropdown, and a
+# button that adds a chunk to the selected document.
+root = Tk()
+root.title("Add Chunk to Document")
+
+# Tk variables holding the currently selected dataset and document names.
+dataset_var = StringVar(root)
+document_var = StringVar(root)
+
+datasets = rag_object.list_datasets()
+# OptionMenu needs at least one value; fall back to an empty placeholder so
+# the window still opens when the server reports no datasets.
+dataset_names = [ds.name for ds in datasets] or [""]
+dataset_menu = OptionMenu(root, dataset_var, *dataset_names, command=update_documents)
+dataset_menu.pack()
+
+
+# Starts with a single empty entry; update_documents fills it once a dataset is chosen.
+document_menu = OptionMenu(root, document_var, "")
+document_menu.pack()
+
+add_chunk_button = Button(root, text="Add Chunk", command=add_chunk_to_document)
+add_chunk_button.pack()
+
+root.mainloop()
--- a/src/types/index.py
+++ b/src/types/index.py
@@ -0,0 +1,25 @@
+from typing import List, Dict, Any
+
+class Dataset:
+    """Plain record holding a dataset's identifier and name."""
+
+    def __init__(self, id: str, name: str):
+        # Store both constructor arguments unchanged as public attributes.
+        self.id, self.name = id, name
+
+class Document:
+    """Plain record holding a document's identifier, title, and content."""
+
+    def __init__(self, id: str, title: str, content: str):
+        # Store the three constructor arguments unchanged as public attributes.
+        self.id, self.title, self.content = id, title, content
+
+class RAGFlowResponse:
+    """Plain record pairing a status string with an arbitrary data payload."""
+
+    def __init__(self, status: str, data: Any):
+        # Store both constructor arguments unchanged as public attributes.
+        self.status, self.data = status, data
+
+def list_datasets() -> List[Dataset]:
+    """Placeholder for fetching all datasets.
+
+    Intended to return a list of Dataset objects. It is not implemented yet
+    and currently returns None.
+    """
+    return None
+
+def list_documents(dataset_id: str) -> List[Document]:
+    """Placeholder for fetching the documents of one dataset.
+
+    Intended to return a list of Document objects for the given dataset_id.
+    It is not implemented yet and currently returns None.
+    """
+    return None