更新主函数,添加PDF和TXT文件处理说明,注释掉文件选择和处理逻辑
This commit is contained in:
		| @@ -157,19 +157,27 @@ def process_pdf_txt_pairs(pdf_dict, txt_dict, dataset): | ||||
|             process_txt_chunks(document, txt_path) | ||||
|  | ||||
| def main(): | ||||
|     file_path = "g:\\11\\22\\规范\\" | ||||
|     pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path) | ||||
|  | ||||
|     """主函数,处理PDF和TXT文件对 | ||||
|      | ||||
|     if not pdf_dict: | ||||
|         print("未选择任何文件。") | ||||
|         return | ||||
|     dataset.id = bucket_name | ||||
|     chunk_id = object_name | ||||
|     """ | ||||
|     file_path = "g:\\11\\22\\规范\\" | ||||
|     #pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path) | ||||
|      | ||||
|     # if not pdf_dict: | ||||
|     #     print("未选择任何文件。") | ||||
|     #     return | ||||
|          | ||||
|     dataset = select_dataset(rag_object) | ||||
|     print(f"选择的数据集: {dataset.name}") | ||||
|     print(f"选择的数据集id: {dataset.id}") | ||||
|     if not dataset: | ||||
|         print("未选择数据集。") | ||||
|         return | ||||
|          | ||||
|     process_pdf_txt_pairs(pdf_dict, txt_dict, dataset) | ||||
|     #process_pdf_txt_pairs(pdf_dict, txt_dict, dataset) | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user