更新主函数，添加PDF和TXT文件处理说明，注释掉文件选择和处理逻辑
This commit is contained in:
		| @@ -157,19 +157,27 @@ def process_pdf_txt_pairs(pdf_dict, txt_dict, dataset): | |||||||
|             process_txt_chunks(document, txt_path) |             process_txt_chunks(document, txt_path) | ||||||
|  |  | ||||||
| def main(): | def main(): | ||||||
|     file_path = "g:\\11\\22\\规范\\" |  | ||||||
|     pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path) |     """主函数,处理PDF和TXT文件对 | ||||||
|      |      | ||||||
|     if not pdf_dict: |     dataset.id = bucket_name | ||||||
|         print("未选择任何文件。") |     chunk_id = object_name | ||||||
|         return |     """ | ||||||
|  |     file_path = "g:\\11\\22\\规范\\" | ||||||
|  |     #pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path) | ||||||
|  |      | ||||||
|  |     # if not pdf_dict: | ||||||
|  |     #     print("未选择任何文件。") | ||||||
|  |     #     return | ||||||
|          |          | ||||||
|     dataset = select_dataset(rag_object) |     dataset = select_dataset(rag_object) | ||||||
|  |     print(f"选择的数据集: {dataset.name}") | ||||||
|  |     print(f"选择的数据集id: {dataset.id}") | ||||||
|     if not dataset: |     if not dataset: | ||||||
|         print("未选择数据集。") |         print("未选择数据集。") | ||||||
|         return |         return | ||||||
|          |          | ||||||
|     process_pdf_txt_pairs(pdf_dict, txt_dict, dataset) |     #process_pdf_txt_pairs(pdf_dict, txt_dict, dataset) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user