更新主函数,添加PDF和TXT文件处理说明,注释掉文件选择和处理逻辑

This commit is contained in:
2025-07-22 18:29:48 +08:00
parent 2cc9dbfcd0
commit 40211521a2

View File

@@ -157,19 +157,27 @@ def process_pdf_txt_pairs(pdf_dict, txt_dict, dataset):
process_txt_chunks(document, txt_path)
def main():
    """Entry point: pick a target dataset for the PDF/TXT pair workflow.

    In this revision the file pairing and upload steps are deliberately
    disabled; the function only asks for a dataset and echoes its name and
    id so the selection can be verified.

    Mapping recorded by the original author (not verifiable from this
    function alone -- TODO confirm against the storage layer):
        dataset.id -> bucket_name
        chunk_id   -> object_name
    """
    # Only consumed by the disabled pairing step below; kept so the
    # pipeline can be switched back on without hunting for the path.
    file_path = "g:\\11\\22\\规范\\"

    # NOTE: PDF/TXT pairing is switched off in this revision. To restore it,
    # call pair_pdf_and_txt(file_path, file_path) here, unpack the result
    # into pdf_dict and txt_dict, and return early when pdf_dict is empty.

    dataset = select_dataset(rag_object)

    # Guard first: reading dataset.name / dataset.id before this check would
    # raise AttributeError whenever no dataset was selected.
    if not dataset:
        print("未选择数据集。")
        return

    print(f"选择的数据集: {dataset.name}")
    print(f"选择的数据集id: {dataset.id}")

    # NOTE: processing is switched off in this revision. To restore it, call
    # process_pdf_txt_pairs(pdf_dict, txt_dict, dataset) here.