更新主函数,添加PDF和TXT文件处理说明,注释掉文件选择和处理逻辑
This commit is contained in:
@@ -157,19 +157,27 @@ def process_pdf_txt_pairs(pdf_dict, txt_dict, dataset):
|
||||
process_txt_chunks(document, txt_path)
|
||||
|
||||
def main():
|
||||
file_path = "g:\\11\\22\\规范\\"
|
||||
pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path)
|
||||
|
||||
"""主函数,处理PDF和TXT文件对
|
||||
|
||||
if not pdf_dict:
|
||||
print("未选择任何文件。")
|
||||
return
|
||||
dataset.id = bucket_name
|
||||
chunk_id = object_name
|
||||
"""
|
||||
file_path = "g:\\11\\22\\规范\\"
|
||||
#pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path)
|
||||
|
||||
# if not pdf_dict:
|
||||
# print("未选择任何文件。")
|
||||
# return
|
||||
|
||||
dataset = select_dataset(rag_object)
|
||||
print(f"选择的数据集: {dataset.name}")
|
||||
print(f"选择的数据集id: {dataset.id}")
|
||||
if not dataset:
|
||||
print("未选择数据集。")
|
||||
return
|
||||
|
||||
process_pdf_txt_pairs(pdf_dict, txt_dict, dataset)
|
||||
#process_pdf_txt_pairs(pdf_dict, txt_dict, dataset)
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user