更新主函数,添加PDF和TXT文件处理说明,注释掉文件选择和处理逻辑
This commit is contained in:
@@ -157,19 +157,27 @@ def process_pdf_txt_pairs(pdf_dict, txt_dict, dataset):
|
|||||||
process_txt_chunks(document, txt_path)
|
process_txt_chunks(document, txt_path)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
file_path = "g:\\11\\22\\规范\\"
|
|
||||||
pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path)
|
"""主函数,处理PDF和TXT文件对
|
||||||
|
|
||||||
if not pdf_dict:
|
dataset.id = bucket_name
|
||||||
print("未选择任何文件。")
|
chunk_id = object_name
|
||||||
return
|
"""
|
||||||
|
file_path = "g:\\11\\22\\规范\\"
|
||||||
|
#pdf_dict, txt_dict = pair_pdf_and_txt(file_path, file_path)
|
||||||
|
|
||||||
|
# if not pdf_dict:
|
||||||
|
# print("未选择任何文件。")
|
||||||
|
# return
|
||||||
|
|
||||||
dataset = select_dataset(rag_object)
|
dataset = select_dataset(rag_object)
|
||||||
|
print(f"选择的数据集: {dataset.name}")
|
||||||
|
print(f"选择的数据集id: {dataset.id}")
|
||||||
if not dataset:
|
if not dataset:
|
||||||
print("未选择数据集。")
|
print("未选择数据集。")
|
||||||
return
|
return
|
||||||
|
|
||||||
process_pdf_txt_pairs(pdf_dict, txt_dict, dataset)
|
#process_pdf_txt_pairs(pdf_dict, txt_dict, dataset)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user