diff --git a/chunk_operations.py b/chunk_operations.py index e8b5751..15e0024 100644 --- a/chunk_operations.py +++ b/chunk_operations.py @@ -12,7 +12,6 @@ def update_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, new_img_id): 如果img_id不存在,则增加一个新的 img_id。 :param tenant_id: 租户 ID - :param dataset_id: 数据集 ID :param doc_id: 文档 ID :param chunk_id: 文档块 ID :param new_img_id: 新的 img_id @@ -43,25 +42,7 @@ def update_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, new_img_id): # 获取目标文档的 ID hit = result['hits']['hits'][0] doc_id_in_es = hit['_id'] - # print(doc_id_in_es) - #print(hit) - #print(len(hit['_source']['img_id'])) - # image_id = hit['_source'].get('img_id', None) - # if (image_id): - # mapping = es.indices.get_mapping(index=index_name) - # print(mapping) - - # else: - # # img_id 不存在,添加新的 img_id - # # 获取索引的映射 - # mapping = es.indices.get_mapping(index=index_name) - # mapping[index_name]['mappings']['properties']['img_id'] = {'type': 'text'} - # es.indices.put_mapping(index="my_index", body=mapping) - # print(mapping) - - - # # 构建更新请求 update_body = { "doc": { "img_id": new_img_id diff --git a/minio_api.py b/minio_api.py index d1442bc..8edd852 100644 --- a/minio_api.py +++ b/minio_api.py @@ -122,4 +122,8 @@ try: except S3Error as exc: print("MinIO错误:", exc) except Exception as e: - print("发生错误:", e) \ No newline at end of file + print("发生错误:", e) + + + + \ No newline at end of file diff --git a/src/add_chunk_cli.py b/src/add_chunk_cli.py index 71e01b5..a0277f8 100644 --- a/src/add_chunk_cli.py +++ b/src/add_chunk_cli.py @@ -71,7 +71,7 @@ def pair_pdf_and_txt(pdf_path, txt_path): def main(): - file_path = "g:\\11\\22\\路桥设计党建\\" + file_path = "g:\\11\\22\\规范\\" pdf_dict, txt_dict = pair_pdf_and_txt(file_path,file_path) if not pdf_dict: