From ca92e349e02342b340a4dd2a4dd51f9fbf084c6a Mon Sep 17 00:00:00 2001 From: glowz <24627181@qq.com> Date: Tue, 15 Jul 2025 21:52:38 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B4=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chunk_operations.py | 19 ------------------- minio_api.py | 6 +++++- src/add_chunk_cli.py | 2 +- 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/chunk_operations.py b/chunk_operations.py index e8b5751..15e0024 100644 --- a/chunk_operations.py +++ b/chunk_operations.py @@ -12,7 +12,6 @@ def update_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, new_img_id): 如果img_id不存在,则增加一个新的 img_id。 :param tenant_id: 租户 ID - :param dataset_id: 数据集 ID :param doc_id: 文档 ID :param chunk_id: 文档块 ID :param new_img_id: 新的 img_id @@ -43,25 +42,7 @@ def update_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, new_img_id): # 获取目标文档的 ID hit = result['hits']['hits'][0] doc_id_in_es = hit['_id'] - # print(doc_id_in_es) - #print(hit) - #print(len(hit['_source']['img_id'])) - # image_id = hit['_source'].get('img_id', None) - # if (image_id): - # mapping = es.indices.get_mapping(index=index_name) - # print(mapping) - - # else: - # # img_id 不存在,添加新的 img_id - # # 获取索引的映射 - # mapping = es.indices.get_mapping(index=index_name) - # mapping[index_name]['mappings']['properties']['img_id'] = {'type': 'text'} - # es.indices.put_mapping(index="my_index", body=mapping) - # print(mapping) - - - # # 构建更新请求 update_body = { "doc": { "img_id": new_img_id diff --git a/minio_api.py b/minio_api.py index d1442bc..8edd852 100644 --- a/minio_api.py +++ b/minio_api.py @@ -122,4 +122,8 @@ try: except S3Error as exc: print("MinIO错误:", exc) except Exception as e: - print("发生错误:", e) \ No newline at end of file + print("发生错误:", e) + + + + \ No newline at end of file diff --git a/src/add_chunk_cli.py b/src/add_chunk_cli.py index 71e01b5..a0277f8 100644 --- a/src/add_chunk_cli.py +++ b/src/add_chunk_cli.py @@ -71,7 +71,7 @@ def pair_pdf_and_txt(pdf_path, txt_path): def main(): - file_path = "g:\\11\\22\\路桥设计党建\\" + file_path = "g:\\11\\22\\规范\\" pdf_dict, txt_dict = pair_pdf_and_txt(file_path,file_path) if not pdf_dict: