整理
This commit is contained in:
		@@ -12,7 +12,6 @@ def update_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, new_img_id):
 | 
			
		||||
    如果img_id不存在,则增加一个新的 img_id。
 | 
			
		||||
 | 
			
		||||
    :param tenant_id: 租户 ID
 | 
			
		||||
    :param dataset_id: 数据集 ID
 | 
			
		||||
    :param doc_id: 文档 ID
 | 
			
		||||
    :param chunk_id: 文档块 ID
 | 
			
		||||
    :param new_img_id: 新的 img_id
 | 
			
		||||
@@ -43,25 +42,7 @@ def update_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, new_img_id):
 | 
			
		||||
    # 获取目标文档的 ID
 | 
			
		||||
    hit = result['hits']['hits'][0]
 | 
			
		||||
    doc_id_in_es = hit['_id']
 | 
			
		||||
    # print(doc_id_in_es)
 | 
			
		||||
    #print(hit)
 | 
			
		||||
    #print(len(hit['_source']['img_id']))
 | 
			
		||||
    # image_id = hit['_source'].get('img_id', None)
 | 
			
		||||
 | 
			
		||||
    # if (image_id):
 | 
			
		||||
    #     mapping = es.indices.get_mapping(index=index_name)
 | 
			
		||||
    #     print(mapping)
 | 
			
		||||
 | 
			
		||||
    # else:
 | 
			
		||||
    #     # img_id 不存在,添加新的 img_id
 | 
			
		||||
    #     # 获取索引的映射
 | 
			
		||||
    #     mapping = es.indices.get_mapping(index=index_name)
 | 
			
		||||
    #     mapping[index_name]['mappings']['properties']['img_id'] = {'type': 'text'}
 | 
			
		||||
    #     es.indices.put_mapping(index="my_index", body=mapping)
 | 
			
		||||
    #     print(mapping)
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    #         # 构建更新请求
 | 
			
		||||
    update_body = {
 | 
			
		||||
        "doc": {
 | 
			
		||||
            "img_id": new_img_id
 | 
			
		||||
 
 | 
			
		||||
@@ -122,4 +122,8 @@ try:
 | 
			
		||||
except S3Error as exc:
 | 
			
		||||
    print("MinIO错误:", exc)
 | 
			
		||||
except Exception as e:
 | 
			
		||||
    print("发生错误:", e)
 | 
			
		||||
    print("发生错误:", e)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
@@ -71,7 +71,7 @@ def pair_pdf_and_txt(pdf_path, txt_path):
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
 | 
			
		||||
    file_path = "g:\\11\\22\\路桥设计党建\\"
 | 
			
		||||
    file_path = "g:\\11\\22\\规范\\"
 | 
			
		||||
    pdf_dict, txt_dict = pair_pdf_and_txt(file_path,file_path)
 | 
			
		||||
 | 
			
		||||
    if not pdf_dict:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user