Test positions: convert dict-format positions to integer lists before updating Elasticsearch
This commit is contained in:
@@ -10,7 +10,7 @@ from elasticsearch import Elasticsearch
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
|
||||
from get_pos_pdf import smart_fuzzy_find_text_batch, find_text_positions_batch
|
||||
# from get_pos_pdf import smart_fuzzy_find_text_batch, find_text_positions_batch
|
||||
|
||||
|
||||
from dotenv import load_dotenv # 新增
|
||||
@@ -59,6 +59,7 @@ def update_positon_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, position
|
||||
:return: 更新结果
|
||||
"""
|
||||
try:
|
||||
|
||||
# 构建索引名称
|
||||
index_name = f"ragflow_{tenant_id}"
|
||||
|
||||
@@ -93,7 +94,23 @@ def update_positon_img_id_in_elasticsearch(tenant_id, doc_id, chunk_id, position
|
||||
|
||||
# 只有当 position 存在时才更新 positions
|
||||
if position is not None:
|
||||
update_body["doc"]["positions"] = position
|
||||
# 如果传入的是嵌套字典格式的 position
|
||||
if isinstance(position, list) and all(isinstance(p, dict) for p in position):
|
||||
# 将字典格式转换为整数列表格式
|
||||
formatted_positions = []
|
||||
for pos in position:
|
||||
pos_list = [
|
||||
pos.get('page', 0), # 页码
|
||||
int(round(float(pos.get('x0', 0)))), # x0
|
||||
int(round(float(pos.get('x1', 0)))), # x1
|
||||
int(round(float(pos.get('y0', 0)))), # y0
|
||||
int(round(float(pos.get('y1', 0)))) # y1
|
||||
]
|
||||
formatted_positions.append(pos_list)
|
||||
update_body["doc"]["positions"] = formatted_positions
|
||||
# 如果已经是整数列表格式
|
||||
elif isinstance(position, list):
|
||||
update_body["doc"]["positions"] = position
|
||||
|
||||
# 如果没有需要更新的字段,直接返回成功
|
||||
if not update_body["doc"]:
|
||||
|
Reference in New Issue
Block a user