mirror of
				https://gitee.com/hhyykk/ipms-sjy.git
				synced 2025-11-04 12:18:42 +08:00 
			
		
		
		
	【解决todo】AI 知识库: 字段命名统一 补充注释
This commit is contained in:
		@@ -29,7 +29,7 @@ public class AiKnowledgeSegmentController {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    @GetMapping("/page")
 | 
					    @GetMapping("/page")
 | 
				
			||||||
    @Operation(summary = "获取段落分页")
 | 
					    @Operation(summary = "获取段落分页")
 | 
				
			||||||
    public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPageMy(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
 | 
					    public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPage(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
 | 
				
			||||||
        PageResult<AiKnowledgeSegmentDO> pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO);
 | 
					        PageResult<AiKnowledgeSegmentDO> pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO);
 | 
				
			||||||
        return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class));
 | 
					        return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -23,21 +23,21 @@ public class AiKnowledgeDocumentCreateReqVO {
 | 
				
			|||||||
    @URL(message = "文档 URL 格式不正确")
 | 
					    @URL(message = "文档 URL 格式不正确")
 | 
				
			||||||
    private String url;
 | 
					    private String url;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Schema(description = "每个文本块的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
 | 
					    @Schema(description = "每个段落的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
 | 
				
			||||||
    @NotNull(message = "每个文本块的目标 token 数不能为空")
 | 
					    @NotNull(message = "每个段落的目标 token 数不能为空")
 | 
				
			||||||
    private Integer defaultChunkSize;
 | 
					    private Integer defaultSegmentTokens;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Schema(description = "每个文本块的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
 | 
					    @Schema(description = "每个段落的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
 | 
				
			||||||
    @NotNull(message = "每个文本块的最小字符数不能为空")
 | 
					    @NotNull(message = "每个段落的最小字符数不能为空")
 | 
				
			||||||
    private Integer minChunkSizeChars;
 | 
					    private Integer minSegmentWordCount;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Schema(description = "丢弃阈值", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
 | 
					    @Schema(description = "丢弃阈值:低于此阈值的段落会被丢弃", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
 | 
				
			||||||
    @NotNull(message = "丢弃阈值不能为空")
 | 
					    @NotNull(message = "丢弃阈值不能为空")
 | 
				
			||||||
    private Integer minChunkLengthToEmbed;
 | 
					    private Integer minChunkLengthToEmbed;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Schema(description = "最大块数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
 | 
					    @Schema(description = "最大段落数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
 | 
				
			||||||
    @NotNull(message = "最大块数不能为空")
 | 
					    @NotNull(message = "最大段落数不能为空")
 | 
				
			||||||
    private Integer maxNumChunks;
 | 
					    private Integer maxNumSegments;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
 | 
					    @Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
 | 
				
			||||||
    @NotNull(message = "分块是否保留分隔符不能为空")
 | 
					    @NotNull(message = "分块是否保留分隔符不能为空")
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -38,9 +38,11 @@ public class AiKnowledgeDO extends BaseDO {
 | 
				
			|||||||
     * 知识库描述
 | 
					     * 知识库描述
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    private String description;
 | 
					    private String description;
 | 
				
			||||||
    // TODO @新:如果全部可见,需要怎么设置?
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 可见权限,只能选择哪些人可见
 | 
					     * 可见权限,选择哪些人可见
 | 
				
			||||||
 | 
					     * <p>
 | 
				
			||||||
 | 
					     * -1 所有人可见,其他为各自用户编号
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    @TableField(typeHandler = JacksonTypeHandler.class)
 | 
					    @TableField(typeHandler = JacksonTypeHandler.class)
 | 
				
			||||||
    private List<Long> visibilityPermissions;
 | 
					    private List<Long> visibilityPermissions;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -40,23 +40,25 @@ public class AiKnowledgeDocumentDO extends BaseDO {
 | 
				
			|||||||
     */
 | 
					     */
 | 
				
			||||||
    private String url;
 | 
					    private String url;
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * token 数量
 | 
					     * 文档 token 数量
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    private Integer tokens;
 | 
					    private Integer tokens;
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 字符数
 | 
					     * 文档字符数
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    private Integer wordCount;
 | 
					    private Integer wordCount;
 | 
				
			||||||
    // TODO @新:chunk 1)是不是 segment,这样命名保持一致会好点哈?2)Size 是不是改成 Tokens 会统一点;3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // ========== 自定义分段所用参数 ==========
 | 
				
			||||||
 | 
					    // TODO @新:3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 每个文本块的目标 token 数
 | 
					     * 每个文本块的目标 token 数
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    private Integer defaultChunkSize;
 | 
					    private Integer defaultSegmentTokens;
 | 
				
			||||||
    // TODO @xin:SizeChars 和 wordCount 好像是一个意思,是不是也要统一哈。
 | 
					 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 每个文本块的最小字符数
 | 
					     * 每个文本块的最小字符数
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    private Integer minChunkSizeChars;
 | 
					    private Integer minSegmentWordCount;
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 低于此值的块会被丢弃
 | 
					     * 低于此值的块会被丢弃
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
@@ -64,11 +66,13 @@ public class AiKnowledgeDocumentDO extends BaseDO {
 | 
				
			|||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 最大块数
 | 
					     * 最大块数
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    private Integer maxNumChunks;
 | 
					    private Integer maxNumSegments;
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 分块是否保留分隔符
 | 
					     * 分块是否保留分隔符
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    private Boolean keepSeparator;
 | 
					    private Boolean keepSeparator;
 | 
				
			||||||
 | 
					    // ===================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 切片状态
 | 
					     * 切片状态
 | 
				
			||||||
     * <p>
 | 
					     * <p>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,8 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 | 
					import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 | 
				
			||||||
import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
 | 
					import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
 | 
				
			||||||
import com.baomidou.mybatisplus.annotation.FieldStrategy;
 | 
					 | 
				
			||||||
import com.baomidou.mybatisplus.annotation.TableField;
 | 
					 | 
				
			||||||
import com.baomidou.mybatisplus.annotation.TableId;
 | 
					import com.baomidou.mybatisplus.annotation.TableId;
 | 
				
			||||||
import com.baomidou.mybatisplus.annotation.TableName;
 | 
					import com.baomidou.mybatisplus.annotation.TableName;
 | 
				
			||||||
import lombok.Data;
 | 
					import lombok.Data;
 | 
				
			||||||
@@ -27,7 +25,6 @@ public class AiKnowledgeSegmentDO extends BaseDO {
 | 
				
			|||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 向量库的编号
 | 
					     * 向量库的编号
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    @TableField(updateStrategy = FieldStrategy.ALWAYS) // TODO @新:尽量规避要这个注解。万一后面加个 status 单独更新,可能会踩坑。
 | 
					 | 
				
			||||||
    private String vectorId;
 | 
					    private String vectorId;
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 知识库编号
 | 
					     * 知识库编号
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -25,8 +25,7 @@ public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegment
 | 
				
			|||||||
                .orderByDesc(AiKnowledgeSegmentDO::getId));
 | 
					                .orderByDesc(AiKnowledgeSegmentDO::getId));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // TODO @新:selectListByXXX 哈
 | 
					    default List<AiKnowledgeSegmentDO> selectListByVectorIds(List<String> vectorIdList) {
 | 
				
			||||||
    default List<AiKnowledgeSegmentDO> selectList(List<String> vectorIdList) {
 | 
					 | 
				
			||||||
        return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
 | 
					        return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
 | 
				
			||||||
                .in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
 | 
					                .in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
 | 
				
			||||||
                .orderByDesc(AiKnowledgeSegmentDO::getId));
 | 
					                .orderByDesc(AiKnowledgeSegmentDO::getId));
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -71,8 +71,8 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // 2 构造文本分段器
 | 
					        // 2 构造文本分段器
 | 
				
			||||||
        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultChunkSize(), createReqVO.getMinChunkSizeChars(), createReqVO.getMinChunkLengthToEmbed(),
 | 
					        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultSegmentTokens(), createReqVO.getMinSegmentWordCount(), createReqVO.getMinChunkLengthToEmbed(),
 | 
				
			||||||
                createReqVO.getMaxNumChunks(), createReqVO.getKeepSeparator());
 | 
					                createReqVO.getMaxNumSegments(), createReqVO.getKeepSeparator());
 | 
				
			||||||
        // 2.1 文档分段
 | 
					        // 2.1 文档分段
 | 
				
			||||||
        List<Document> segments = tokenTextSplitter.apply(documents);
 | 
					        List<Document> segments = tokenTextSplitter.apply(documents);
 | 
				
			||||||
        // 2.2 分段内容入库
 | 
					        // 2.2 分段内容入库
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -90,7 +90,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
 | 
				
			|||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            // 2.2 禁用删除向量
 | 
					            // 2.2 禁用删除向量
 | 
				
			||||||
            vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
 | 
					            vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
 | 
				
			||||||
            knowledgeSegment.setVectorId(null);
 | 
					            knowledgeSegment.setVectorId("");
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        // 3 更新段落状态
 | 
					        // 3 更新段落状态
 | 
				
			||||||
        segmentMapper.updateById(knowledgeSegment);
 | 
					        segmentMapper.updateById(knowledgeSegment);
 | 
				
			||||||
@@ -114,7 +114,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
 | 
				
			|||||||
            return ListUtil.empty();
 | 
					            return ListUtil.empty();
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        // 3.2 段落召回
 | 
					        // 3.2 段落召回
 | 
				
			||||||
        return segmentMapper.selectList(CollUtil.getFieldValues(documentList, "id", String.class));
 | 
					        return segmentMapper.selectListByVectorIds(CollUtil.getFieldValues(documentList, "id", String.class));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,13 +47,12 @@ public interface AiKnowledgeService {
 | 
				
			|||||||
     */
 | 
					     */
 | 
				
			||||||
    PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
 | 
					    PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // TODO @新:knowledgeId 和 validateKnowledgeExists 的 id 是同一个么?如果是的话,建议变量也用 id 哈,然后两边的 id 注释,保持一致
 | 
					 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * 根据知识库编号获取向量存储实例
 | 
					     * 根据知识库编号获取向量存储实例
 | 
				
			||||||
     *
 | 
					     *
 | 
				
			||||||
     * @param knowledgeId 知识库编号
 | 
					     * @param id 知识库编号
 | 
				
			||||||
     * @return 向量存储实例
 | 
					     * @return 向量存储实例
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    VectorStore getVectorStoreById(Long knowledgeId);
 | 
					    VectorStore getVectorStoreById(Long id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,21 +29,18 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
 | 
				
			|||||||
@Slf4j
 | 
					@Slf4j
 | 
				
			||||||
public class AiKnowledgeServiceImpl implements AiKnowledgeService {
 | 
					public class AiKnowledgeServiceImpl implements AiKnowledgeService {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Resource
 | 
					 | 
				
			||||||
    private AiChatModelService chatModalService;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    @Resource
 | 
					    @Resource
 | 
				
			||||||
    private AiKnowledgeMapper knowledgeMapper;
 | 
					    private AiKnowledgeMapper knowledgeMapper;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Resource
 | 
					    @Resource
 | 
				
			||||||
    private AiChatModelService chatModelService;
 | 
					    private AiChatModelService chatModelService;
 | 
				
			||||||
    @Resource
 | 
					    @Resource
 | 
				
			||||||
    private AiApiKeyService apiKeyService;
 | 
					    private AiApiKeyService apiKeyService;
 | 
				
			||||||
    // TODO @新:chatModelService 和 apiKeyService 可以放到 33 行的 chatModalService 后面。尽量保持,想通类型的变量在一块。例如说,Service 一块,Mapper 一块。
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Override
 | 
					    @Override
 | 
				
			||||||
    public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
 | 
					    public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
 | 
				
			||||||
        // 1. 校验模型配置
 | 
					        // 1. 校验模型配置
 | 
				
			||||||
        AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId());
 | 
					        AiChatModelDO model = chatModelService.validateChatModel(createReqVO.getModelId());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // 2. 插入知识库
 | 
					        // 2. 插入知识库
 | 
				
			||||||
        AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
 | 
					        AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
 | 
				
			||||||
@@ -60,7 +57,7 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
 | 
				
			|||||||
            throw exception(KNOWLEDGE_NOT_EXISTS);
 | 
					            throw exception(KNOWLEDGE_NOT_EXISTS);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        // 1.2 校验模型配置
 | 
					        // 1.2 校验模型配置
 | 
				
			||||||
        AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId());
 | 
					        AiChatModelDO model = chatModelService.validateChatModel(updateReqVO.getModelId());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // 2. 更新知识库
 | 
					        // 2. 更新知识库
 | 
				
			||||||
        AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
 | 
					        AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
 | 
				
			||||||
@@ -83,8 +80,8 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @Override
 | 
					    @Override
 | 
				
			||||||
    public VectorStore getVectorStoreById(Long knowledgeId) {
 | 
					    public VectorStore getVectorStoreById(Long id) {
 | 
				
			||||||
        AiKnowledgeDO knowledge = validateKnowledgeExists(knowledgeId);
 | 
					        AiKnowledgeDO knowledge = validateKnowledgeExists(id);
 | 
				
			||||||
        AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
 | 
					        AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
 | 
				
			||||||
        // 创建或获取 VectorStore 对象
 | 
					        // 创建或获取 VectorStore 对象
 | 
				
			||||||
        return apiKeyService.getOrCreateVectorStore(model.getKeyId());
 | 
					        return apiKeyService.getOrCreateVectorStore(model.getKeyId());
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -197,7 +197,6 @@ public class AiModelFactoryImpl implements AiModelFactory {
 | 
				
			|||||||
        });
 | 
					        });
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // TODO @新:貌似可以创建一个大的 VectorStore。然后搜的时候,通过 Filter.Expression 过滤对应的数据。
 | 
					 | 
				
			||||||
    @Override
 | 
					    @Override
 | 
				
			||||||
    public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
 | 
					    public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
 | 
				
			||||||
        String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);
 | 
					        String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user