mirror of
				https://gitee.com/hhyykk/ipms-sjy.git
				synced 2025-10-31 18:28:43 +08:00 
			
		
		
		
	【新增】AI 知识库:文档切片向量化入库
This commit is contained in:
		| @@ -0,0 +1,39 @@ | ||||
| package cn.iocoder.yudao.module.ai.enums.knowledge; | ||||
|  | ||||
| import cn.iocoder.yudao.framework.common.core.IntArrayValuable; | ||||
| import lombok.AllArgsConstructor; | ||||
| import lombok.Getter; | ||||
|  | ||||
| import java.util.Arrays; | ||||
|  | ||||
| /** | ||||
|  * Status of an AI knowledge-base document. | ||||
|  * <p> | ||||
|  * Three values are defined: IN_PROGRESS (10, indexing), SUCCESS (20, available) and FAIL (30, failed). | ||||
|  * {@code ARRAYS} collects the status codes of all constants in declaration order, and | ||||
|  * {@link #array()} returns that same array instance without copying. | ||||
|  * | ||||
|  * @author xiaoxin | ||||
|  */ | ||||
| @AllArgsConstructor | ||||
| @Getter | ||||
| public enum AiKnowledgeDocumentStatusEnum implements IntArrayValuable { | ||||
|  | ||||
|     IN_PROGRESS(10, "索引中"), | ||||
|     SUCCESS(20, "可用"), | ||||
|     FAIL(30, "失败"); | ||||
|  | ||||
|     /** | ||||
|      * Numeric status code. | ||||
|      */ | ||||
|     private final Integer status; | ||||
|  | ||||
|     /** | ||||
|      * Display name of the status. | ||||
|      */ | ||||
|     private final String name; | ||||
|  | ||||
|     public static final int[] ARRAYS = Arrays.stream(values()).mapToInt(AiKnowledgeDocumentStatusEnum::getStatus).toArray(); | ||||
|  | ||||
|     @Override | ||||
|     public int[] array() { | ||||
|         return ARRAYS; | ||||
|     } | ||||
|  | ||||
| } | ||||
| @@ -14,11 +14,11 @@ import java.util.List; | ||||
| @Data | ||||
| public class AiKnowledgeCreateMyReqVO { | ||||
|  | ||||
|     @Schema(description = "知识库名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "") | ||||
|     @Schema(description = "知识库名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "ruoyi-vue-pro 用户指南") | ||||
|     @NotBlank(message = "知识库名称不能为空") | ||||
|     private String name; | ||||
|  | ||||
|     @Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "") | ||||
|     @Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "存储 ruoyi-vue-pro 操作文档") | ||||
|     private String description; | ||||
|  | ||||
|     @Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]") | ||||
|   | ||||
| @@ -0,0 +1,27 @@ | ||||
| package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo; | ||||
|  | ||||
| import io.swagger.v3.oas.annotations.media.Schema; | ||||
| import jakarta.validation.constraints.NotBlank; | ||||
| import jakarta.validation.constraints.NotNull; | ||||
| import lombok.Data; | ||||
|  | ||||
| /** | ||||
|  * Request VO for creating a document inside an AI knowledge base (admin API). | ||||
|  * <p> | ||||
|  * {@code knowledgeId} must be non-null and {@code name} must be non-blank; | ||||
|  * {@code url} is marked REQUIRED in its schema but carries no validation constraint here. | ||||
|  * | ||||
|  * @author xiaoxin | ||||
|  */ | ||||
| @Schema(description = "管理后台 - AI 知识库【创建文档】 Request VO") | ||||
| @Data | ||||
| public class AiKnowledgeDocumentCreateReqVO { | ||||
|  | ||||
|  | ||||
|     @Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204") | ||||
|     @NotNull(message = "知识库编号不能为空") | ||||
|     private Long knowledgeId; | ||||
|  | ||||
|     @Schema(description = "文档名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "三方登陆") | ||||
|     @NotBlank(message = "文档名称不能为空") | ||||
|     private String name; | ||||
|  | ||||
|     @Schema(description = "文档 url", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn") | ||||
|     private String url; | ||||
|  | ||||
| } | ||||
| @@ -1,10 +1,13 @@ | ||||
| package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; | ||||
|  | ||||
|  | ||||
| import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; | ||||
| import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; | ||||
| import com.baomidou.mybatisplus.annotation.IdType; | ||||
| import com.baomidou.mybatisplus.annotation.TableField; | ||||
| import com.baomidou.mybatisplus.annotation.TableId; | ||||
| import com.baomidou.mybatisplus.annotation.TableName; | ||||
| import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler; | ||||
| import lombok.Data; | ||||
|  | ||||
| import java.util.List; | ||||
| @@ -40,7 +43,8 @@ public class AiKnowledgeBaseDO extends BaseDO { | ||||
|     /** | ||||
|      * 可见权限,只能选择哪些人可见 | ||||
|      */ | ||||
|     private List<String> visibilityPermissions; | ||||
|     @TableField(typeHandler = JacksonTypeHandler.class) | ||||
|     private List<Long> visibilityPermissions; | ||||
|     /** | ||||
|      * 嵌入模型编号,高质量模式时维护 | ||||
|      */ | ||||
| @@ -50,7 +54,9 @@ public class AiKnowledgeBaseDO extends BaseDO { | ||||
|      */ | ||||
|     private String model; | ||||
|     /** | ||||
|      * 是否启用 | ||||
|      * 状态 | ||||
|      * <p> | ||||
|      * 枚举 {@link CommonStatusEnum} | ||||
|      */ | ||||
|     private Boolean status; | ||||
|     private Integer status; | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; | ||||
|  | ||||
| import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; | ||||
| import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; | ||||
| import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum; | ||||
| import com.baomidou.mybatisplus.annotation.IdType; | ||||
| import com.baomidou.mybatisplus.annotation.TableId; | ||||
| import com.baomidou.mybatisplus.annotation.TableName; | ||||
| @@ -46,10 +48,15 @@ public class AiKnowledgeDocumentDO extends BaseDO { | ||||
|     private Integer wordCount; | ||||
|     /** | ||||
|      * 切片状态 | ||||
|      * <p> | ||||
|      * 枚举 {@link AiKnowledgeDocumentStatusEnum} | ||||
|      */ | ||||
|     private Integer sliceStatus; | ||||
|  | ||||
|     /** | ||||
|      * 是否启用 | ||||
|      * 状态 | ||||
|      * <p> | ||||
|      * 枚举 {@link CommonStatusEnum} | ||||
|      */ | ||||
|     private Boolean status; | ||||
|     private Integer status; | ||||
| } | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge; | ||||
|  | ||||
| import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; | ||||
| import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; | ||||
| import com.baomidou.mybatisplus.annotation.IdType; | ||||
| import com.baomidou.mybatisplus.annotation.TableId; | ||||
| @@ -41,8 +42,10 @@ public class AiKnowledgeSegmentDO extends BaseDO { | ||||
|      */ | ||||
|     private Integer tokens; | ||||
|     /** | ||||
|      * 是否启用 | ||||
|      * 状态 | ||||
|      * <p> | ||||
|      * 枚举 {@link CommonStatusEnum} | ||||
|      */ | ||||
|     private Boolean status; | ||||
|     private Integer status; | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge; | ||||
|  | ||||
| import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; | ||||
| import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO; | ||||
| import org.apache.ibatis.annotations.Mapper; | ||||
|  | ||||
| /** | ||||
|  * Mapper for AI knowledge base basic-information records ({@link AiKnowledgeBaseDO}). | ||||
|  * | ||||
|  * @author xiaoxin | ||||
|  */ | ||||
| @Mapper | ||||
| public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeBaseDO> { | ||||
| } | ||||
|   | ||||
| @@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge; | ||||
|  | ||||
| import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; | ||||
| import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO; | ||||
| import org.apache.ibatis.annotations.Mapper; | ||||
|  | ||||
| /** | ||||
|  * Mapper for AI knowledge base document records ({@link AiKnowledgeDocumentDO}). | ||||
|  * | ||||
|  * @author xiaoxin | ||||
|  */ | ||||
| @Mapper | ||||
| public interface AiKnowledgeDocumentMapper extends BaseMapperX<AiKnowledgeDocumentDO> { | ||||
| } | ||||
|   | ||||
| @@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge; | ||||
|  | ||||
| import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; | ||||
| import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO; | ||||
| import org.apache.ibatis.annotations.Mapper; | ||||
|  | ||||
| /** | ||||
|  * Mapper for AI knowledge base segment records ({@link AiKnowledgeSegmentDO}). | ||||
|  * | ||||
|  * @author xiaoxin | ||||
|  */ | ||||
| @Mapper | ||||
| public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegmentDO> { | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| package cn.iocoder.yudao.module.ai.service.knowledge; | ||||
|  | ||||
| import org.springframework.ai.document.Document; | ||||
| import org.springframework.ai.vectorstore.SearchRequest; | ||||
|  | ||||
| import java.util.List; | ||||
|  | ||||
| @@ -12,9 +13,9 @@ import java.util.List; | ||||
| public interface AiEmbeddingService { | ||||
|  | ||||
|     /** | ||||
|      * 向量化文档 | ||||
|      * 向量化文档并存储 | ||||
|      */ | ||||
|     void embeddingDoc(); | ||||
|     void add(List<Document> documents); | ||||
|  | ||||
|  | ||||
|     /** | ||||
| @@ -22,5 +23,5 @@ public interface AiEmbeddingService { | ||||
|      * | ||||
|      * @param content 查询内容 | ||||
|      */ | ||||
|     List<Document> similaritySearch(String content); | ||||
|     List<Document> similaritySearch(SearchRequest request); | ||||
| } | ||||
|   | ||||
| @@ -2,11 +2,9 @@ package cn.iocoder.yudao.module.ai.service.knowledge; | ||||
|  | ||||
| import jakarta.annotation.Resource; | ||||
| import org.springframework.ai.document.Document; | ||||
| import org.springframework.ai.reader.tika.TikaDocumentReader; | ||||
| import org.springframework.ai.transformer.splitter.TokenTextSplitter; | ||||
| import org.springframework.ai.vectorstore.RedisVectorStore; | ||||
| import org.springframework.ai.vectorstore.SearchRequest; | ||||
| import org.springframework.beans.factory.annotation.Value; | ||||
| import org.springframework.scheduling.annotation.Async; | ||||
| import org.springframework.stereotype.Service; | ||||
|  | ||||
| import java.util.List; | ||||
| @@ -21,27 +19,14 @@ public class AiEmbeddingServiceImpl implements AiEmbeddingService { | ||||
|  | ||||
|     @Resource | ||||
|     private RedisVectorStore vectorStore; | ||||
|     @Resource | ||||
|     private TokenTextSplitter tokenTextSplitter; | ||||
|  | ||||
|     // TODO @xin 临时测试用,后续删 | ||||
|     @Value("classpath:/webapp/test/Fel.pdf") | ||||
|     private org.springframework.core.io.Resource data; | ||||
|  | ||||
|     @Override | ||||
|     public void embeddingDoc() { | ||||
|         // 读取文件 | ||||
|         TikaDocumentReader loader = new TikaDocumentReader(data); | ||||
|         List<Document> documents = loader.get(); | ||||
|         // 文档分段 | ||||
|         List<Document> segments = tokenTextSplitter.apply(documents); | ||||
|         // 向量化并存储 | ||||
|         vectorStore.add(segments); | ||||
|     public void add(List<Document> documents) { | ||||
|         vectorStore.add(documents); | ||||
|     } | ||||
|  | ||||
|     @Override | ||||
|     public List<Document> similaritySearch(String content) { | ||||
|         SearchRequest request = SearchRequest.query(content); | ||||
|     public List<Document> similaritySearch(SearchRequest request) { | ||||
|         return vectorStore.similaritySearch(request); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -2,6 +2,7 @@ package cn.iocoder.yudao.module.ai.service.knowledge; | ||||
|  | ||||
| import cn.hutool.core.lang.Assert; | ||||
| import cn.hutool.core.util.ObjUtil; | ||||
| import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; | ||||
| import cn.iocoder.yudao.framework.common.util.object.BeanUtils; | ||||
| import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO; | ||||
| import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO; | ||||
| @@ -25,17 +26,19 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_ | ||||
| @Slf4j | ||||
| public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService { | ||||
|  | ||||
|     @Resource | ||||
|     private AiKnowledgeBaseMapper knowledgeBaseMapper; | ||||
|     @Resource | ||||
|     private AiChatModelService chatModalService; | ||||
|  | ||||
|     @Resource | ||||
|     private AiKnowledgeBaseMapper knowledgeBaseMapper; | ||||
|  | ||||
|  | ||||
|     @Override | ||||
|     public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) { | ||||
|         AiChatModelDO model = validateChatModel(createReqVO.getModelId()); | ||||
|  | ||||
|         AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class); | ||||
|         knowledgeBaseDO.setModel(model.getModel()).setUserId(userId); | ||||
|         knowledgeBaseDO.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus()); | ||||
|  | ||||
|         knowledgeBaseMapper.insert(knowledgeBaseDO); | ||||
|         return knowledgeBaseDO.getId(); | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| package cn.iocoder.yudao.module.ai.service.knowledge; | ||||
|  | ||||
| import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO; | ||||
|  | ||||
| /** | ||||
|  * AI 知识库-文档 Service 接口 | ||||
|  * | ||||
| @@ -7,4 +9,13 @@ package cn.iocoder.yudao.module.ai.service.knowledge; | ||||
|  */ | ||||
| public interface AiKnowledgeDocumentService { | ||||
|  | ||||
|  | ||||
|     /** | ||||
|      * Creates a knowledge base document. | ||||
|      * | ||||
|      * @param createReqVO request VO describing the document to create | ||||
|      * @return id of the created document | ||||
|      */ | ||||
|     Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO); | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -1,7 +1,25 @@ | ||||
| package cn.iocoder.yudao.module.ai.service.knowledge; | ||||
|  | ||||
| import cn.hutool.core.collection.CollUtil; | ||||
| import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum; | ||||
| import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils; | ||||
| import cn.iocoder.yudao.framework.common.util.object.BeanUtils; | ||||
| import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO; | ||||
| import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO; | ||||
| import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO; | ||||
| import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper; | ||||
| import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeSegmentMapper; | ||||
| import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum; | ||||
| import jakarta.annotation.Resource; | ||||
| import lombok.extern.slf4j.Slf4j; | ||||
| import org.springframework.ai.document.Document; | ||||
| import org.springframework.ai.reader.tika.TikaDocumentReader; | ||||
| import org.springframework.ai.transformer.splitter.TokenTextSplitter; | ||||
| import org.springframework.beans.factory.annotation.Value; | ||||
| import org.springframework.stereotype.Service; | ||||
| import org.springframework.transaction.annotation.Transactional; | ||||
|  | ||||
| import java.util.List; | ||||
|  | ||||
| /** | ||||
|  * AI 知识库-文档 Service 实现类 | ||||
| @@ -12,5 +30,55 @@ import org.springframework.stereotype.Service; | ||||
| @Slf4j | ||||
| public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentService { | ||||
|  | ||||
|     @Resource | ||||
|     private AiKnowledgeDocumentMapper documentMapper; | ||||
|     @Resource | ||||
|     private AiKnowledgeSegmentMapper segmentMapper; | ||||
|  | ||||
|     @Resource | ||||
|     private TokenTextSplitter tokenTextSplitter; | ||||
|  | ||||
|     @Resource | ||||
|     private AiEmbeddingService embeddingService; | ||||
|  | ||||
|     // TODO @xin temporary test fixture (createKnowledgeDocument currently reads this resource); remove later | ||||
|     @Value("classpath:/webapp/test/Fel.pdf") | ||||
|     private org.springframework.core.io.Resource data; | ||||
|  | ||||
|  | ||||
|     @Override | ||||
|     @Transactional(rollbackFor = Exception.class) | ||||
|     public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) { | ||||
|         AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class); | ||||
|         documentDO | ||||
|                 // TODO: tokens/wordCount are 0 placeholders; sliceStatus is already SUCCESS here, before the content is read, split and embedded | ||||
|                 .setTokens(0).setWordCount(0) | ||||
|                 .setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus()); | ||||
|         documentMapper.insert(documentDO); | ||||
|  | ||||
|         TikaDocumentReader loader = new TikaDocumentReader(data); | ||||
|         List<Document> documents = loader.get(); | ||||
|         Long documentId = documentDO.getId(); | ||||
|         if (CollUtil.isEmpty(documents)) { | ||||
|             log.info("文档内容为空"); | ||||
|             return documentId; | ||||
|         } | ||||
|  | ||||
|         // Split the loaded documents into segments | ||||
|         List<Document> segments = tokenTextSplitter.apply(documents); | ||||
|  | ||||
|         List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments, | ||||
|                 segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId) | ||||
|                         // TODO: per-segment tokens/wordCount are 0 placeholders | ||||
|                         .setTokens(0).setWordCount(0) | ||||
|                         .setStatus(CommonStatusEnum.ENABLE.getStatus())); | ||||
|  | ||||
|         // Persist the segment rows with a single insertBatch call | ||||
|         segmentMapper.insertBatch(segmentDOList); | ||||
|  | ||||
|         // Hand the segments to the embedding service, which vectorizes and stores them | ||||
|         embeddingService.add(segments); | ||||
|  | ||||
|         return documentId; | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 xiaoxin
					xiaoxin