【解决todo】AI 知识库
This commit is contained in:
parent
6cceab5ba4
commit
ed2296e4c7
|
@ -3,7 +3,7 @@ package cn.iocoder.yudao.module.ai.controller.admin.knowledge;
|
|||
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
|
||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
|
||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
|
||||
import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeBaseService;
|
||||
import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeService;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import jakarta.annotation.Resource;
|
||||
|
@ -19,7 +19,7 @@ import static cn.iocoder.yudao.framework.security.core.util.SecurityFrameworkUti
|
|||
public class AiKnowledgeController {
|
||||
|
||||
@Resource
|
||||
private AiKnowledgeBaseService knowledgeBaseService;
|
||||
private AiKnowledgeService knowledgeBaseService;
|
||||
|
||||
@PostMapping("/create-my")
|
||||
@Operation(summary = "创建【我的】知识库")
|
||||
|
|
|
@ -10,15 +10,14 @@ import lombok.Data;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
// TODO @xin: consider renaming AiKnowledgeBaseDO to AiKnowledgeDO. The "Base" suffix reads oddly (it suggests a base class), and most foreign-key fields elsewhere are already named knowledgeId.
|
||||
/**
|
||||
* AI 知识库 DO
|
||||
*
|
||||
* @author xiaoxin
|
||||
*/
|
||||
@TableName(value = "ai_knowledge_base", autoResultMap = true)
|
||||
@TableName(value = "ai_knowledge", autoResultMap = true)
|
||||
@Data
|
||||
public class AiKnowledgeBaseDO extends BaseDO {
|
||||
public class AiKnowledgeDO extends BaseDO {
|
||||
|
||||
/**
|
||||
* 编号
|
|
@ -24,7 +24,7 @@ public class AiKnowledgeDocumentDO extends BaseDO {
|
|||
/**
|
||||
* 知识库编号
|
||||
*
|
||||
* 关联 {@link AiKnowledgeBaseDO#getId()}
|
||||
* 关联 {@link AiKnowledgeDO#getId()}
|
||||
*/
|
||||
private Long knowledgeId;
|
||||
/**
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
|
||||
|
||||
import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
|
||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO;
|
||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
/**
|
||||
|
@ -10,5 +10,5 @@ import org.apache.ibatis.annotations.Mapper;
|
|||
* @author xiaoxin
|
||||
*/
|
||||
@Mapper
|
||||
public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeBaseDO> {
|
||||
// NOTE(review): the mapper keeps the AiKnowledgeBaseMapper name while its entity type is AiKnowledgeDO - confirm whether the mapper should be renamed too
public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeDO> {
|
||||
}
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
package cn.iocoder.yudao.module.ai.service.knowledge;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.vectorstore.SearchRequest;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* AI embedding service interface
|
||||
*
|
||||
* @author xiaoxin
|
||||
*/
|
||||
public interface AiEmbeddingService {
|
||||
|
||||
/**
|
||||
* Vectorizes the documents and stores them
|
||||
*
* @param documents the documents to vectorize and store
|
||||
*/
|
||||
void add(List<Document> documents);
|
||||
|
||||
/**
|
||||
* Similarity search
|
||||
*
|
||||
* @param request the search request
* @return the documents found for the request
|
||||
*/
|
||||
List<Document> similaritySearch(SearchRequest request);
|
||||
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
package cn.iocoder.yudao.module.ai.service.knowledge;
|
||||
|
||||
import jakarta.annotation.Resource;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.vectorstore.RedisVectorStore;
|
||||
import org.springframework.ai.vectorstore.SearchRequest;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// TODO @xin: is AiEmbeddingServiceImpl needed at all? Injecting the vectorStore directly wherever it is required should be enough; it can be exposed through KnowledgeDocumentService.
|
||||
/**
|
||||
* AI embedding service implementation; both methods delegate directly to the injected vector store
|
||||
*
|
||||
* @author xiaoxin
|
||||
*/
|
||||
@Service
|
||||
public class AiEmbeddingServiceImpl implements AiEmbeddingService {
|
||||
|
||||
@Resource
|
||||
private RedisVectorStore vectorStore;
|
||||
|
||||
@Override
|
||||
// @Async
|
||||
// TODO xiaoxin: @Async raises an error, so it is commented out for now
|
||||
public void add(List<Document> documents) {
|
||||
vectorStore.add(documents);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Document> similaritySearch(SearchRequest request) {
|
||||
return vectorStore.similaritySearch(request);
|
||||
}
|
||||
|
||||
}
|
|
@ -14,8 +14,9 @@ import jakarta.annotation.Resource;
|
|||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
||||
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
|
||||
import org.springframework.ai.tokenizer.TokenCountEstimator;
|
||||
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||
import org.springframework.ai.vectorstore.RedisVectorStore;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
@ -39,52 +40,49 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
|||
|
||||
@Resource
|
||||
private TokenTextSplitter tokenTextSplitter;
|
||||
|
||||
@Resource
|
||||
private AiEmbeddingService embeddingService;
|
||||
private TokenCountEstimator TOKEN_COUNT_ESTIMATOR;
|
||||
@Resource
|
||||
private RedisVectorStore vectorStore;
|
||||
|
||||
// TODO @xin: inject this via @Resource instead of a static instance
|
||||
private static final JTokkitTokenCountEstimator TOKEN_COUNT_ESTIMATOR = new JTokkitTokenCountEstimator();
|
||||
|
||||
// TODO xiaoxin: temporary, for testing only; remove later
|
||||
@Value("classpath:/webapp/test/Fel.pdf")
|
||||
private org.springframework.core.io.Resource data;
|
||||
|
||||
// TODO yunai: the code formatting needs a review
|
||||
// TODO @xin: number the steps (1. / 2. / 3.) so the code reads in clearer stages
|
||||
/**
* Creates a knowledge document: reads the injected {@code data} resource with Tika, inserts the
* document row, splits the loaded documents into segments, inserts the segments in one batch and
* adds them to the vector store.
*
* @param createReqVO request that is copied into the document row via {@code BeanUtils.toBean}
* @return the id of the inserted document row; it is returned right after the insert when the
*         reader yields no documents
*/
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
|
||||
// TODO xiaoxin: load from a URL later
|
||||
TikaDocumentReader loader = new TikaDocumentReader(data);
|
||||
// Load the document
|
||||
// 1.1 Load the document
|
||||
List<Document> documents = loader.get();
|
||||
Document document = CollUtil.getFirst(documents);
|
||||
// TODO @xin: if the document does not exist, should this throw an exception, or simply return?
|
||||
// TODO yunai: a whole document may be fairly large; could these two fields be derived from the segment table instead? Reply: compute them directly for now
// Token and word counts are taken from the first loaded document only; both are 0 when there is none
|
||||
Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0;
|
||||
Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0;
|
||||
|
||||
// NOTE(review): the slice status is set to SUCCESS here, before any splitting has run - confirm this is intended
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
|
||||
.setTokens(tokens).setWordCount(wordCount)
|
||||
.setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
|
||||
// Persist the document record
|
||||
// 1.2 Persist the document record
|
||||
documentMapper.insert(documentDO);
|
||||
Long documentId = documentDO.getId();
|
||||
if (CollUtil.isEmpty(documents)) {
|
||||
return documentId;
|
||||
}
|
||||
|
||||
// Split the document into segments
|
||||
// 2.1 Split the document into segments
|
||||
List<Document> segments = tokenTextSplitter.apply(documents);
|
||||
// Persist the segment contents
|
||||
// 2.2 Persist the segment contents
|
||||
List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
|
||||
segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
|
||||
.setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length())
|
||||
.setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
||||
segmentMapper.insertBatch(segmentDOList);
|
||||
// Vectorize and store
|
||||
embeddingService.add(segments);
|
||||
// 3 Vectorize and store
// NOTE(review): this write runs inside the @Transactional method; presumably a database rollback does not undo it - confirm
|
||||
vectorStore.add(segments);
|
||||
return documentId;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdat
|
|||
*
|
||||
* @author xiaoxin
|
||||
*/
|
||||
public interface AiKnowledgeBaseService {
|
||||
public interface AiKnowledgeService {
|
||||
|
||||
/**
|
||||
* 创建【我的】知识库
|
|
@ -1,12 +1,11 @@
|
|||
package cn.iocoder.yudao.module.ai.service.knowledge;
|
||||
|
||||
import cn.hutool.core.lang.Assert;
|
||||
import cn.hutool.core.util.ObjUtil;
|
||||
import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
|
||||
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
|
||||
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
|
||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO;
|
||||
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
|
||||
import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO;
|
||||
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeBaseMapper;
|
||||
import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
|
||||
|
@ -24,7 +23,7 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
|
|||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
||||
public class AiKnowledgeServiceImpl implements AiKnowledgeService {
|
||||
|
||||
@Resource
|
||||
private AiChatModelService chatModalService;
|
||||
|
@ -34,42 +33,34 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
|
|||
|
||||
/**
* Creates a knowledge base for the given user: validates the requested chat model, copies the
* request into a new row together with the model name, the user id and the ENABLE status,
* inserts it and returns its id.
*
* @param createReqVO creation request; its model id is validated first
* @param userId id of the user stored on the new row
* @return the id of the inserted knowledge base
*/
@Override
|
||||
public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
|
||||
// TODO @xin: calling chatModalService.validateChatModel(id) directly seems to be enough; no separate method is needed
|
||||
// 1. Validate the model configuration
|
||||
AiChatModelDO model = validateChatModel(createReqVO.getModelId());
|
||||
AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId());
|
||||
|
||||
// 2. Insert the knowledge base
|
||||
// TODO @xin: drop the DO suffix from the variable name
|
||||
AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class)
|
||||
AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
|
||||
.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
|
||||
knowledgeBaseMapper.insert(knowledgeBaseDO);
|
||||
return knowledgeBaseDO.getId();
|
||||
knowledgeBaseMapper.insert(knowledgeBase);
|
||||
return knowledgeBase.getId();
|
||||
}
|
||||
|
||||
/**
* Updates a knowledge base owned by the given user: checks that the row exists and belongs to
* the user, validates the requested chat model, then updates the row by id with the request
* fields and the model name.
*
* @param updateReqVO update request carrying the knowledge base id and the model id
* @param userId id of the user who must own the knowledge base
*/
@Override
|
||||
public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) {
|
||||
// 1.1 Validate that the knowledge base exists
|
||||
AiKnowledgeBaseDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId());
|
||||
AiKnowledgeDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId());
|
||||
// A knowledge base that belongs to another user is reported with the same KNOWLEDGE_NOT_EXISTS error
if (ObjUtil.notEqual(knowledgeBaseDO.getUserId(), userId)) {
|
||||
throw exception(KNOWLEDGE_NOT_EXISTS);
|
||||
}
|
||||
// 1.2 Validate the model configuration
|
||||
AiChatModelDO model = validateChatModel(updateReqVO.getModelId());
|
||||
AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId());
|
||||
|
||||
// 2. Update the knowledge base
|
||||
AiKnowledgeBaseDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeBaseDO.class);
|
||||
AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
|
||||
updateDO.setModel(model.getModel());
|
||||
knowledgeBaseMapper.updateById(updateDO);
|
||||
}
|
||||
|
||||
/**
* Validates the chat model through {@code chatModalService} and asserts that the result is not null.
*
* @param id the chat model id
* @return the validated chat model
*/
private AiChatModelDO validateChatModel(Long id) {
|
||||
AiChatModelDO model = chatModalService.validateChatModel(id);
|
||||
// The assertion message below says that no matching embedding model was found
Assert.notNull(model, "未找到对应嵌入模型");
|
||||
return model;
|
||||
}
|
||||
|
||||
public AiKnowledgeBaseDO validateKnowledgeExists(Long id) {
|
||||
AiKnowledgeBaseDO knowledgeBase = knowledgeBaseMapper.selectById(id);
|
||||
public AiKnowledgeDO validateKnowledgeExists(Long id) {
|
||||
AiKnowledgeDO knowledgeBase = knowledgeBaseMapper.selectById(id);
|
||||
if (knowledgeBase == null) {
|
||||
throw exception(KNOWLEDGE_NOT_EXISTS);
|
||||
}
|
|
@ -13,6 +13,8 @@ import lombok.extern.slf4j.Slf4j;
|
|||
import org.springframework.ai.autoconfigure.vectorstore.redis.RedisVectorStoreProperties;
|
||||
import org.springframework.ai.document.MetadataMode;
|
||||
import org.springframework.ai.embedding.EmbeddingModel;
|
||||
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
|
||||
import org.springframework.ai.tokenizer.TokenCountEstimator;
|
||||
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||
import org.springframework.ai.transformers.TransformersEmbeddingModel;
|
||||
import org.springframework.ai.vectorstore.RedisVectorStore;
|
||||
|
@ -90,7 +92,7 @@ public class YudaoAiAutoConfiguration {
|
|||
}
|
||||
|
||||
/**
|
||||
* 我们启动有加载很多 Embedding 模型,不晓得取哪个好,先 new 个 TransformersEmbeddingModel 跑
|
||||
* TODO @xin 抽离出去,根据具体模型走
|
||||
*/
|
||||
@Bean
|
||||
@Lazy // TODO 芋艿:临时注释,避免无法启动
|
||||
|
@ -114,4 +116,10 @@ public class YudaoAiAutoConfiguration {
|
|||
return new TokenTextSplitter(500, 100, 5, 10000, true);
|
||||
}
|
||||
|
||||
/**
* Registers a {@link JTokkitTokenCountEstimator} as the {@link TokenCountEstimator} bean.
*/
@Bean
|
||||
@Lazy // TODO yunai: temporary, to avoid a startup failure
|
||||
public TokenCountEstimator tokenCountEstimator() {
|
||||
return new JTokkitTokenCountEstimator();
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue