🎯 学习目标:学会知识库构建:文档索引、RAG 检索、知识更新
⏱️ 阅读时间:约 12 分钟
💡 前置要求:无
📚 知识库能做什么?
核心功能总览
- 文档索引
- RAG 检索
- 知识更新
- 知识图谱
📄 文档索引
创建知识库
使用方式
# 创建知识库
"创建一个产品知识库"
# 导入文档
"把产品文档目录导入到知识库"
技能实现
module.exports = {name: 'knowledge-base-create',asyncexecute({ name, description, source }) {try {// 1. 创建知识库const kb = {id: generateId(), name, description,createdAt: newDate(),documents: [] };// 2. 导入文档if (source) {const documents = awaitimportDocuments(source); kb.documents = documents; }// 3. 建立索引awaitbuildIndex(kb);// 4. 保存知识库awaitsaveKnowledgeBase(kb);return`✅ 知识库已创建📚 名称:${name}📄 文档数:${kb.documents.length}🔍 索引状态:已完成 `.trim(); } catch (error) {console.error('创建知识库失败:', error);throw error; } }};// 导入文档asyncfunctionimportDocuments(source) {const documents = [];// 从目录导入if (source.type === 'directory') {const files = awaitlistFiles(source.path, '*.md');for (const file of files) {const content = awaitreadFile(file); documents.push({id: generateId(),title: extractTitle(content), content,source: file,updatedAt: newDate() }); } }// 从飞书文档导入if (source.type === 'feishu') {const docs = awaitfeishu_search_doc_wiki('search', {query: source.query });for (const doc of docs) {const content = awaitfeishu_fetch_doc({doc_id: doc.obj_token }); documents.push({id: generateId(),title: doc.title,content: content.markdown,source: doc.url,updatedAt: newDate() }); } }return documents;}文档切片
为什么需要切片?
技能实现
module.exports = {name: 'document-chunking',asyncexecute({ document, strategy = 'semantic' }) {const chunks = [];if (strategy === 'semantic') {// 语义切片:按段落和主题 chunks.push(...semanticChunking(document.content)); }if (strategy === 'fixed') {// 固定长度切片 chunks.push(...fixedSizeChunking(document.content, 500)); }if (strategy === 'recursive') {// 递归切片:先按章节,再按段落 chunks.push(...recursiveChunking(document.content)); }// 为每个切片生成摘要for (const chunk of chunks) { chunk.summary = awaitgenerateSummary(chunk.content); chunk.keywords = awaitextractKeywords(chunk.content); }return chunks; }};// 语义切片functionsemanticChunking(content) {const chunks = [];const sections = content.split(/\n#{2,}\s+/); // 按章节分割for (const section of sections) {const paragraphs = section.split('\n\n');let currentChunk = '';for (const para of paragraphs) {if (currentChunk.length + para.length > 1000) {// 当前块已满,创建新块 chunks.push({content: currentChunk.trim(),type: 'section' }); currentChunk = para; } else { currentChunk += '\n\n' + para; } }if (currentChunk.trim()) { chunks.push({content: currentChunk.trim(),type: 'section' }); } }return chunks;}向量化
module.exports = {name: 'vector-embedding',asyncexecute({ chunks, model = 'text-embedding-3-small' }) {const embeddings = [];for (const chunk of chunks) {// 调用嵌入模型const embedding = awaitcreateEmbedding({ model,input: chunk.content }); embeddings.push({ ...chunk,embedding: embedding.vector,createdAt: newDate() }); }// 存储到向量数据库awaitstoreEmbeddings(embeddings);return`✅ 已向量化 ${embeddings.length} 个文档切片`; }};🔍 RAG 检索 ⭐
RAG 检索流程图

实现 RAG
module.exports = {name: 'rag-query',asyncexecute({ question, kbId, topK = 5 }) {try {// 1. 向量化问题const questionEmbedding = awaitcreateEmbedding({model: 'text-embedding-3-small',input: question });// 2. 检索相关文档const results = awaitvectorSearch({embedding: questionEmbedding.vector, kbId, topK });// 3. 构建上下文const context = buildContext(results);// 4. 生成回答const answer = awaitgenerateAnswer({ question, context,instructions: '基于以下信息回答问题,如果信息不足请说明' });// 5. 添加引用const answerWithCitations = addCitations(answer, results);return answerWithCitations; } catch (error) {console.error('RAG 检索失败:', error);throw error; } }};functionbuildContext(results) {let context = '相关文档:\n\n'; results.forEach((result, index) => { context += `[${index + 1}] ${result.title}\n`; context += `${result.content}\n\n`; context += `来源:${result.source}\n\n`; });return context;}functionaddCitations(answer, results) {let result = answer + '\n\n**参考资料**:\n'; results.forEach((r, i) => { result += `${i + 1}. [${r.title}](${r.source})\n`; });return result;}💡 实用场景
场景 1:产品知识问答
module.exports = {name: 'product-qa',asyncinit() {try {// 1. 创建产品知识库const productKB = awaitcreateKnowledgeBase({name: '产品知识库',sources: [ { type: 'directory', path: '~/docs/product' }, { type: 'feishu', query: '产品文档' } ] });// 2. 监听问题awaitsubscribeEvent('message.receive', async (event) => {const message = event.message;const text = message.content.text;// 判断是否是产品相关问题if (isProductQuestion(text)) {// RAG 检索const answer = awaitragQuery({question: text,kbId: productKB.id,topK: 3 });awaitreply(message, answer); } });return'✅ 产品知识问答已启用'; } catch (error) {console.error('产品知识问答初始化失败:', error);throw error; } }};functionisProductQuestion(text) {const patterns = [/产品.*功能/,/怎么.*使用/,/支持.*吗/,/价格.*多少/,/如何.*配置/ ];return patterns.some(p => p.test(text));}场景 2:技术支持助手
module.exports = {name: 'tech-support',asyncinit() {try {// 1. 创建技术知识库const techKB = awaitcreateKnowledgeBase({name: '技术知识库',sources: [ { type: 'directory', path: '~/docs/tech' }, { type: 'feishu', query: '技术方案' }, { type: 'github', repo: 'openclaw/openclaw' } ] });awaitsubscribeEvent('message.receive', async (event) => {const message = event.message;const text = message.content.text;// 判断是否是技术问题if (isTechQuestion(text)) {// 检索知识库const answer = awaitragQuery({question: text,kbId: techKB.id,topK: 5 });// 如果置信度低,转人工if (answer.confidence < 0.7) {awaitforwardToUser(message, getTechLeadId());awaitautoReply(message, '这个问题比较复杂,已转给技术负责人处理');return; }awaitreply(message, answer); } });return'✅ 技术支持助手已启用'; } catch (error) {console.error('技术支持助手初始化失败:', error);throw error; } }};🔄 知识更新
自动同步
module.exports = {name: 'knowledge-sync',asyncinit() {try {// 定时同步:每天凌晨 3 点awaitcreateCronJob({cron: '0 3 * * *',task: async () => {const knowledgeBases = awaitlistKnowledgeBases();for (const kb of knowledgeBases) {console.log(`同步知识库:${kb.name}`);// 检查文档变更const changes = awaitcheckDocumentChanges(kb.sources);if (changes.added > 0 || changes.updated > 0 || changes.deleted > 0) {// 更新知识库awaitupdateKnowledgeBase(kb.id, changes);// 通知管理员awaitnotifyAdmin(`知识库同步完成📚 ${kb.name}➕ 新增:${changes.added}✏️ 更新:${changes.updated}🗑️ 删除:${changes.deleted} `.trim()); } } } });return'✅ 知识同步已启用,每天凌晨 3 点执行'; } catch (error) {console.error('知识同步初始化失败:', error);throw error; } }};版本管理
module.exports = {name: 'knowledge-versioning',asyncexecute({ kbId, action }) {try {if (action === 'snapshot') {// 创建快照const kb = awaitgetKnowledgeBase(kbId);const snapshot = { kbId,version: generateVersion(),documents: kb.documents,createdAt: newDate() };awaitsaveSnapshot(snapshot);return`✅ 已创建快照:${snapshot.version}`; }if (action === 'rollback') {// 回滚到指定版本const snapshot = awaitgetSnapshot(kbId, action.version);awaitrestoreKnowledgeBase(snapshot);return`✅ 已回滚到版本:${snapshot.version}`; }if (action === 'list') {// 查看版本历史const versions = awaitlistVersions(kbId);returnformatVersions(versions); } } catch (error) {console.error('知识版本管理失败:', error);throw error; } }};📊 知识库质量评估
module.exports = {name: 'knowledge-quality',asyncexecute({ kbId }) {try {const kb = awaitgetKnowledgeBase(kbId);// 分析文档质量const quality = {completeness: awaitcheckCompleteness(kb),accuracy: awaitcheckAccuracy(kb),freshness: awaitcheckFreshness(kb),coverage: awaitcheckCoverage(kb) };// 生成改进建议const suggestions = generateSuggestions(quality);return`📈 **知识库质量报告**完整性:${quality.completeness.score}/100准确性:${quality.accuracy.score}/100时效性:${quality.freshness.score}/100覆盖率:${quality.coverage.score}/100💡 **改进建议**${suggestions.join('\n')} `.trim(); } catch (error) {console.error('质量评估失败:', error);throw error; } }};// 检查完整性asyncfunctioncheckCompleteness(kb) {// 检查是否有必要的文档类别const categories = ['产品文档', '技术文档', 'FAQ', '最佳实践'];const existingCategories = newSet(kb.documents.map(d => d.category));const missingCategories = categories.filter(c => !existingCategories.has(c));const score = ((categories.length - missingCategories.length) / categories.length) * 100;return { score,suggestions: missingCategories.map(c =>`补充${c}`) };}// 检查时效性asyncfunctioncheckFreshness(kb) {const now = Date.now();const thirtyDays = 30 * 24 * 60 * 60 * 1000;const outdatedDocs = kb.documents.filter(d => now - newDate(d.updatedAt).getTime() > thirtyDays );const score = ((kb.documents.length - outdatedDocs.length) / kb.documents.length) * 100;return { score,suggestions: outdatedDocs.length > 0 ? ['更新过期文档'] : [] };}📊 知识库管理
查看统计
module.exports = {name: 'knowledge-stats',asyncexecute({ kbId }) {try {const kb = awaitgetKnowledgeBase(kbId);const stats = {totalDocuments: kb.documents.length,totalChunks: kb.chunks?.length || 0,lastUpdated: kb.updatedAt,totalQueries: kb.queryCount || 0,avgConfidence: kb.avgConfidence || 0 };return`📊 **知识库统计**📄 文档数:${stats.totalDocuments}🔹 切片数:${stats.totalChunks}🔍 查询次数:${stats.totalQueries}📈 平均置信度:${(stats.avgConfidence * 100).toFixed(1)}%🕐 最后更新:${formatDate(stats.lastUpdated)} `.trim(); } catch (error) {console.error('获取统计失败:', error);throw error; } }};✅ 学完这篇你能做什么
学完 Day 19,你将能够:
✅ 创建和管理知识库
✅ 实现 RAG 检索
✅ 配置知识自动同步
✅ 进行知识库质量评估
✅ 处理知识库相关错误
🔜 下篇预告
Day 20:API 集成:调用外部 API、Webhook、数据同步
🌐 外部 API 调用
🔔 Webhook 配置
🔄 数据同步方法
💬 互动环节
你想让 AI 助理学习什么领域的知识?留言分享!
公众号:OpenClaw 研习社
系列:OpenClaw 30 天入门到精通
作者:OpenClaw 研习社
夜雨聆风