QwenPaw源码解析系列(四):Skills系统与技能扩展机制
前言
Skills(技能)系统是QwenPaw实现能力扩展的核心机制。通过Skills,用户可以自定义智能体的能力,实现从文件处理到API调用等各种功能。今天让我们深入解析Skills系统的设计与实现。
Skills架构概览
QwenPaw的Skills系统采用插件化设计:
Skills/
├── browser_cdp-en/ # 浏览器控制技能
├── docx-en/ # Word文档处理
├── excel-en/ # Excel处理
├── pdf-en/ # PDF处理
├── news_digest_en/ # 新闻摘要
├── scheduled_task_en/ # 定时任务
└── ...更多技能
每个技能都是一个独立目录,具有标准化的结构。
技能目录结构
一个典型的Skill目录结构如下:
skill_name/
├── config.json              # 技能配置
├── prompts.py               # 提示词模块
├── scripts/                 # 脚本目录
│   ├── __init__.py
│   ├── main_script.py       # 主脚本
│   └── helpers/
│       ├── __init__.py
│       └── utils.py         # 工具函数
├── md_files/                # Markdown说明文件
│   ├── en/
│   │   └── README.md
│   └── zh/
│       └── README.md
└── requirements.txt         # 依赖(可选)
config.json 配置示例
{
  "name": "pdf_process",
  "version": "1.0.0",
  "description": "PDF文档处理技能",
  "channels": ["console", "feishu", "dingtalk"],
  "tools": ["read_pdf", "extract_text", "merge_pdf"],
  "env": {
    "PDF_MAX_PAGES": "100"
  },
  "security": {
    "scan_required": true,
    "allowed_operations": ["read", "extract"]
  }
}
SkillsManager:技能管理器
SkillsManager是技能系统的核心管理组件:
class SkillsManager:
    """Load and keep track of the skills found in a workspace.

    Loaded skill configurations are held in memory, keyed by the ``name``
    attribute of each configuration.
    """

    def __init__(self, workspace_dir: str):
        """Remember the workspace and resolve its skills directory.

        Args:
            workspace_dir: Workspace whose skills are managed.
        """
        self.workspace_dir = workspace_dir
        self.skills_dir = get_workspace_skills_dir(workspace_dir)
        self._skills: dict[str, SkillConfig] = {}

    async def load_skills(self) -> None:
        """Load every skill directory found directly under ``skills_dir``."""
        for skill_dir in self.skills_dir.iterdir():
            # Only sub-directories are treated as skills; other entries
            # are ignored.
            if skill_dir.is_dir():
                await self._load_skill(skill_dir)

    async def _load_skill(self, skill_dir: Path) -> None:
        """Load a single skill, scanning it first when its config asks for it.

        The configuration is stored only after the security scan has
        returned, so a scan that raises does not leave the skill registered.

        Args:
            skill_dir: Directory of the skill to load.
        """
        config = self._load_config(skill_dir)
        # NOTE(review): whether _scan_skill_security raises on findings is
        # not visible here - confirm that a failed scan aborts the load.
        if config.security.scan_required:
            await self._scan_skill_security(skill_dir)
        self._skills[config.name] = config

    def get_skill(self, name: str) -> SkillConfig | None:
        """Return the configuration stored under ``name``.

        Args:
            name: Skill name to look up.

        Returns:
            The stored configuration, or ``None`` when no skill with that
            name has been loaded.
        """
        return self._skills.get(name)
技能加载流程
1. 初始化检查
def ensure_skills_initialized(workspace_dir: Path) -> None:
    """Create and populate the workspace skills directory if it is missing.

    Nothing is done when the directory already exists.

    Args:
        workspace_dir: Workspace whose skills directory is checked.
    """
    target = get_workspace_skills_dir(workspace_dir)
    if target.exists():
        return

    # First use: create the directory, then install the default skills.
    target.mkdir(parents=True, exist_ok=True)
    _install_default_skills(target)
2. 渠道特定技能解析
def resolve_effective_skills(
    workspace_dir: Path,
    channel_name: str,
) -> list[str]:
    """Return the names of the skills enabled for a channel.

    A skill is kept when its configuration lists no channels at all, or
    when ``channel_name`` is one of the listed channels.

    Args:
        workspace_dir: Workspace whose skills directory is inspected.
        channel_name: Channel the request arrived on.

    Returns:
        The ``name`` of every matching skill configuration, in directory
        iteration order.
    """
    skills_dir = get_workspace_skills_dir(workspace_dir)
    effective_skills = []
    for skill_dir in skills_dir.iterdir():
        # Only directories are skills; a stray file must not be handed to
        # the config loader.
        if not skill_dir.is_dir():
            continue
        config = _load_skill_config(skill_dir)
        # An empty channel list means the skill is not channel-restricted.
        if config.channels and channel_name not in config.channels:
            continue
        effective_skills.append(config.name)
    return effective_skills
3. 技能注册到工具包
def _register_skills(self, toolkit: Toolkit) -> None:
    """Register every skill enabled for the current channel with ``toolkit``.

    A skill whose directory does not exist is skipped. A failure while
    registering one skill is logged and does not stop the remaining ones.

    Args:
        toolkit: Toolkit that receives the skills.
    """
    workspace_dir = self._workspace_dir or WORKING_DIR
    channel_name = self._request_context.get("channel", "console")
    # Resolve the directory that holds the skill folders; it was previously
    # referenced without ever being defined.
    working_skills_dir = get_workspace_skills_dir(workspace_dir)
    effective_skills = resolve_effective_skills(workspace_dir, channel_name)
    for skill_name in effective_skills:
        # NOTE(review): assumes the resolved skill names match the skill
        # directory names - confirm against resolve_effective_skills.
        skill_dir = working_skills_dir / skill_name
        if not skill_dir.exists():
            continue
        try:
            toolkit.register_agent_skill(str(skill_dir))
            logger.debug("Registered skill: %s", skill_name)
        except Exception as e:
            # Best effort: one broken skill must not block the others.
            logger.error("Failed to register skill '%s': %s", skill_name, e)
技能执行机制
技能调用流程
用户请求 → 技能检测 → 参数解析 → 脚本执行 → 结果处理
Toolkit.register_agent_skill
class Toolkit:
    """Toolkit that manages tools and skills."""

    def register_agent_skill(self, skill_dir: str) -> None:
        """Register the tool functions of one skill.

        Args:
            skill_dir: Directory of the skill to register.
        """
        config = self._load_skill_config(skill_dir)

        # Put the skill's ``scripts`` folder at the front of the import
        # path, then import the module named in the configuration.
        scripts_path = os.path.join(skill_dir, "scripts")
        sys.path.insert(0, scripts_path)
        skill_module = importlib.import_module(config.module_name)

        # Register each configured tool that the module actually provides.
        for name in config.tools:
            func = getattr(skill_module, name, None)
            if not func:
                continue
            self.register_tool_function(func, namesake_strategy="skip")
技能函数模板
# skills/pdf-en/scripts/read_pdf.py
from agentscope.tool import ToolResponse
from .helpers.pdf_utils import parse_pdf
def read_pdf(file_path: str, max_pages: int = 10) -> ToolResponse:
    """Read the content of a PDF file.

    Args:
        file_path: Path of the PDF file.
        max_pages: Maximum number of pages to read.

    Returns:
        ToolResponse: A successful response carrying the parsed content,
        or a failed response carrying the error text when anything raises.
    """
    try:
        parsed = parse_pdf(file_path, max_pages)
        response = ToolResponse(success=True, content=parsed)
    except Exception as exc:
        response = ToolResponse(success=False, error=str(exc))
    return response
安全扫描机制
Skills系统在安装技能前会进行安全扫描:
class SkillSecurityScanner:
    """Scan a skill directory for potential security risks."""

    async def scan(self, skill_dir: Path) -> "ScanResult":
        """Run every check against ``skill_dir`` and collect the findings.

        Args:
            skill_dir: Directory of the skill to scan.

        Returns:
            ScanResult: Result built from the list of detected issues; the
            list is empty when no check fires.
        """
        results = []
        # 1. Prompt injection
        if await self._check_prompt_injection(skill_dir):
            results.append(SecurityIssue(
                type="prompt_injection",
                severity="high",
                message="检测到潜在的提示词注入风险"
            ))
        # 2. Command injection
        if await self._check_command_injection(skill_dir):
            results.append(SecurityIssue(
                type="command_injection",
                severity="critical",
                message="检测到命令注入风险"
            ))
        # 3. Hard-coded secrets
        if await self._check_hardcoded_secrets(skill_dir):
            results.append(SecurityIssue(
                type="hardcoded_secret",
                severity="medium",
                message="检测到硬编码的密钥或令牌"
            ))
        # 4. Data exfiltration
        if await self._check_data_exfiltration(skill_dir):
            results.append(SecurityIssue(
                type="data_exfiltration",
                severity="high",
                message="检测到潜在的数据外泄风险"
            ))
        return ScanResult(issues=results)

    @staticmethod
    def _read_source(py_file: Path) -> str:
        """Return the text of ``py_file`` decoded as UTF-8.

        Undecodable bytes are replaced rather than raising, so one
        oddly-encoded file cannot abort the whole scan, and the result does
        not depend on the platform's default encoding.
        """
        return py_file.read_text(encoding="utf-8", errors="replace")

    async def _check_prompt_injection(self, skill_dir: Path) -> bool:
        """Report whether any ``*.py`` file contains a suspicious phrase.

        The comparison is case-insensitive.

        Args:
            skill_dir: Directory searched recursively.

        Returns:
            ``True`` as soon as one phrase is found, otherwise ``False``.
        """
        suspicious_patterns = [
            "ignore previous instructions",
            "disregard system prompt",
            "you are now",
        ]
        for py_file in skill_dir.rglob("*.py"):
            # Lower-case the file once instead of once per pattern.
            content = self._read_source(py_file).lower()
            if any(pattern.lower() in content for pattern in suspicious_patterns):
                return True
        return False

    async def _check_command_injection(self, skill_dir: Path) -> bool:
        """Report whether any ``*.py`` file pairs a shell call with input.

        A file is flagged when it contains ``os.system`` or
        ``subprocess.call/run/Popen`` and also the substring ``input`` or
        ``request``. This is a coarse textual heuristic.

        Args:
            skill_dir: Directory searched recursively.

        Returns:
            ``True`` when at least one file matches, otherwise ``False``.
        """
        for py_file in skill_dir.rglob("*.py"):
            content = self._read_source(py_file)
            # Look for a dangerous shell invocation first ...
            if re.search(r"os\.system|subprocess\.(call|run|Popen)", content):
                # ... then for a hint that user-supplied data is involved.
                if "input" in content or "request" in content:
                    return True
        return False
技能配置环境覆盖
@contextmanager
def apply_skill_config_env_overrides(
    workspace_dir: Path,
    channel_name: str,
):
    """Temporarily apply the ``env`` overrides declared by skill configs.

    Overrides are collected from every skill directory whose configuration
    is enabled for ``channel_name`` (no channel list, or the channel is
    listed). When several skills set the same key, the one visited last
    wins. On exit, including on error, each touched variable is restored to
    its previous value or removed if it did not exist before.

    Args:
        workspace_dir: Workspace whose skills directory is inspected.
        channel_name: Channel the request arrived on.

    Yields:
        None. The overrides are active for the duration of the ``with``
        body.
    """
    skills_dir = get_workspace_skills_dir(workspace_dir)
    overrides = {}
    for skill_dir in skills_dir.iterdir():
        # Only directories are skills.
        if not skill_dir.is_dir():
            continue
        config = _load_skill_config(skill_dir)
        # Skills not enabled for this channel must not leak their env.
        if config.channels and channel_name not in config.channels:
            continue
        if config.env:
            overrides.update(config.env)

    # Snapshot the current values before anything is modified.
    original_env = {
        key: os.environ[key] for key in overrides if key in os.environ
    }
    try:
        # Mutate inside the try so a failing assignment (for example a
        # non-string value) still rolls back the keys already set.
        for key, value in overrides.items():
            os.environ[key] = value
        yield
    finally:
        for key in overrides:
            if key in original_env:
                os.environ[key] = original_env[key]
            else:
                os.environ.pop(key, None)
内置技能详解
1. 浏览器控制技能 (browser_cdp)
# 使用Chrome DevTools Protocol控制浏览器
async def browser_navigate(url: str) -> ToolResponse:
    """Navigate the browser to the given URL.

    Args:
        url: Address to open.
    """
    ...
async def browser_screenshot() -> ToolResponse:
    """Take a screenshot of the current page."""
    ...
async def browser_click(selector: str) -> ToolResponse:
    """Click an element on the page.

    Args:
        selector: Selector identifying the element to click.
    """
    ...
2. 文档处理技能
# PDF处理
def read_pdf(file_path: str) -> ToolResponse:
    """Read the content of a PDF file.

    Args:
        file_path: Path of the PDF file.
    """
def extract_images_from_pdf(pdf_path: str) -> ToolResponse:
    """Extract the images contained in a PDF file.

    Args:
        pdf_path: Path of the PDF file.
    """
# Word处理
def read_docx(file_path: str) -> ToolResponse:
    """Read a Word document.

    Args:
        file_path: Path of the Word document.
    """
def edit_docx(file_path: str, operations: list) -> ToolResponse:
    """Edit a Word document.

    Args:
        file_path: Path of the Word document.
        operations: Edit operations to apply.
    """
3. 定时任务技能
def schedule_task(
    cron_expression: str,
    task_name: str,
    task_script: str,
) -> ToolResponse:
    """Create a scheduled task.

    Args:
        cron_expression: Cron expression describing the schedule.
        task_name: Name of the task.
        task_script: Script associated with the task.
    """
def list_scheduled_tasks() -> ToolResponse:
    """List all scheduled tasks."""
def cancel_task(task_id: str) -> ToolResponse:
    """Cancel a scheduled task.

    Args:
        task_id: Identifier of the task to cancel.
    """
技能市场与分发
QwenPaw支持技能的导入导出,方便用户分享和复用:
def export_skill(skill_name: str, output_path: Path) -> None:
    """Export a skill directory as a zip archive.

    Args:
        skill_name: Name of the skill directory to export.
        output_path: Destination of the archive, with or without a
            trailing ``.zip`` extension.
    """
    # NOTE(review): elsewhere get_workspace_skills_dir is called with a
    # workspace directory - confirm that the no-argument form is valid.
    skill_dir = get_workspace_skills_dir() / skill_name

    # make_archive appends ".zip" on its own; drop an extension the caller
    # already supplied so the result is not named "*.zip.zip".
    base_name = Path(output_path)
    if base_name.suffix.lower() == ".zip":
        base_name = base_name.with_suffix("")
    shutil.make_archive(str(base_name), "zip", skill_dir)
async def import_skill(archive_path: Path) -> None:
    """Import a skill from an archive after a security scan.

    The archive is unpacked into a temporary directory and scanned there.
    This is a coroutine because the scanner's ``scan`` is awaited; the
    original plain ``def`` could not contain ``await``.

    Args:
        archive_path: Archive file containing the skill.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # NOTE(review): the archive is untrusted at this point - confirm
        # that unpacking cannot write outside tmpdir (path traversal).
        shutil.unpack_archive(archive_path, tmpdir)
        # Scan the unpacked content before anything reaches the workspace.
        scanner = SkillSecurityScanner()
        result = await scanner.scan(Path(tmpdir))
        if result.is_safe():
            # Copy into the skills directory (elided in the original).
            ...
总结
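QwenPaw的Skills系统设计体现了以下核心原则:
- 插件化:每个技能都是一个独立目录,可按需增删;
- 标准化:统一的目录结构与 config.json 配置;
- 渠道感知:根据渠道解析并启用对应的技能;
- 安全优先:通过安全扫描检测提示词注入、命令注入、硬编码密钥与数据外泄等风险;
- 易于分发:支持以压缩包形式导入、导出技能。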
通过这套Skills系统,QwenPaw具备了极强的扩展能力,用户可以根据自己的需求定制智能体的功能。
往期回顾:
下期预告:
如果对你有帮助,欢迎点赞、在看!
夜雨聆风