模拟大模型 API 服务端,减少 OpenClaw 等智能体测试时的 token 消耗。
新建文件
notepad E:\mock_llm_server.py
python E:\mock_llm_server.py --port 8000 --mode smart
OpenClaw 关键配置如下(baseUrl 中的端口需与启动参数 --port 保持一致):
"baseUrl": "http://127.0.0.1:8000/v1",
"api": "openai-completions",
"apiKey": "mock-api-key",
"id": "mock-gpt-4",
"name": "Mock GPT-4 (本地仿真)",
"contextWindow": 128000,
"maxTokens": 4096,
服务程序如下:
"""Mock LLM Server - 模拟大模型服务端,减少测试时的 token 消耗功能:1. 提供 OpenAI 兼容的 /v1/chat/completions 接口2. 支持流式和非流式响应3. 可配置回复模式:固定回复、模板回复、随机回复4. 记录请求和响应日志启动方式:python mock_llm_server.py [--port 8000] [--mode fixed|template|random]"""import argparseimport jsonimport loggingimport osimport randomimport reimport sysimport uuidfrom datetime import datetimefrom http.server import HTTPServer, BaseHTTPRequestHandlerfrom urllib.parse import urlparse, parse_qsimport threading# 配置日志logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(levelname)s - %(message)s',handlers=[logging.StreamHandler(sys.stdout),logging.FileHandler('mock_llm.log', encoding='utf-8')])logger = logging.getLogger(__name__)# 默认回复模板DEFAULT_RESPONSES = ["好的,我明白了。","收到,我会处理的。","明白了,稍等一下。","了解,我来帮你完成。","好的,开始执行。","收到请求,正在处理。","明白,我会尽快完成。","好的,这个任务我接下了。","了解,需要一点时间。","收到,正在操作中。",]# 工具调用模板DEFAULT_TOOL_CALLS = [{"type": "function","id": "call_" + str(uuid.uuid4())[:8],"function": {"name": "echo","arguments": json.dumps({"message": "这是模拟的工具调用回复"}, ensure_ascii=False)}}]class MockLLMHandler(BaseHTTPRequestHandler):"""处理 OpenAI 兼容 API 请求"""# 类变量:共享配置server_version = "MockLLM/1.0"def log_message(self, format, *args):"""自定义日志格式"""logger.info(f"{self.client_address[0]}:{self.client_address[1]} - {format % args}")def send_json_response(self, data, status=200):"""发送 JSON 响应"""self.send_response(status)self.send_header('Content-Type', 'application/json')self.send_header('Access-Control-Allow-Origin', '*')self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')self.send_header('Access-Control-Allow-Headers', 'Content-Type, Authorization')self.end_headers()if isinstance(data, str):self.wfile.write(data.encode('utf-8'))else:self.wfile.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))def do_OPTIONS(self):"""处理 CORS 预检请求"""self.send_response(200)self.send_header('Access-Control-Allow-Origin', '*')self.send_header('Access-Control-Allow-Methods', 'GET, POST, 
OPTIONS')self.send_header('Access-Control-Allow-Headers', 'Content-Type, Authorization')self.end_headers()def do_GET(self):"""处理 GET 请求"""parsed = urlparse(self.path)# 根路径 - 显示状态if parsed.path == '/' or parsed.path == '/health':self.send_json_response({"status": "ok","service": "Mock LLM Server","version": "1.0.0","mode": self.server.mode,"timestamp": datetime.now().isoformat()})# 模型列表elif parsed.path == '/v1/models':self.send_json_response({"object": "list","data": [{"id": "mock-gpt-4","object": "model","created": 1700000000,"owned_by": "mock","permission": [],"root": "mock-gpt-4","parent": None},{"id": "mock-claude-3","object": "model","created": 1700000001,"owned_by": "mock","permission": [],"root": "mock-claude-3","parent": None}]})else:self.send_json_response({"error": "Not found"}, 404)def do_POST(self):"""处理 POST 请求"""parsed = urlparse(self.path)# 读取请求体content_length = int(self.headers.get('Content-Length', 0))body = self.rfile.read(content_length).decode('utf-8')try:request_data = json.loads(body) if body else {}except json.JSONDecodeError:self.send_json_response({"error": "Invalid JSON"}, 400)return# 聊天完成接口if parsed.path == '/v1/chat/completions':self.handle_chat_completions(request_data)# 其他接口else:self.send_json_response({"error": "Not found"}, 404)def handle_chat_completions(self, request_data):"""处理聊天完成请求"""messages = request_data.get('messages', [])stream = request_data.get('stream', False)model = request_data.get('model', 'mock-gpt-4')# 提取用户消息用于日志user_message = ""for msg in reversed(messages):if msg.get('role') == 'user':content = msg.get('content', '')# 处理多模态消息:可能是字符串或数组if isinstance(content, list):# 多模态消息,取第一个文本块for item in content:if isinstance(item, dict) and item.get('type') == 'text':user_message = item.get('text', '')[:100]breakelse:user_message = content[:100]breaklogger.info(f"[请求] model={model}, stream={stream}, message={user_message}...")# 生成回复内容reply_content = self.generate_response(messages, request_data)if stream:# 
流式响应self.handle_stream_response(model, messages, request_data, reply_content)else:# 非流式响应response = {"id": f"chatcmpl-{uuid.uuid4().hex[:8]}","object": "chat.completion","created": int(datetime.now().timestamp()),"model": model,"choices": [{"index": 0,"message": {"role": "assistant","content": reply_content},"finish_reason": "stop"}],"usage": {"prompt_tokens": random.randint(50, 200),"completion_tokens": random.randint(10, 50),"total_tokens": random.randint(60, 250)}}logger.info(f"[响应] content={reply_content[:50]}...")self.send_json_response(response)def generate_response(self, messages, request_data):"""根据模式生成回复"""# 检查是否有工具调用tools = request_data.get('tools', [])has_tools = len(tools) > 0# 检查最后一条用户消息 - 处理多模态消息格式last_user_msg = ""for msg in reversed(messages):if msg.get('role') == 'user':content = msg.get('content', '')# 处理多模态消息:可能是字符串或数组if isinstance(content, list):for item in content:if isinstance(item, dict) and item.get('type') == 'text':last_user_msg = item.get('text', '')breakelse:last_user_msg = contentbreakmode = self.server.mode# 模式1:固定回复if mode == 'fixed':return "好的,我明白了。"# 模式2:随机回复elif mode == 'random':return random.choice(DEFAULT_RESPONSES)# 模式3:智能模板(根据消息内容生成合适回复)elif mode == 'smart':return self.smart_response(last_user_msg, has_tools)# 模式4:echo 回显elif mode == 'echo':if last_user_msg:return f"收到你的消息:{last_user_msg}"return "收到空消息"# 默认:随机回复else:return random.choice(DEFAULT_RESPONSES)def smart_response(self, user_message: str, has_tools: bool) -> str:"""智能回复 - 根据用户消息内容生成合适的回复"""user_lower = user_message.lower()# 问候if any(w in user_lower for w in ['你好', 'hello', 'hi', '嗨']):return random.choice(["你好!有什么可以帮你的?", "你好!请告诉我你需要什么。", "嗨!需要帮助吗?"])# 确认/执行if any(w in user_lower for w in ['好的', '可以', '执行', '开始', '确认']):return random.choice(["好的,正在执行...", "明白,开始操作。", "收到,立即执行!"])# 询问if any(w in user_lower for w in ['什么是', '怎么', '如何', '?']):return "这个问题我需要一点时间来思考。让我先查看相关信息。"# 文件操作if any(w in user_lower for w in ['读', '写', '文件', '查看']):return f"好的,{user_message[:20]}... 
这个操作我来帮你完成。"# 浏览器操作if any(w in user_lower for w in ['浏览器', '打开', '点击', '网页']):return "明白,操作浏览器中..."# 工具调用时if has_tools:tool_names = []if 'tools' in self.server.last_request:for t in self.server.last_request.get('tools', []):tool_names.append(t.get('function', {}).get('name', 'unknown'))return f"收到,我需要调用工具: {', '.join(tool_names) if tool_names else'处理中'}"# 默认回复return random.choice(DEFAULT_RESPONSES)def handle_stream_response(self, model, messages, request_data, reply_content):"""处理流式响应"""# 发送流式响应 (SSE 格式)self.send_response(200)self.send_header('Content-Type', 'text/event-stream')self.send_header('Cache-Control', 'no-cache')self.send_header('Access-Control-Allow-Origin', '*')self.end_headers()# 分割回复为多个块chunks = [reply_content[i:i+3] for i in range(0, len(reply_content), 3)]if not chunks:chunks = ["好", "的"]for i, chunk in enumerate(chunks):chunk_data = {"id": f"chatcmpl-{uuid.uuid4().hex[:8]}","object": "chat.completion.chunk","created": int(datetime.now().timestamp()),"model": model,"choices": [{"index": 0,"delta": {"content": chunk},"finish_reason": None}]}self.wfile.write(f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n".encode('utf-8'))self.wfile.flush()# 发送完成final_data = {"id": f"chatcmpl-{uuid.uuid4().hex[:8]}","object": "chat.completion.chunk","created": int(datetime.now().timestamp()),"model": model,"choices": [{"index": 0,"delta": {},"finish_reason": "stop"}]}self.wfile.write(f"data: {json.dumps(final_data, ensure_ascii=False)}\n\n".encode('utf-8'))self.wfile.write(b"data: [DONE]\n\n")self.wfile.flush()logger.info(f"[流式响应] 完成,共 {len(chunks)} 个块")class MockLLMServer(HTTPServer):"""Mock LLM 服务器"""def __init__(self, port: int, mode: str):super().__init__(('0.0.0.0', port), MockLLMHandler)self.port = portself.mode = modeself.last_request = {}logger.info(f"Mock LLM Server 初始化完成")logger.info(f" 监听地址: http://0.0.0.0:{port}")logger.info(f" 模式: {mode}")logger.info(f" API 端点: /v1/chat/completions")def main():parser = argparse.ArgumentParser(description='Mock 
LLM Server - 模拟大模型服务端')parser.add_argument('--port', '-p', type=int, default=8000, help='监听端口 (默认: 8000)')parser.add_argument('--mode', '-m', type=str, default='smart',choices=['fixed', 'random', 'smart', 'echo'],help='回复模式: fixed(固定) random(随机) smart(智能) echo(回显)')parser.add_argument('--host', type=str, default='0.0.0.0', help='监听地址 (默认: 0.0.0.0)')args = parser.parse_args()import coloramacolorama.init(autoreset=True)os.environ['PYTHONIOENCODING'] = 'utf-8'print("\n" + "="*50)print(" Mock LLM Server 模拟大模型服务端")print("="*50)print(f" 端口: {args.port}")print(f" 模式: {args.mode}")print(f" 地址: http://{args.host}:{args.port}")print("="*50)print("\nAPI 端点:")print(f" - GET /health (健康检查)")print(f" - GET /v1/models (模型列表)")print(f" - POST /v1/chat/completions (聊天完成)")print("\n使用示例:")print(f" curl -X POST http://localhost:{args.port}/v1/chat/completions \\")print(f' -H "Content-Type: application/json" \\')print(f' -d \'{{"messages": [{{"role": "user", "content": "你好"}}], "model": "mock-gpt-4"}}\'')print("\n" + "="*50 + "\n")server = MockLLMServer(args.port, args.mode)try:logger.info(f"服务器启动,监听端口 {args.port}")server.serve_forever()except KeyboardInterrupt:logger.info("服务器关闭")server.shutdown()if __name__ == '__main__':main()
夜雨聆风