2. sglang源码调试环境搭建

前言

使用源码方式安装sglang
官方文档https://docs.sglang.io/
有一个sglang的访谈值得一看朱邦华: SGLang，强化学习，英伟达收购，二次创业，清华，伯克利，LMSYS，Chatbot Arena，善于放弃

环境搭建记录

新建并激活conda环境：
conda create -n sglang_src python=3.10 -y
conda activate sglang_src
升级pip并安装ninja：
pip install --upgrade pip
pip install ninja
下载源码
安装：
cd sglang-0.5.12.post1
pip install -e "python[all]"
用vscode打开源码，新建launch.json

{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "sglang serve",
            "type": "debugpy",
            "request": "launch",
            "python": "/home/t/miniconda3/envs/sglang_src/bin/python",
            "module": "sglang.launch_server",
            // "program": "{env:PATH}",
            // 注：原文此处疑似缺失 "args"（例如 "--model-path" 等启动参数），请按自己的模型路径补充
            "env": {
                "LD_LIBRARY_PATH": "/usr/local/cuda-12.6/lib64:${env:LD_LIBRARY_PATH}"
            },
            "console": "integratedTerminal",
            "justMyCode": false
        }
    ]
}

启动调试，修改错误

[2026-06-12 15:17:28] Using default HuggingFace chat template with detected content format: string[2026-06-12 15:17:57] Init torch distributed begin.[2026-06-12 15:17:57] Init torch distributed ends. elapsed=0.21 s, mem usage=0.06 GB[2026-06-12 15:18:01] Load weight begin. avail mem=46.20 GBMulti-thread loading shards: 100% Completed | 4/4 [00:05<00:00,  1.29s/it][2026-06-12 15:18:07] Load weight end. elapsed=5.59 s, type=Qwen2ForCausalLM, avail mem=31.91 GB, mem usage=14.30 GB.[2026-06-12 15:18:07] Using KV cache dtype: torch.bfloat16[2026-06-12 15:18:07] KV Cache is allocated. #tokens: 470181, K size: 12.56 GB, V size: 12.56 GB[2026-06-12 15:18:07] Memory pool end. avail mem=6.23 GB[2026-06-12 15:18:07] Capture cuda graph begin. This can take up to several minutes. avail mem=5.67 GB[2026-06-12 15:18:07] Capture cuda graph bs [1, 2, 4, 8, 12, 16, 24, 32]Capturing batches (bs=32 avail_mem=5.65 GB):   0%|                                                                                                                           | 0/8 [00:53<?, ?it/s][2026-06-12 15:19:02] Scheduler hit an exception: Traceback (most recent call last):  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/cuda_graph_runner.py", line 736, in __init__    self.capture()  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/cuda_graph_runner.py", line 918, in capture    _capture_one_stream()  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/cuda_graph_runner.py", line 906, in _capture_one_stream    ) = self.capture_one_batch_size(bs, forward, stream_idx)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/cuda_graph_runner.py", line 1187, in capture_one_batch_size    run_once()  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/cuda_graph_runner.py", line 1165, in run_once    logits_output_or_pp_proxy_tensors = forward(  File 
"/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 124, in decorate_context    return func(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/models/qwen2.py", line 486, in forward    hidden_states = self.model(  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl    return self._call_impl(*args, **kwargs)  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl    return forward_call(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/models/qwen2.py", line 367, in forward    hidden_states, residual = layer(  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl    return self._call_impl(*args, **kwargs)  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl    return forward_call(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/models/qwen2.py", line 261, in forward    hidden_states = self.mlp(hidden_states)  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl    return self._call_impl(*args, **kwargs)  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl    return forward_call(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/models/qwen2.py", line 103, in forward    x = self.act_fn(gate_up)  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl    return self._call_impl(*args, **kwargs)  File 
"/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl    return forward_call(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/layers/utils/multi_platform.py", line 83, in forward    return self._forward_method(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/layers/activation.py", line 94, in forward_cuda    silu_and_mul(x, out)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/activation.py", line 109, in silu_and_mul    return run_activation("silu", input, out, expert_ids, expert_step)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/activation.py", line 97, in run_activation    _run_activation_inplace(op_name, input, out)  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/torch/_ops.py", line 1269, in __call__    return self._op(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/activation.py", line 57, in _run_activation_inplace    module = _jit_activation_module(input.dtype)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/utils.py", line 57, in wrapper    result_map[key] = fn(*args, **kwargs)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/activation.py", line 34, in _jit_activation_module    return load_jit(  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/utils.py", line 208, in load_jit    return load_inline(  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/cpp/extension.py", line 1035, in load_inline    build_inline(  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/cpp/extension.py", line 877, in build_inline    return _build_impl(  File "/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/cpp/extension.py", line 672, in _build_impl    build_ninja(str(build_dir))  File 
"/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/cpp/extension.py", line 542, in build_ninja    raise RuntimeError("\n".join(msg))RuntimeError: ninja exited with status 1stdout:[1/2] /usr/local/cuda-12.6/bin/nvcc  --generate-dependencies-with-compile --dependency-output cuda_0.o.d -Xcompiler -fPIC -std=c++17 -O2 -gencode=arch=compute_89,code=sm_89 -DSGL_CUDA_ARCH=890 -std=c++20 -O3 --expt-relaxed-constexpr --use_fast_math -I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include -I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include -I/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/include -c /home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cu -o cuda_0.oFAILED: [code=1] cuda_0.o /usr/local/cuda-12.6/bin/nvcc  --generate-dependencies-with-compile --dependency-output cuda_0.o.d -Xcompiler -fPIC -std=c++17 -O2 -gencode=arch=compute_89,code=sm_89 -DSGL_CUDA_ARCH=890 -std=c++20 -O3 --expt-relaxed-constexpr --use_fast_math -I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include -I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include -I/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/include -c /home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cu -o cuda_0.onvcc warning : incompatible redefinition for option 'std', the last value of this option was usednvcc warning : incompatible redefinition for option 'optimize', the last value of this option was used/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh(146): warning #2361-D: invalid narrowing conversion from "signed long" to "unsigned int"          .hidden_dim = hidden_size,                        ^Remark: The warnings can be suppressed with "-diag-suppress 
<warning-number>"/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh(146): warning #2361-D: invalid narrowing conversion from "signed long" to "unsigned int"          .hidden_dim = hidden_size,                        ^          detected during instantiation of "void <unnamed>::ActivationKernel<T, kUsePDL>::run_activation(tvm::ffi::TensorView, tvm::ffi::TensorView, std::string) [with T=bf16_t, kUsePDL=false]" at line 8 of /home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cuRemark: The warnings can be suppressed with "-diag-suppress <warning-number>"/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In static member function ‘static void _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::launch(const tvm::ffi::TensorView&, const tvm::ffi::TensorView&, const std::string&, const int32_t*, uint32_t)’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:153:42: error: no matching function for call to ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel<true>(const std::string&)’  153 |       const auto kernel = select_kernel<true>(type);      |                     ~~~~~~~~~~~~~~~~~~~~~^~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note: candidate: ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype (activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&)’   95 |   static auto select_kernel(const std::string& type)      | ^ ~~~~~~~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note:   template argument deduction/substitution 
failed:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In substitution of ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype (activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&) [with bool kFilterExpert = <missing>; T = true; bool kUsePDL = <missing>]’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:153:42:   required from here/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: error: type/value mismatch at argument 1 in template parameter list for ‘template<class T, bool kUsePDL> template<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind kAct, bool kFilterExpert> constexpr const auto _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::activation_kernel<kAct, kFilterExpert>’   95 |   static auto select_kernel(const std::string& type)      |                                                    ^                                                       /home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: note:   expected a type, got ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU’/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:156:43: error: no matching function for call to ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel<false>(const std::string&)’  156 |       const auto kernel = select_kernel<false>(type);      |                     ~~~~~~~~~~~~~~~~~~~~~~^~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note: candidate: ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype 
(activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&)’   95 |   static auto select_kernel(const std::string& type)      | ^ ~~~~~~~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note:   template argument deduction/substitution failed:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In substitution of ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype (activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&) [with bool kFilterExpert = <missing>; T = false; bool kUsePDL = <missing>]’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:156:43:   required from here/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: error: type/value mismatch at argument 1 in template parameter list for ‘template<class T, bool kUsePDL> template<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind kAct, bool kFilterExpert> constexpr const auto _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::activation_kernel<kAct, kFilterExpert>’   95 |   static auto select_kernel(const std::string& type)      |                                                    ^                                                       /home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: note:   expected a type, got ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU’/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In instantiation of ‘static void 
_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::launch(const tvm::ffi::TensorView&, const tvm::ffi::TensorView&, const std::string&, const int32_t*, uint32_t) [with T = __nv_bfloat16; bool kUsePDL = false; std::string = std::__cxx11::basic_string<char>; int32_t = int; uint32_t = unsigned int]’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:162:9:   required from ‘static void _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::run_activation(tvm::ffi::TensorView, tvm::ffi::TensorView, std::string) [with T = __nv_bfloat16; bool kUsePDL = false; std::string = std::__cxx11::basic_string<char>]’/home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cu:8:427:   required from here/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:143:102: warning: narrowing conversion of ‘(long int)hidden_size’ from ‘long int’ to ‘uint32_t’ {aka ‘unsigned int’} [-Wnarrowing]  143 |     const auto params = ActivationParams{      |                                                                                                      ^          ninja: build stopped: subcommand failed.During handling of the above exception, another exception occurred:Traceback (most recent call last):  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/managers/scheduler.py", line 4025, in run_scheduler_process    scheduler = Scheduler(  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/managers/scheduler.py", line 437, in __init__    self.init_model_worker()  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/managers/scheduler.py", line 718, in init_model_worker    self.init_tp_model_worker()  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/managers/scheduler.py", line 673, in init_tp_model_worker    self.tp_worker = TpModelWorker(**worker_kwargs)  File 
"/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/managers/tp_worker.py", line 262, in __init__    self._init_model_runner()  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/managers/tp_worker.py", line 347, in _init_model_runner    self._model_runner = ModelRunner(  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/model_runner.py", line 535, in __init__    self.initialize(pre_model_load_memory)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/model_runner.py", line 791, in initialize    self.init_device_graphs()  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/model_runner.py", line 2965, in init_device_graphs    self.graph_runner = graph_runners[self.device](self)  File "/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/srt/model_executor/cuda_graph_runner.py", line 738, in __init__    raise Exception(Exception: Capture cuda graph failed: ninja exited with status 1stdout:[1/2] /usr/local/cuda-12.6/bin/nvcc  --generate-dependencies-with-compile --dependency-output cuda_0.o.d -Xcompiler -fPIC -std=c++17 -O2 -gencode=arch=compute_89,code=sm_89 -DSGL_CUDA_ARCH=890 -std=c++20 -O3 --expt-relaxed-constexpr --use_fast_math -I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include -I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include -I/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/include -c /home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cu -o cuda_0.oFAILED: [code=1] cuda_0.o /usr/local/cuda-12.6/bin/nvcc  --generate-dependencies-with-compile --dependency-output cuda_0.o.d -Xcompiler -fPIC -std=c++17 -O2 -gencode=arch=compute_89,code=sm_89 -DSGL_CUDA_ARCH=890 -std=c++20 -O3 --expt-relaxed-constexpr --use_fast_math -I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include 
-I/home/t/miniconda3/envs/sglang_src/lib/python3.10/site-packages/tvm_ffi/include -I/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/include -c /home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cu -o cuda_0.onvcc warning : incompatible redefinition for option 'std', the last value of this option was usednvcc warning : incompatible redefinition for option 'optimize', the last value of this option was used/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh(146): warning #2361-D: invalid narrowing conversion from "signed long" to "unsigned int"          .hidden_dim = hidden_size,                        ^Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh(146): warning #2361-D: invalid narrowing conversion from "signed long" to "unsigned int"          .hidden_dim = hidden_size,                        ^          detected during instantiation of "void <unnamed>::ActivationKernel<T, kUsePDL>::run_activation(tvm::ffi::TensorView, tvm::ffi::TensorView, std::string) [with T=bf16_t, kUsePDL=false]" at line 8 of /home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cuRemark: The warnings can be suppressed with "-diag-suppress <warning-number>"/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In static member function ‘static void _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::launch(const tvm::ffi::TensorView&, const tvm::ffi::TensorView&, const std::string&, const int32_t*, uint32_t)’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:153:42: error: no matching function for call to ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel<true>(const std::string&)’  153 |       const auto kernel = 
select_kernel<true>(type);      |                     ~~~~~~~~~~~~~~~~~~~~~^~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note: candidate: ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype (activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&)’   95 |   static auto select_kernel(const std::string& type)      | ^ ~~~~~~~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note:   template argument deduction/substitution failed:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In substitution of ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype (activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&) [with bool kFilterExpert = <missing>; T = true; bool kUsePDL = <missing>]’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:153:42:   required from here/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: error: type/value mismatch at argument 1 in template parameter list for ‘template<class T, bool kUsePDL> template<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind kAct, bool kFilterExpert> constexpr const auto _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::activation_kernel<kAct, kFilterExpert>’   95 |   static auto select_kernel(const std::string& type)      |                                                    ^                                                       
/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: note:   expected a type, got ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU’/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:156:43: error: no matching function for call to ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel<false>(const std::string&)’  156 |       const auto kernel = select_kernel<false>(type);      |                     ~~~~~~~~~~~~~~~~~~~~~~^~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note: candidate: ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype (activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&)’   95 |   static auto select_kernel(const std::string& type)      | ^ ~~~~~~~~~~~/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:1: note:   template argument deduction/substitution failed:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In substitution of ‘template<class T, bool kUsePDL> template<bool kFilterExpert> static decltype (activation_kernel<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU, kFilterExpert>) _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::select_kernel(const std::string&) [with bool kFilterExpert = <missing>; T = false; bool kUsePDL = <missing>]’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:156:43:   required from here/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: error: type/value mismatch at argument 1 in template parameter list for ‘template<class T, bool 
kUsePDL> template<_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind kAct, bool kFilterExpert> constexpr const auto _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::activation_kernel<kAct, kFilterExpert>’   95 |   static auto select_kernel(const std::string& type)      |                                                    ^                                                       /home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:95:52: note:   expected a type, got ‘_GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKind::kSiLU’/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh: In instantiation of ‘static void _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::launch(const tvm::ffi::TensorView&, const tvm::ffi::TensorView&, const std::string&, const int32_t*, uint32_t) [with T = __nv_bfloat16; bool kUsePDL = false; std::string = std::__cxx11::basic_string<char>; int32_t = int; uint32_t = unsigned int]’:/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:162:9:   required from ‘static void _GLOBAL__N__7f02c643_7_cuda_cu_fc7cb620::ActivationKernel<T, kUsePDL>::run_activation(tvm::ffi::TensorView, tvm::ffi::TensorView, std::string) [with T = __nv_bfloat16; bool kUsePDL = false; std::string = std::__cxx11::basic_string<char>]’/home/t/.cache/tvm-ffi/sgl_kernel_jit_activation_bf16_t_false_3a2beb40cee02f8b/cuda.cu:8:427:   required from here/home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh:143:102: warning: narrowing conversion of ‘(long int)hidden_size’ from ‘long int’ to ‘uint32_t’ {aka ‘unsigned int’} [-Wnarrowing]  143 |     const auto params = ActivationParams{      |                                                                                                      ^          ninja: build stopped: subcommand failed.Possible solutions:1. 
set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)2. set --cuda-graph-max-bs to a smaller value (e.g., 16)3. disable torch compile by not using --enable-torch-compile4. disable CUDA graph by --disable-cuda-graph. (Not recommended. Huge performance loss)Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose

解决办法：在sglang的issue中找到了解决方法，见 https://github.com/sgl-project/sglang/issues/25682 。在 /home/t/lld/learn/sglang-0.5.12.post1/python/sglang/jit_kernel/csrc/elementwise/activation.cuh 文件中修改select_kernel函数，原来是：

template <bool kFilterExpert>
static auto select_kernel(const std::string& type)
    -> decltype(activation_kernel<ActivationKind::kSiLU, kFilterExpert>)
{
  using namespace host;
  if (type == "silu") {
    return activation_kernel<ActivationKind::kSiLU, kFilterExpert>;
  } else if (type == "gelu") {
    return activation_kernel<ActivationKind::kGELU, kFilterExpert>;
  } else if (type == "gelu_tanh") {
    return activation_kernel<ActivationKind::kGELUTanh, kFilterExpert>;
  } else {
    Panic("unsupported activation type: ", type);
  }
  return nullptr;
}

去掉两行（尾置返回类型和 return nullptr，这里用注释掉的方式），改成：

  template <bool kFilterExpert>
  static auto select_kernel(const std::string& type)
      //-> decltype(activation_kernel<ActivationKind::kSiLU, kFilterExpert>)
  {
    using namespace host;
    if (type == "silu") {
      return activation_kernel<ActivationKind::kSiLU, kFilterExpert>;
    } else if (type == "gelu") {
      return activation_kernel<ActivationKind::kGELU, kFilterExpert>;
    } else if (type == "gelu_tanh") {
      return activation_kernel<ActivationKind::kGELUTanh, kFilterExpert>;
    } else {
      Panic("unsupported activation type: ", type);
    }
    // return nullptr;
  }

下断点成功，可以愉快地调试了

总结

搭建了sglang调试环境
如果是为了学习sglang原理，推荐mini-sglang