CMakeLists.txt
# Top-level CMake configuration for the tensorrt_llm project.
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)

# Emit compile_commands.json into the build tree.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Provides check_language(), used further down to probe for a CUDA compiler.
include(CheckLanguage)

# Project-local helper modules: set_ifndef() and find_library_create_target().
include(cmake/modules/set_ifndef.cmake)
include(cmake/modules/find_library_create_target.cmake)

# Only C++ is enabled at this point; CUDA is enabled later with
# enable_language(CUDA) once the compiler version has been inspected.
project(tensorrt_llm LANGUAGES CXX)
# find_library_create_target(<target_name> <lib> <libtype> <hints>)
#
# Locates library <lib> and wraps it in an IMPORTED target named <target_name>.
#
# Arguments:
#   target_name - name of the imported target to create
#   lib         - library base name handed to find_library()
#   libtype     - library type keyword given to add_library() (e.g. SHARED)
#   hints       - directory searched before the default locations
#
# Search order (find_library() keeps the first hit cached in <lib>_LIB_PATH, so
# later calls are no-ops once a path is found):
#   1. Debug builds only: <lib>${TRT_DEBUG_POSTFIX} inside <hints>
#   2. <lib> inside <hints>
#   3. <lib> in the default search locations
#
# A WARNING is emitted when nothing is found; the target is still created so
# that existing callers keep configuring as before.
macro(find_library_create_target target_name lib libtype hints)
  message(
    STATUS
      "========================= Importing and creating target ${target_name} =========================="
  )
  message(STATUS "Looking for library ${lib}")

  # NOTE: CMAKE_BUILD_TYPE is empty under multi-config generators, in which
  # case the debug-postfixed name is never tried.
  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    find_library(
      ${lib}_LIB_PATH ${lib}${TRT_DEBUG_POSTFIX}
      HINTS ${hints}
      NO_DEFAULT_PATH)
  endif()
  find_library(
    ${lib}_LIB_PATH ${lib}
    HINTS ${hints}
    NO_DEFAULT_PATH)
  find_library(${lib}_LIB_PATH ${lib})
  message(STATUS "Library that was found ${${lib}_LIB_PATH}")

  # Make a failed lookup visible instead of silently producing a target whose
  # location is "<lib>_LIB_PATH-NOTFOUND".
  if(NOT ${lib}_LIB_PATH)
    message(
      WARNING
        "Library ${lib} was not found; imported target ${target_name} has no usable location"
    )
  endif()

  add_library(${target_name} ${libtype} IMPORTED)
  set_target_properties(
    ${target_name}
    PROPERTIES IMPORTED_LOCATION "${${lib}_LIB_PATH}"
               IMPORTED_IMPLIB "${${lib}_LIB_PATH}")
  message(
    STATUS
      "=========================================================================================="
  )
endmacro()
# set_ifndef(<variable> <value>)
#
# Assigns <value> to <variable> in the caller's scope, but only when the
# variable is not already defined. Used to provide overridable defaults.
#
# Arguments:
#   variable - name of the variable to initialise
#   value    - default value to assign
function(set_ifndef variable value)
  if(NOT DEFINED ${variable})
    # Quoted so that an empty default still defines the variable; unquoted, an
    # empty expansion would collapse to `set(<variable> PARENT_SCOPE)`, which
    # unsets the variable instead.
    set(${variable}
        "${value}"
        PARENT_SCOPE)
  endif()
endfunction()
# ------------------------------------------------------------------------------
# Build options
# ------------------------------------------------------------------------------
option(BUILD_PYT "Build in PyTorch TorchScript class mode" ON)
option(BUILD_TESTS "Build Google tests" ON)
option(BUILD_BENCHMARKS "Build benchmarks" ON)
option(NVTX_DISABLE "Disable all NVTX features" ON)

# When NVTX is disabled, the NVTX_DISABLE definition is added for this
# directory and everything below it.
if(NOT NVTX_DISABLE)
  message(STATUS "NVTX is enabled")
else()
  add_compile_definitions("NVTX_DISABLE")
  message(STATUS "NVTX is disabled")
endif()
# The open-source release does not ship the batch manager sources, so building
# it from source defaults to ON only when its CMakeLists.txt is present.
set(BUILD_BATCH_MANAGER_DEFAULT OFF)
if(EXISTS
   "${CMAKE_CURRENT_SOURCE_DIR}/tensorrt_llm/batch_manager/CMakeLists.txt")
  set(BUILD_BATCH_MANAGER_DEFAULT ON)
endif()

option(BUILD_BATCH_MANAGER "Build batch manager from source"
       ${BUILD_BATCH_MANAGER_DEFAULT})

if(NOT BUILD_BATCH_MANAGER)
  message(STATUS "Importing batch manager")
else()
  message(STATUS "Building batch manager")
endif()
# Determine the CUDA compiler version before enabling the CUDA language.
#
# check_language(CUDA) only locates the compiler, so on non-Windows hosts the
# version is read from the `--version` output of that compiler and stored in
# CMAKE_CUDA_COMPILER_VERSION for the checks that follow.
check_language(CUDA)
if(NOT CMAKE_CUDA_COMPILER)
  message(FATAL_ERROR "No CUDA compiler found")
endif()
message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}")

if(NOT WIN32) # Linux
  # Run the compiler directly instead of through a shell pipeline: with
  # `bash -c "... | egrep | cut"` the reported exit status is that of the last
  # pipeline stage, so a failing compiler invocation went unnoticed.
  execute_process(
    COMMAND "${CMAKE_CUDA_COMPILER}" --version
    RESULT_VARIABLE _BASH_SUCCESS
    OUTPUT_VARIABLE _CUDA_COMPILER_VERSION_OUTPUT
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  if(NOT _BASH_SUCCESS EQUAL 0)
    message(FATAL_ERROR "Failed to determine CUDA version")
  endif()

  # Extract "<major>.<minor>.<patch>" from the "V<major>.<minor>.<patch>"
  # token; the dots are escaped so they only match literal dots.
  string(REGEX MATCH "V([0-9]+\\.[0-9]+\\.[0-9]+)" _CUDA_COMPILER_VERSION_MATCH
               "${_CUDA_COMPILER_VERSION_OUTPUT}")
  if("${_CUDA_COMPILER_VERSION_MATCH}" STREQUAL "")
    message(FATAL_ERROR "Failed to determine CUDA version")
  endif()
  set(CMAKE_CUDA_COMPILER_VERSION "${CMAKE_MATCH_1}") # cuda compiler version
else() # Windows
  # ... (the Windows branch is elided in this listing)
endif()
# Minimum supported CUDA version: 11.2. Configuration stops with an error when
# the detected compiler is older.
set(CUDA_REQUIRED_VERSION "11.2")
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL CUDA_REQUIRED_VERSION)
  message(
    FATAL_ERROR
      "CUDA version ${CMAKE_CUDA_COMPILER_VERSION} must be at least ${CUDA_REQUIRED_VERSION}"
  )
endif()
# Initialize CMAKE_CUDA_ARCHITECTURES before enabling CUDA. A value supplied by
# the user is left untouched.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  # Baseline set used for every supported compiler version.
  set(CMAKE_CUDA_ARCHITECTURES 70-real 80-real 86-real)
  # Compilers >= 11.8 additionally get the 89 and 90 targets.
  if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.8")
    list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real 90-real)
  endif()
endif()
message(STATUS "GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}")
# Locate the CUDA-related libraries.
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)

# cuDNN: CUDNN_ROOT_DIR is tried first, then the toolkit library directory.
find_library(
  CUDNN_LIB cudnn
  HINTS ${CUDNN_ROOT_DIR} ${CUDAToolkit_LIBRARY_DIR}
  PATH_SUFFIXES lib64 lib lib/x64)

# cuBLAS and cuBLASLt.
find_library(
  CUBLAS_LIB cublas
  HINTS ${CUDAToolkit_LIBRARY_DIR}
  PATH_SUFFIXES lib64 lib lib/stubs)
find_library(
  CUBLASLT_LIB cublasLt
  HINTS ${CUDAToolkit_LIBRARY_DIR}
  PATH_SUFFIXES lib64 lib lib/stubs)

# CUDA driver library; the stubs directories are included in the search.
find_library(
  CUDA_DRV_LIB cuda
  HINTS ${CUDAToolkit_LIBRARY_DIR}
  PATH_SUFFIXES stubs lib lib64 lib/stubs lib64/stubs)

# Request the static CUDA runtime library.
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)

find_library(RT_LIB rt)

# Multi-device support is on by default and requires NCCL.
set_ifndef(ENABLE_MULTI_DEVICE 1)
if(ENABLE_MULTI_DEVICE EQUAL 1)
  # NCCL dependencies
  set_ifndef(NCCL_LIB_DIR /usr/lib/${CMAKE_SYSTEM_PROCESSOR}-linux-gnu/)
  set_ifndef(NCCL_INCLUDE_DIR /usr/include/)
  find_library(NCCL_LIB nccl HINTS ${NCCL_LIB_DIR})
endif()
# Add include paths.
#
# TRT_LLM_ROOT_DIR is the parent of this project's source directory, and the
# third-party headers are taken from its 3rdparty/ folder. PROJECT_SOURCE_DIR is
# used instead of CMAKE_SOURCE_DIR so the path stays correct when this project
# is pulled in through add_subdirectory() from another build.
get_filename_component(TRT_LLM_ROOT_DIR "${PROJECT_SOURCE_DIR}" DIRECTORY)
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)

# CUDAToolkit_INCLUDE_DIRS is the variable provided by
# find_package(CUDAToolkit). CUDA_INCLUDE_DIRS comes from the legacy FindCUDA
# module, which this file never invokes; it is kept only so a value supplied
# from outside is still honoured.
include_directories(
  ${CUDAToolkit_INCLUDE_DIRS}
  ${CUDA_INCLUDE_DIRS}
  ${CUDNN_ROOT_DIR}/include
  ${NCCL_INCLUDE_DIR}
  ${3RDPARTY_DIR}/cutlass/include
  ${3RDPARTY_DIR}/NVTX/include
  ${3RDPARTY_DIR}/json/include)
# ------------------------------------------------------------------------------
# TensorRT dependencies
# ------------------------------------------------------------------------------
# Both locations can be overridden by defining the variable beforehand.
set_ifndef(TRT_LIB_DIR ${CMAKE_BINARY_DIR})
set_ifndef(TRT_INCLUDE_DIR /usr/include/${CMAKE_SYSTEM_PROCESSOR}-linux-gnu)

# Imported targets: `nvinfer` (library nvinfer) and `nvuffparser` (library
# nvparsers), both looked up in TRT_LIB_DIR first.
set(TRT_LIB nvinfer)
find_library_create_target(${TRT_LIB} nvinfer SHARED ${TRT_LIB_DIR})
find_library_create_target(nvuffparser nvparsers SHARED ${TRT_LIB_DIR})
# Toolkit >= 11: enable BF16 support.
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11")
  add_definitions("-DENABLE_BF16")
  message(
    STATUS
      "CUDAToolkit_VERSION ${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR} is greater or equal than 11.0, enable -DENABLE_BF16 flag"
  )
endif()

# Toolkit >= 11.8: enable FP8 support.
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
  add_definitions("-DENABLE_FP8")
  message(
    STATUS
      "CUDAToolkit_VERSION ${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR} is greater or equal than 11.8, enable -DENABLE_FP8 flag"
  )
endif()
# MPI
#
# MPI isn't used until tensorrt_llm/CMakeLists.txt is invoked. However, if
# find_package(MPI) is not called before "CMAKE_CXX_FLAGS" is set, it breaks on
# Windows for some reason, so it is called here as a workaround.
find_package(MPI REQUIRED)
add_definitions("-DOMPI_SKIP_MPICXX")
# C++17, required, without compiler extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Project-wide preprocessor definitions carried in the C++ flags.
string(
  APPEND CMAKE_CXX_FLAGS
  " -DBUILD_SYSTEM=cmake_oss -DENABLE_MULTI_DEVICE=${ENABLE_MULTI_DEVICE}")

# Disable deprecated declarations warnings
if(NOT WIN32)
  # Placed in front of the existing flags.
  string(PREPEND CMAKE_CXX_FLAGS "-Wno-deprecated-declarations ")
else()
  # ... (the Windows branch is elided in this listing)
endif()

# Extra flags for the CUDA compiler.
string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda"
       " --expt-relaxed-constexpr")
# Header directories shared by all targets: the project root plus the CUDA
# toolkit headers. CUDAToolkit_INCLUDE_DIRS is the documented result variable
# of find_package(CUDAToolkit); the singular CUDAToolkit_INCLUDE_DIR is an
# implementation detail of the find module and is not guaranteed to be set.
set(COMMON_HEADER_DIRS ${PROJECT_SOURCE_DIR} ${CUDAToolkit_INCLUDE_DIRS})
message(STATUS "COMMON_HEADER_DIRS: ${COMMON_HEADER_DIRS}")
# PyTorch (TorchScript) integration: derives TORCH_CUDA_ARCH_LIST from
# CMAKE_CUDA_ARCHITECTURES, locates Python and the installed torch package, and
# applies the Torch compile flags to this directory.
if(BUILD_PYT)
  # Build TORCH_CUDA_ARCH_LIST. An entry "XY" or "XY-real" becomes "X.Y";
  # entries without the "-real" suffix additionally get "+PTX"; "native" maps
  # to "Auto". Anything else is rejected.
  set(TORCH_CUDA_ARCH_LIST "")
  foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
    if(CUDA_ARCH MATCHES "^([0-9])([0-9])(-real)*$")
      set(TORCH_ARCH "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}")
    elseif(CUDA_ARCH STREQUAL "native")
      set(TORCH_ARCH "Auto")
    else()
      message(FATAL_ERROR "${CUDA_ARCH} is not supported")
    endif()
    if(NOT CUDA_ARCH MATCHES "-real$" AND NOT CUDA_ARCH STREQUAL "native")
      string(APPEND TORCH_ARCH "+PTX")
    endif()
    list(APPEND TORCH_CUDA_ARCH_LIST ${TORCH_ARCH})
  endforeach()
  message(STATUS "TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}")

  # ignore values passed from the environment
  if(DEFINED ENV{TORCH_CUDA_ARCH_LIST})
    message(
      WARNING
        "Ignoring environment variable TORCH_CUDA_ARCH_LIST=$ENV{TORCH_CUDA_ARCH_LIST}"
    )
  endif()
  unset(ENV{TORCH_CUDA_ARCH_LIST})

  find_package(
    Python3
    COMPONENTS Interpreter Development
    REQUIRED)
  message(STATUS "Found Python executable at ${Python3_EXECUTABLE}")
  message(STATUS "Found Python libraries at ${Python3_LIBRARY_DIRS}")
  link_directories("${Python3_LIBRARY_DIRS}")
  list(APPEND COMMON_HEADER_DIRS ${Python3_INCLUDE_DIRS})

  # Query the installed torch version. The exit status is checked first so
  # that a missing or broken torch installation is reported as such rather
  # than as an empty version being "less than 1.5.0".
  execute_process(
    COMMAND
      ${Python3_EXECUTABLE} "-c"
      "from __future__ import print_function; import torch; print(torch.__version__,end='');"
    RESULT_VARIABLE _PYTHON_SUCCESS
    OUTPUT_VARIABLE TORCH_VERSION)
  if(NOT _PYTHON_SUCCESS EQUAL 0)
    message(FATAL_ERROR "Torch config Error.")
  endif()
  if(TORCH_VERSION VERSION_LESS "1.5.0")
    message(FATAL_ERROR "PyTorch >= 1.5.0 is needed for TorchScript mode.")
  endif()

  # Locate the torch package directory so find_package(Torch) can pick up the
  # CMake configuration beneath it.
  execute_process(
    COMMAND
      ${Python3_EXECUTABLE} "-c"
      "from __future__ import print_function; import os; import torch;print(os.path.dirname(torch.__file__),end='');"
    RESULT_VARIABLE _PYTHON_SUCCESS
    OUTPUT_VARIABLE TORCH_DIR)
  # Numeric comparison: `MATCHES 0` is a regex test and would also accept any
  # exit code that merely contains the digit 0 (10, 20, 101, ...).
  if(NOT _PYTHON_SUCCESS EQUAL 0)
    message(FATAL_ERROR "Torch config Error.")
  endif()
  list(APPEND CMAKE_PREFIX_PATH ${TORCH_DIR})

  find_package(Torch REQUIRED)
  message(STATUS "TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}")
  add_compile_options(${TORCH_CXX_FLAGS})
  add_compile_definitions(TORCH_CUDA=1)
endif()
# Read the TensorRT version from NvInferVersion.h.
#
# Every "#define NV_TENSORRT_*" line is collected, then the numeric value of
# each component is extracted:
#   TRT_MAJOR / TRT_MINOR / TRT_PATCH / TRT_BUILD  from NV_TENSORRT_<TYPE>
#   TRT_SO_MAJOR / TRT_SO_MINOR / TRT_SO_PATCH     from NV_TENSORRT_SONAME_<TYPE>
# The capture group takes the whole run of digits, so multi-digit components
# (for example a major version of 10) are read in full rather than truncated
# to their first digit. A component that is not present yields an empty value.
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" VERSION_STRINGS
     REGEX "#define NV_TENSORRT_.*")

foreach(TYPE MAJOR MINOR PATCH BUILD)
  string(REGEX MATCH "NV_TENSORRT_${TYPE} +([0-9]+)" TRT_TYPE_STRING
               "${VERSION_STRINGS}")
  set(TRT_${TYPE} "${CMAKE_MATCH_1}")
endforeach()

foreach(TYPE MAJOR MINOR PATCH)
  string(REGEX MATCH "NV_TENSORRT_SONAME_${TYPE} +([0-9]+)" TRT_TYPE_STRING
               "${VERSION_STRINGS}")
  set(TRT_SO_${TYPE} "${CMAKE_MATCH_1}")
endforeach()

set(TRT_VERSION
    "${TRT_MAJOR}.${TRT_MINOR}.${TRT_PATCH}"
    CACHE STRING "TensorRT project version")
set(TRT_SOVERSION
    "${TRT_SO_MAJOR}"
    CACHE STRING "TensorRT library so version")
message(
  STATUS
    "Building for TensorRT version: ${TRT_VERSION}, library version: ${TRT_SOVERSION}"
)
# Apply the collected header directories to this directory and below. The
# original `list(APPEND COMMON_HEADER_DIRS)` had no elements to append and was a
# no-op, so it is not repeated here.
include_directories(${COMMON_HEADER_DIRS})

# Torch and TensorRT headers are added as SYSTEM include directories.
include_directories(SYSTEM ${TORCH_INCLUDE_DIRS} ${TRT_INCLUDE_DIR})

# Descend into the library sources.
add_subdirectory(tensorrt_llm)
tensorrt_llm/CMakeLists.txt
# Names of the library targets defined in this directory. SHARED_TARGET and
# STATIC_TARGET are also published to the parent directory scope.
set(TARGET_NAME tensorrt_llm)

set(SHARED_TARGET "${TARGET_NAME}")
set(SHARED_TARGET
    "${SHARED_TARGET}"
    PARENT_SCOPE)

set(STATIC_TARGET "${TARGET_NAME}_static")
set(STATIC_TARGET
    "${STATIC_TARGET}"
    PARENT_SCOPE)

# Public API headers of the project.
set(API_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/include")
# MPI is required; report what was found.
find_package(MPI REQUIRED)
foreach(mpi_variable MPI_CXX_INCLUDE_DIRS MPI_CXX_LIBRARIES)
  message(STATUS "Using ${mpi_variable}: ${${mpi_variable}}")
endforeach()

# Include paths for this directory and its subdirectories: the CUTLASS
# extensions, the public API headers, and MPI.
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}/cutlass_extensions/include
  ${API_INCLUDE_DIR}
  ${MPI_INCLUDE_PATH})
# Component subdirectories, processed in this order.
foreach(component_dir common kernels layers runtime)
  add_subdirectory(${component_dir})
endforeach()
# Name of the batch manager target and the architecture tag used to pick the
# matching prebuilt archive directory.
set(BATCH_MANAGER_TARGET tensorrt_llm_batch_manager_static)
set(BATCH_MANAGER_TARGET_ARCH "unknown")

message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(WIN32)
  # Windows
  # ... (the Windows branch is elided in this listing)
else()
  # Linux: only x86_64 and aarch64 are supported.
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
    set(BATCH_MANAGER_TARGET_ARCH "x86_64-linux-gnu")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(BATCH_MANAGER_TARGET_ARCH "aarch64-linux-gnu")
  else()
    message(
      FATAL_ERROR
        "The system processor type is unsupported: ${CMAKE_SYSTEM_PROCESSOR}")
  endif()
endif()
# Batch manager: either built from source or imported as a prebuilt static
# archive. On Linux two archives exist, one per libstdc++ ABI; the one matching
# the ABI that the installed torch was compiled with is selected.
if(BUILD_BATCH_MANAGER)
  add_subdirectory(batch_manager)
else()
  add_library(${BATCH_MANAGER_TARGET} STATIC IMPORTED)
  if(NOT WIN32) # Linux
    # The interpreter is otherwise only looked up when BUILD_PYT is enabled,
    # so make sure one is available before running the probe.
    if(NOT Python3_EXECUTABLE)
      find_package(Python3 COMPONENTS Interpreter)
    endif()

    set(USE_CXX11_ABI "")
    set(_PYTHON_SUCCESS "Python3 interpreter not found")
    if(Python3_EXECUTABLE)
      execute_process(
        COMMAND ${Python3_EXECUTABLE} "-c"
                "import torch; print(torch.compiled_with_cxx11_abi(),end='');"
        RESULT_VARIABLE _PYTHON_SUCCESS
        OUTPUT_VARIABLE USE_CXX11_ABI)
    endif()

    # A failed probe keeps the previous fallback (the pre-cxx11 archive) but is
    # now reported instead of passing silently.
    if(NOT _PYTHON_SUCCESS EQUAL 0)
      message(
        WARNING
          "Could not query torch.compiled_with_cxx11_abi() (${_PYTHON_SUCCESS}); falling back to the pre-cxx11 ABI batch manager library"
      )
      set(USE_CXX11_ABI "")
    endif()
    message(STATUS "USE_CXX11_ABI: ${USE_CXX11_ABI}")

    if(USE_CXX11_ABI)
      set_property(
        TARGET ${BATCH_MANAGER_TARGET}
        PROPERTY
          IMPORTED_LOCATION
          "${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/libtensorrt_llm_batch_manager_static.a"
      )
    else()
      set_property(
        TARGET ${BATCH_MANAGER_TARGET}
        PROPERTY
          IMPORTED_LOCATION
          "${CMAKE_CURRENT_SOURCE_DIR}/batch_manager/${BATCH_MANAGER_TARGET_ARCH}/libtensorrt_llm_batch_manager_static.pre_cxx11.a"
      )
    endif()
  else() # Windows
    # ... (the Windows branch is elided in this listing)
  endif()
endif()
# Libraries linked into both the shared and the static tensorrt_llm targets.
set(TRTLLM_LINK_LIBS
    # CUDA math / DNN libraries
    ${CUBLAS_LIB}
    ${CUBLASLT_LIB}
    ${CUDNN_LIB}
    # dynamic loading, MPI and NCCL
    ${CMAKE_DL_LIBS}
    ${MPI_CXX_LIBRARIES}
    ${NCCL_LIB}
    # TensorRT
    ${TRT_LIB}
    # targets from the component subdirectories
    common_src
    kernels_src
    layers_src
    runtime_src
    # batch manager (built from source or imported)
    ${BATCH_MANAGER_TARGET})
# ##############################################################################
# SHARED LIBRARY
# ##############################################################################

set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

add_library(${SHARED_TARGET} SHARED)

set_target_properties(
  ${SHARED_TARGET}
  PROPERTIES CXX_STANDARD "17"
             CXX_STANDARD_REQUIRED "YES"
             CXX_EXTENSIONS "NO")

# Linker flag that rejects unresolved symbols in the shared library. The value
# must be stored in UNDEFINED_FLAG, which is the variable consumed below;
# previously it was assigned to a different name (ALLOW_UNDEFINED_FLAG) and
# contained a space after "-Wl,", so the flag never reached the linker.
if(NOT MSVC) # Unix-like compilers
  set(UNDEFINED_FLAG "-Wl,--no-undefined")
else() # MSVC
  set(UNDEFINED_FLAG "")
endif()

target_link_libraries(${SHARED_TARGET} PUBLIC ${TRTLLM_LINK_LIBS}
                                              ${UNDEFINED_FLAG})
# ##############################################################################
# STATIC LIBRARY
# ##############################################################################

add_library(${STATIC_TARGET} STATIC)

# Same language settings as the shared target, plus position-independent code.
set_target_properties(
  ${STATIC_TARGET}
  PROPERTIES CXX_STANDARD "17"
             CXX_STANDARD_REQUIRED "YES"
             CXX_EXTENSIONS "NO"
             POSITION_INDEPENDENT_CODE ON)

target_link_libraries(${STATIC_TARGET} PUBLIC ${TRTLLM_LINK_LIBS})

# Cyclic dependency of batch manager on TRT-LLM: consumers of the batch manager
# target also link the static tensorrt_llm library.
target_link_libraries(${BATCH_MANAGER_TARGET} INTERFACE ${STATIC_TARGET})
# The thop subdirectory is only added when PyTorch support (BUILD_PYT) is
# enabled.
if(BUILD_PYT)
  add_subdirectory(thop)
endif()

# The plugins subdirectory is always added.
add_subdirectory(plugins)
参考文献
- https://github.com/NVIDIA/TensorRT-LLM/blob/release/0.5.0/cpp/CMakeLists.txt