TensorRT-LLM 0.5.0 源码之五
functional.py
def constant(ndarray: np.ndarray) -> Tensor: ''' Add a constant layer. TensorRT graphs encapsulate constant values in the form of constant layers (tensorrt.IConstantLayer). That function creates such a layer from a Numpy array of values. After compilation of the network by TensorRT, those weights are stored in the serialized TensorRT engine. Parameters: ndarray : numpy.ndarray The array of values (weights) encapsulated by this constant layer. Returns: The tensor produced by the inserted layer. ''' # trt.Weights不是一个存储权重数据的容器本身,而是一个轻量级的结构体(或 Python 中的类)。它的主要作用是作为一个“描述符”或“句柄”,用于将预先准备好的权重数据(来自训练好的模型)安全、高效地传递给 TensorRT 在构建网络层(如卷积层、全连接层、常量层等)时使用。 # 非常重要的一点:trt.Weights本身并不拥有它所指向的数据。它只是一个指向外部管理的内存块的引用。 # TensorRT 构建器在构建优化引擎 (ICudaEngine) 的过程中,会读取 trt.Weights指向的数据,并根据优化策略(如层融合、精度转换 FP32->FP16/INT8、内核选择)处理这些权重。构建完成后,原始的权重数据(trt.Weights指向的数据)通常就不再需要了,因为优化后的权重形式已经序列化到引擎文件里了。 weights = trt.Weights(np_dtype_to_trt(ndarray.dtype), ndarray.ctypes.data, # 数组数据在内存中的起始地址(指针) ndarray.size) # 数组中元素的总个数 # Prevent underlying numpy array from going out of scope default_net().register_ndarray(ndarray) layer = default_trtnet().add_constant(trt.Dims(ndarray.shape), weights) if not default_net()._strongly_typed: layer.set_output_type(0, np_dtype_to_trt(ndarray.dtype)) return _create_tensor(layer.get_output(0), layer)
parameter.py
class Parameter(object): _DEFAULT_DTYPE = trt.DataType.FLOAT def __init__(self, value: Union[np.ndarray] = None, shape: Sequence[int] = None, dtype: Union[str, trt.DataType] = None): if dtype is None: logger.warning( f'Parameter dtype is None, using default dtype: {self._DEFAULT_DTYPE}, it is recommended to always specify dtype explicitly' ) dtype = self._DEFAULT_DTYPE if dtype is None else dtype if isinstance(dtype, str): dtype = str_dtype_to_trt(dtype) if value is None: import torch assert isinstance(shape, (list, tuple)) if len(shape) == 2: # Xavier initialization see https://paperswithcode.com/method/xavier-initialization v_range = np.sqrt(6) / np.sqrt(shape[0] + shape[1]) else: v_range = 0.1 # value ~ U[-1, 1] value = torch.rand( (shape), dtype=trt_dtype_to_torch(dtype), device='cuda') * 2 - 1 # value ~ U[-v_range, v_range] value = torch_to_numpy((value * v_range).cpu()) self._value = value @property def value(self) -> Tensor: if isinstance(self._value, np.ndarray): self._value = constant(self._value) return self._value @value.setter def value(self, v: np.ndarray): assert isinstance(v, np.ndarray) assert v.shape == self._value.shape, \ ('The value updated is not the same shape as the original. ', \ f'Updated: {v.shape}, original: {self._value.shape}') self._value = v def _get_weights(self) -> trt.Weights: return self._value.producer.weights if isinstance(self._value, Tensor) else None
module.py
class Module(object):
    """Base class for TensorRT-LLM network modules.

    Keeps three registries: child modules (``_modules``), weights
    (``_parameters``) and debug network outputs (``_network_outputs``).
    Attribute assignment is intercepted so that Parameter/Module values
    land in the matching registry instead of the instance ``__dict__``.
    """

    def __init__(self) -> None:
        # name -> child Module (insertion order preserved by dict)
        self._modules = {}
        # name -> Parameter
        self._parameters = {}
        # name -> tensor registered for debugging/inspection
        self._network_outputs = {}

    def forward(self, *args, **kwargs):
        """Build this module's part of the network; subclasses must override."""
        raise NotImplementedError

    def __call__(self, *args, **kwargs):
        """Invoke ``forward`` while tracking the module call stack on the
        current default network (used for unique layer naming)."""
        current_net = default_net()
        if not current_net._module_call_stack.module_names_set():
            logger.debug("Initializing top level module")
            current_net._module_call_stack.set_module_names(self)
        unique_name = current_net._module_call_stack.get_mod_name(self)
        with current_net._module_call_stack.call_stack_mgr() as stack:
            stack.append(unique_name)
            return self.forward(*args, **kwargs)

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails: fall back to the
        # parameter and sub-module registries.
        parameters = self.__dict__.get('_parameters')
        if name in parameters:
            return parameters[name]
        modules = self.__dict__.get('_modules')
        if name in modules:
            return modules[name]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, name))

    def __setattr__(self, name, value) -> None:
        # Route Parameter/Module values into the registries so that
        # named_parameters()/named_modules() can discover them; everything
        # else is stored as a plain attribute.
        if isinstance(value, Parameter):
            parameters = self.__dict__.get('_parameters')
            parameters[name] = value
        else:
            modules = self.__dict__.get('_modules')
            if isinstance(value, Module):
                modules[name] = value
            else:
                super().__setattr__(name, value)

    def named_modules(self, memo=None, prefix='', remove_duplicate=True):
        """Yield (dotted_name, module) pairs over this module and all
        descendants, depth-first; ``memo`` suppresses revisits when
        ``remove_duplicate`` is True."""
        if memo is None:
            memo = set()
        if self not in memo:
            if remove_duplicate:
                memo.add(self)
            yield prefix, self
            for name, module in self._modules.items():
                if module is None:
                    continue
                submodule_prefix = prefix + ('.' if prefix else '') + name
                for m in module.named_modules(memo, submodule_prefix,
                                              remove_duplicate):
                    yield m

    def named_children(self):
        """Yield (name, module) for direct children, skipping duplicates."""
        memo = set()
        for name, module in self._modules.items():
            if module is not None and module not in memo:
                memo.add(module)
                yield name, module

    def _named_members(self, get_members_fn, prefix='', recurse=True):
        """Shared traversal helper: yield (dotted_name, member) pairs where
        ``get_members_fn`` extracts (key, value) items from each module."""
        memo = set()
        modules = self.named_modules(prefix=prefix) if recurse else [(prefix, self)]
        for module_prefix, module in modules:
            members = get_members_fn(module)
            for k, v in members:
                if v is None or v in memo:
                    continue
                memo.add(v)
                name = module_prefix + ('.' if module_prefix else '') + k
                yield name, v

    def parameter(self, recurse=True):
        """Yield all Parameter objects (names discarded).

        NOTE(review): ``recurse`` is accepted but not forwarded to
        ``named_parameters``, which always recurses — presumably an
        oversight kept for interface compatibility.
        """
        for name, param in self.named_parameters():
            yield param

    def named_parameters(self, prefix='', recurse=True):
        """Yield (dotted_name, Parameter) pairs for this module (and
        descendants when ``recurse``)."""
        gen = self._named_members(lambda module: module._parameters.items(),
                                  prefix=prefix,
                                  recurse=recurse)
        for elem in gen:
            yield elem

    def children(self):
        """Yield direct child modules (names discarded)."""
        for _, module in self.named_children():
            yield module

    def apply(self, fn):
        """Apply ``fn`` to every descendant (post-order) and then to self;
        return self for chaining."""
        for module in self.children():
            module.apply(fn)
        fn(self)
        return self

    def _get_name(self):
        # Class name, used for readable module identification.
        return self.__class__.__name__

    def register_parameter(self, name, param):
        """Register ``param`` under ``name``; None registers a placeholder."""
        if param is None:
            self._parameters[name] = None
        else:
            self._parameters[name] = param

    def register_network_output(self, name, value):
        """Record a tensor so it can be exposed as a network output."""
        self._network_outputs[name] = value

    def named_network_outputs(self):
        """Yield (dotted_name, tensor) for every registered network output
        across this module and all descendants."""
        for name, module in self.named_modules():
            for n, output in module._network_outputs.items():
                yield name + ('.' if name else '') + n, output

    def update_parameters(self, torch_module):
        """Copy weights from a torch module whose parameter names match
        this module's parameter names exactly."""
        m = {k: v for k, v in self.named_parameters()}
        tm = {k: v for k, v in torch_module.named_parameters()}
        assert sorted(m.keys()) == sorted(
            tm.keys()
        ), 'The parameter names of the tensorrt-llm module must be the same with the torch module'
        for k, v in self.named_parameters():
            # Parameter.value setter validates the shape of each array.
            v.value = tm[k].detach().cpu().numpy()
class ModuleList(Module):
    """Hold sub-modules in a list-like container indexed like a Python list."""

    def __init__(self, modules) -> None:
        super(ModuleList, self).__init__()
        # Children live in the inherited ``_modules`` dict under
        # stringified positional keys.
        base = len(self)
        for position, child in enumerate(modules):
            self._modules[str(base + position)] = child

    def _get_abs_string_index(self, idx):
        """Get the absolute index for the list of modules"""
        normalized = operator.index(idx)
        size = len(self)
        if normalized >= size or normalized < -size:
            raise IndexError('index {} is out of range'.format(normalized))
        # Map negative indices to their absolute position.
        return str(normalized + size if normalized < 0 else normalized)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            # A slice yields a new ModuleList over the selected children.
            children = list(self._modules.values())
            return self.__class__(children[idx])
        return self._modules[self._get_abs_string_index(idx)]

    def __setitem__(self, idx, module) -> None:
        key = self._get_abs_string_index(idx)
        # setattr routes through Module.__setattr__, keeping the registry
        # consistent.
        return setattr(self, str(key), module)

    def __len__(self):
        return len(self._modules)
profiler.py
class Timer:
    """Accumulate elapsed times under string tags.

    Typical usage::

        timer.start('build')
        ...
        timer.stop('build')                  # returns the last interval
        timer.elapsed_time_in_sec('build')   # returns the running total
    """

    def __init__(self):
        # tag -> timestamp of the most recent start() call
        self._start_times = {}
        # tag -> accumulated elapsed seconds over all start/stop pairs
        self._total_elapsed_times = {}

    def start(self, tag):
        """Begin (or restart) timing for *tag*."""
        # time.monotonic() cannot go backwards, unlike time.time(), so
        # intervals are immune to system clock adjustments (NTP, DST).
        self._start_times[tag] = time.monotonic()

    def stop(self, tag) -> float:
        """Stop timing *tag* and return the interval since the matching
        start(); the interval is also added to the running total.

        Raises:
            KeyError: if start() was never called for *tag*.
        """
        if tag not in self._start_times:
            raise KeyError(
                f'Timer.stop({tag!r}) called without a matching start()')
        # NOTE: the start timestamp is deliberately kept, matching the
        # original behavior: a second stop() measures from the same start.
        elapsed_time = time.monotonic() - self._start_times[tag]
        self._total_elapsed_times[tag] = (
            self._total_elapsed_times.get(tag, 0) + elapsed_time)
        return elapsed_time

    def elapsed_time_in_sec(self, tag) -> float:
        """Return the accumulated seconds for *tag*, or None if the tag was
        never stopped (or was reset)."""
        if tag not in self._total_elapsed_times:
            return None
        return self._total_elapsed_times[tag]

    def reset(self):
        """Discard all start timestamps and accumulated totals."""
        self._start_times.clear()
        self._total_elapsed_times.clear()

    def summary(self):
        """Print the accumulated total for every tag."""
        print('Profile Results')
        for tag, elapsed_time in self._total_elapsed_times.items():
            print(f' - {tag.ljust(30, ".")}: {elapsed_time:.6f} (sec)')


# Module-level default timer backing the free functions below.
_default_timer = Timer()


def start(tag):
    """Start timing *tag* on the default timer."""
    _default_timer.start(tag)


def stop(tag):
    """Stop timing *tag* on the default timer; return the last interval."""
    return _default_timer.stop(tag)


def elapsed_time_in_sec(tag):
    """Accumulated seconds for *tag* on the default timer (None if unknown)."""
    return _default_timer.elapsed_time_in_sec(tag)


def reset():
    """Reset the default timer."""
    _default_timer.reset()


def summary():
    """Print the default timer's accumulated totals."""
    _default_timer.summary()
参考文献

- https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/module.py
- https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/parameter.py
- https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/functional.py
- https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/profiler.py

夜雨聆风