TensorRT-LLM 0.5.0 源码之八
activation.py
class Mish(Module):
    """Mish activation function: ``x * tanh(softplus(x))``."""

    def forward(self, input):
        # softplus with beta=1 and overflow threshold 20, matching the
        # torch.nn.functional.softplus defaults.
        sp = softplus(input, beta=1.0, threshold=20.0)
        return input * tanh(sp)
cast.py
class Cast(Module):
    """Module wrapper around the free function ``cast()``.

    Casts its input tensor to ``output_dtype`` in ``forward()``.
    """

    # Dtype names accepted by the underlying cast operation.
    _SUPPORTED_DTYPES = ('float32', 'float16', 'bfloat16', 'bool', 'int32',
                         'int8')

    def __init__(self, output_dtype: str = 'float32') -> None:
        super().__init__()
        # Validate eagerly so a misconfiguration fails at construction time.
        # The original used `assert ..., TypeError(...)`: the TypeError was
        # only the assertion *message* (an AssertionError would be raised),
        # and the whole check disappears under `python -O`. Raise explicitly
        # instead, consistent with the TypeError raised by cast() on a bad
        # dtype argument.
        if output_dtype not in self._SUPPORTED_DTYPES:
            raise TypeError("%s is not supported" % output_dtype)
        self.output_dtype = output_dtype

    def forward(self, x):
        return cast(x, self.output_dtype)
def cast(input: Tensor, dtype: Union[str, trt.DataType]):
    '''
    Add a cast operation.

    For an INT8 input tensor, the dynamic range of the input is set to
    [-127, 127] so that TensorRT can dequantize it automatically. Likewise,
    when casting *into* INT8, the dynamic range of the output is set to
    [-127, 127] for automatic quantization.

    Parameters:
        input : Tensor
            The input tensor on which the cast is applied.

        dtype : str or trt.DataType
            The data type of the output tensor after the cast. When given as
            a string, it must be one of the valid names listed in
            _str_to_trt_dtype_dict in _utils.py.

    Returns:
        The tensor produced by the inserted cast layer.
    '''
    if isinstance(dtype, trt.DataType):
        trt_dtype = dtype
    elif isinstance(dtype, str):
        trt_dtype = str_dtype_to_trt(dtype)
    else:
        raise TypeError("%s is not supported" % type(dtype))

    layer = default_trtnet().add_cast(input.trt_tensor, trt_dtype)
    # In a strongly-typed network TensorRT forbids explicit output types.
    if not default_net().strongly_typed:
        layer.set_output_type(0, trt_dtype)
    result = _create_tensor(layer.get_output(0), layer)

    int8_dtype = str_dtype_to_trt('int8')
    if input.dtype == int8_dtype:
        layer.get_input(0).set_dynamic_range(-127, 127)
    if trt_dtype == int8_dtype:
        layer.get_output(0).set_dynamic_range(-127, 127)
    return result
pooling.py
class AvgPool2d(Module):
    """2D average-pooling module; thin wrapper around ``avg_pool2d()``.

    Stores the pooling configuration and forwards it, together with the
    input tensor, to the free function in ``forward()``.
    """

    def __init__(self,
                 kernel_size: Tuple[int],
                 stride: Optional[Tuple[int]] = None,
                 padding: Optional[Tuple[int]] = (0, 0),
                 ceil_mode: bool = False,
                 count_include_pad: bool = True) -> None:
        super().__init__()
        # Fixed: this attribute was misspelled 'kernel_szie' in the original
        # (the typo was self-consistent with forward(), so behavior was
        # correct, but the public attribute name was wrong).
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    def forward(self, input):
        return avg_pool2d(input, self.kernel_size, self.stride, self.padding,
                          self.ceil_mode, self.count_include_pad)
def avg_pool2d(input: Tensor,
               kernel_size: Tuple[int],
               stride: Optional[Tuple[int]] = None,
               padding: Optional[Tuple[int]] = (0, 0),
               ceil_mode: bool = False,
               count_include_pad: bool = True) -> Tensor:
    '''
    Add a 2D average-pooling operation on a tensor.

    Parameters:
        input : Tensor
            The tensor to pool. Must have a static shape; either 4D
            [B, C, H, W] or 3D [B, C, T] (a singleton batch dimension is
            temporarily prepended for the 3D case).
        kernel_size : Tuple[int]
            Size of the pooling window.
        stride : Optional[Tuple[int]]
            Stride of the window; defaults to kernel_size when None.
        padding : Optional[Tuple[int]]
            Implicit zero-padding added on both sides.
        ceil_mode : bool
            When True, round output sizes up instead of down.
        count_include_pad : bool
            When True, padded elements are counted in the average.

    Returns:
        The tensor produced by the pooling layer.
    '''
    assert not input.is_dynamic()  # [B, C, H, W] or [B, C, T] = [B, H, W]

    ndim = input.ndim()
    if ndim == 3:
        # Pooling operates on 4D tensors; prepend a singleton dimension.
        input = expand_dims(input, 0)  # [1, B, C, T]

    layer = default_trtnet().add_pooling(input.trt_tensor,
                                         trt.PoolingType.AVERAGE, kernel_size)
    layer.stride = kernel_size if stride is None else stride
    # The original version silently ignored the remaining parameters; wire
    # them to the corresponding TensorRT IPoolingLayer attributes.
    # NOTE(review): verify these attribute names against the TensorRT
    # version in use (add_pooling is the legacy, non-_nd API).
    layer.padding = padding
    # TensorRT excludes padded elements from the average when this is True.
    layer.average_count_excludes_padding = not count_include_pad
    if ceil_mode:
        # Round output dimensions up (PyTorch's ceil_mode semantics).
        layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP

    output = _create_tensor(layer.get_output(0), layer)

    if ndim == 3:
        # Drop the singleton dimension added above.
        return output.view(
            concat([output.size(1),
                    output.size(2),
                    output.size(3)]))  # [B, C, T] = [B, H, W]
    return output
def expand_dims(input: Tensor, dim: Union[int, Sequence[int]]) -> Tensor:
    '''
    Add an operation that inserts singleton dimensions into a tensor's shape.

    A tensorrt.IShuffleLayer is added to the network. For an 'input' of rank
    N and a sequence of M positions, the resulting tensor has rank N+M, with
    size-1 dimensions inserted at each position listed in 'dim'.

    For example, for a tensor of shape [3, 4, 1, 5],
        expand_dims(input, [0, 2])
    produces a tensor of shape [1, 3, 1, 4, 1, 5].

    Parameters:
        input : Tensor
            The tensor to expand.
        dim : Union[int, Sequence[int]]
            Position(s) in the output shape where singleton dimensions are
            inserted.

    Returns:
        The tensor produced by the shuffle layer.
    '''
    dims = (dim, ) if isinstance(dim, int) else dim

    input_shape = shape(input)
    out_shapes = []
    src_axis = 0
    for pos in range(input.ndim() + len(dims)):
        if pos in dims:
            # Insert a singleton dimension here.
            out_shapes.append(1)
        else:
            # Copy the next dimension of the input shape.
            out_shapes.append(gather(input_shape, 0, src_axis))
            src_axis += 1

    return view(input, concat(out_shapes))
def gather(input: Tensor, dim: int, indices: Union[Tensor, int]) -> Tensor:
    '''
    Add an operation to gather elements from a tensor.

    Implements the GatherElements operator from the ONNX specification:
    https://github.com/onnx/onnx/blob/main/docs/Operators.md#GatherElements

    The input and indices arguments must have the same rank >= 1. The output
    has the same shape as the indices tensor; 'dim' is the axis gathered on.
    For a 3D tensor:

        out[i][j][k] = input[indices[i][j][k]][j][k] if axis = 0,
        out[i][j][k] = input[i][indices[i][j][k]][k] if axis = 1,
        out[i][j][k] = input[i][j][indices[i][j][k]] if axis = 2.

    For example,
        gather([[4, 2], [5, 3]], 0, [[1, 0], [0, 1]])
    produces [[5, 2], [4, 3]], and
        gather([[1, 2, 3], [4, 5, 6]], 1, [[1], [0]])
    produces [[2], [4]]. See the ONNX documentation for more examples.

    That operation maps to the TensorRT IGatherLayer.

    Parameters:
        input : Tensor
            The tensor to gather elements from.
        dim : int
            The dimension to gather on (may be negative).
        indices : Union[Tensor, int]
            The positions in the 'dim' dimension to gather from.

    Returns:
        The tensor of gathered elements, shaped like 'indices'.
    '''
    if isinstance(indices, int):
        # Promote a bare integer index to a 1-element constant tensor.
        indices = constant(int32_array([indices]))

    # GatherElements requires matching ranks.
    assert input.rank() == indices.rank()

    # Normalize a negative axis before assigning it to the layer.
    axis = dim if dim >= 0 else input.ndim() + dim

    layer = default_trtnet().add_gather_v2(input.trt_tensor,
                                           indices.trt_tensor,
                                           mode=trt.GatherMode.ELEMENT)
    layer.axis = axis
    return _create_tensor(layer.get_output(0), layer)
def view(input: Tensor,
         shape: Union[Tensor, Sequence[int]],
         zero_is_placeholder: bool = True) -> Tensor:
    '''
    Add an operation to create a view (reshape) of a tensor.

    A tensorrt.IShuffleLayer is added to the network. When 'shape' is a
    Tensor, the view is dynamic; otherwise it is static.

    TensorRT allows at most one inferred dimension, i.e. the shape sequence
    or tensor may contain at most one -1. That constraint is asserted here.

    Parameters:
        input : Tensor
            The input tensor to reshape.
        shape : Union[Tensor, Sequence[int]]
            The shape of the new tensor.
        zero_is_placeholder : bool
            When True, 0s in 'shape' are replaced by the sizes of the
            corresponding input dimensions; when False, a 0 produces a
            zero-sized dimension.

    Returns:
        The tensor produced by the view/shuffle layer.
    '''

    def _assert_at_most_one_inferred_dim(dims):
        # TensorRT permits only a single -1 (inferred) dimension.
        assert sum(1 for d in dims if d == -1) <= 1

    layer = default_trtnet().add_shuffle(input.trt_tensor)
    layer.zero_is_placeholder = zero_is_placeholder

    if isinstance(shape, Tensor):
        _assert_at_most_one_inferred_dim(shape.shape)
        layer.set_input(1, shape.trt_tensor)
    elif isinstance(shape, (list, tuple)):
        _assert_at_most_one_inferred_dim(shape)
        layer.reshape_dims = tuple(shape)
    else:
        raise TypeError("%s is not supported" % type(shape))

    return _create_tensor(layer.get_output(0), layer)
def concat(inputs: Sequence[Union[Tensor, int]], dim: int = 0) -> Tensor:
    '''
    Add an operation to concatenate tensors along one dimension.

    All tensors in 'inputs' must have the same shape except in dimension
    'dim'; the output size in that dimension is the sum of the inputs'
    sizes there, and matches the inputs everywhere else.

    For example, given two 2D tensors [[0, 1], [2, 3]] and [[4, 5], [6, 7]],
    both of shape [2, 2],
        concat(inputs, 0)
    produces [[0, 1], [2, 3], [4, 5], [6, 7]] of shape [4, 2], and
        concat(inputs, 1)
    produces [[0, 1, 4, 5], [2, 3, 6, 7]] of shape [2, 4].

    Parameters:
        inputs : Sequence[Union[Tensor, int]]
            The tensors to concatenate. Plain integers are converted to
            constant tensors.
        dim : int
            The dimension along which to concatenate.

    Returns:
        A tensor containing the concatenation of the inputs.
    '''
    tensors = []
    for item in inputs:
        if isinstance(item, int):
            # Promote plain integers to 1-element int32 constants.
            tensors.append(constant(int32_array([item])))
        elif item.rank() == 0:
            # Scalars cannot be concatenated; view them as 1-element tensors.
            tensors.append(item.view([1]))
        else:
            tensors.append(item)

    layer = default_trtnet().add_concatenation(
        [t.trt_tensor for t in tensors])
    layer.axis = dim
    return _create_tensor(layer.get_output(0), layer)
normalization.py
class LayerNorm(Module):
    """Layer normalization module; see the free function ``layer_norm()``.

    When ``elementwise_affine`` is True, learnable per-element gamma
    ('weight') and beta ('bias') parameters of shape ``normalized_shape``
    are created; otherwise both are registered as None.
    """

    def __init__(self,
                 normalized_shape,
                 eps=1e-05,
                 elementwise_affine=True,
                 dtype=None):
        super().__init__()
        # Accept either a single int or a shape sequence.
        self.normalized_shape = ((normalized_shape, ) if isinstance(
            normalized_shape, int) else tuple(normalized_shape))
        self.elementwise_affine = elementwise_affine
        if elementwise_affine:
            self.weight = Parameter(shape=self.normalized_shape, dtype=dtype)
            self.bias = Parameter(shape=self.normalized_shape, dtype=dtype)
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.eps = eps

    def forward(self, x):
        gamma = self.weight.value if self.weight is not None else None
        beta = self.bias.value if self.bias is not None else None
        return layer_norm(x, self.normalized_shape, gamma, beta, self.eps)
def layer_norm(input: Tensor,
               normalized_shape: Union[int, Tuple[int]],
               weight: Optional[Tensor] = None,
               bias: Optional[Tensor] = None,
               eps: float = 1e-05,
               use_diff_of_squares: bool = True) -> Tensor:
    '''
    Add a layer-norm operation on a tensor.

    That operation applies the layer-normalization to its input tensor. In its
    simplest form, for large language models, the 'normalized_shape' should be
    set to the hidden dimension of the activation tensor. Otherwise, it is the
    shape of the normalized fraction of the tensor (starting from the
    right-most dimension).

    The 'weight' tensor corresponds to 'gamma' in the layer-norm formula and
    'bias' is 'beta'. The 'eps' value is added to the variance before computing
    the squared-root.

    This implementation (when using the plugin) supports an additional flag to
    enable/disable the use of a difference of squares ('Var = Mean(X^2) -
    Mean(X)^2').

    Parameters:
        input : Tensor
            The tensor to normalize.

        normalized_shape : Union[int, Tuple[int]]
            The shape of the sub-tensor that is normalized. Use 'hidden_dim' to
            normalize the inner-most dimension of an activation tensor in LLMs.

        weight : Optional[Tensor] = None
            The 'gamma' term in layer-norm. Its shape must be
            'normalized_shape'.

        bias : Optional[Tensor] = None
            The 'beta' term in layer-norm. Its shape must be
            'normalized_shape'.

        eps : float
            The epsilon term to be added to the variance in the squared-root.

        use_diff_of_squares : bool
            Does the plugin use the difference of squares to compute the
            variance?

    Returns:
        The output tensor of that operation.
    '''
    if not default_net().plugin_config.layernorm_plugin:
        # Non-plugin path: build the op from TensorRT's native
        # INormalizationLayer.
        # NOTE(review): weight/bias are dereferenced (.trt_tensor) below, so
        # this path appears to require both to be non-None — confirm with
        # callers before relying on the None defaults here.
        input, weight = broadcast_helper(input, weight)
        input, bias = broadcast_helper(input, bias)
        # First axis to normalize over: everything from 'axis' to the last
        # dimension is normalized.
        if isinstance(normalized_shape, int):  # FIXME: better way?
            axis = input.ndim() - 1
        else:
            axis = input.ndim() - len(normalized_shape)
        # INormalizationLayer takes the reduction axes as a bit mask.
        axes_mask = 0
        for i in range(axis, input.ndim()):
            axes_mask |= 1 << i
        layer = default_trtnet().add_normalization(input.trt_tensor,
                                                   weight.trt_tensor,
                                                   bias.trt_tensor, axes_mask)
        layer.epsilon = eps
        return _create_tensor(layer.get_output(0), layer)
    else:
        # Plugin path: look up the custom 'Layernorm' plugin and pass its
        # configuration through a PluginFieldCollection.
        plg_creator = trt.get_plugin_registry().get_plugin_creator(
            'Layernorm', '1', TRT_LLM_PLUGIN_NAMESPACE)
        assert plg_creator is not None

        # Note: 'eps' and 'use_diff_of_squares' are rebound from their
        # scalar values to trt.PluginField wrappers here.
        eps = trt.PluginField("eps", np.array(eps, dtype=np.float32),
                              trt.PluginFieldType.FLOAT32)
        use_diff_of_squares = trt.PluginField(
            "use_diff_of_squares",
            np.array([int(use_diff_of_squares)], dtype=np.int32),
            trt.PluginFieldType.INT32)
        # The plugin's compute dtype comes from the plugin config.
        p_dtype = default_net().plugin_config.layernorm_plugin
        pf_type = trt.PluginField(
            "type_id", np.array([int(str_dtype_to_trt(p_dtype))], np.int32),
            trt.PluginFieldType.INT32)
        pfc = trt.PluginFieldCollection([eps, use_diff_of_squares, pf_type])
        layernorm_plug = plg_creator.create_plugin("layernorm", pfc)

        normalized_shape = [normalized_shape] if isinstance(
            normalized_shape, int) else normalized_shape
        # Identity affine parameters when none are provided: gamma = 1,
        # beta = 0.
        if weight is None:
            weight = constant(
                np.ones(normalized_shape, dtype=str_dtype_to_np(p_dtype)))
        if bias is None:
            bias = constant(
                np.zeros(normalized_shape, dtype=str_dtype_to_np(p_dtype)))

        plug_inputs = [input.trt_tensor, weight.trt_tensor, bias.trt_tensor]
        layer = default_trtnet().add_plugin_v2(plug_inputs, layernorm_plug)
        return _create_tensor(layer.get_output(0), layer)
class RmsNorm(Module):
    """RMS normalization module; see the free function ``rms_norm()``.

    When ``elementwise_affine`` is True, a learnable per-element gamma
    ('weight') parameter of shape ``normalized_shape`` is created;
    otherwise it is registered as None. RMS norm has no beta/bias term.
    """

    def __init__(self,
                 normalized_shape,
                 eps=1e-06,
                 elementwise_affine=True,
                 dtype=None):
        super().__init__()
        # Accept either a single int or a shape sequence.
        self.normalized_shape = ((normalized_shape, ) if isinstance(
            normalized_shape, int) else tuple(normalized_shape))
        self.elementwise_affine = elementwise_affine
        if elementwise_affine:
            self.weight = Parameter(shape=self.normalized_shape, dtype=dtype)
        else:
            self.register_parameter('weight', None)
        self.eps = eps

    def forward(self, x):
        gamma = self.weight.value if self.weight is not None else None
        return rms_norm(x, self.normalized_shape, gamma, self.eps)
def rms_norm(input: Tensor,
             normalized_shape: Union[int, Tuple[int]],
             weight: Optional[Tensor] = None,
             eps: float = 1e-06) -> Tensor:
    '''
    Add a RMS norm operation on a tensor.

    That operation applies the rms-normalization to its input tensor. In its
    simplest form, for large language models, the 'normalized_shape' should be
    set to the hidden dimension of the activation tensor. Otherwise, it is the
    shape of the normalized fraction of the tensor (starting from the
    right-most dimension).

    The 'weight' tensor corresponds to 'gamma' in the rms-norm formula.
    The 'eps' value is added to the variance before computing the squared-root.

    Parameters:
        input: Tensor
            The tensor to normalize.

        normalized_shape : Union[int, Tuple[int]]
            The shape of the sub-tensor that is normalized. Use 'hidden_dim' to
            normalize the inner-most dimension of an activation tensor in LLMs.

        weight : Optional[Tensor] = None
            The 'gamma' term in rms-norm. Its shape must be
            'normalized_shape'.

        eps : float
            The epsilon term to be added to the variance in the squared-root.

    Returns:
        The output tensor of that operation.
    '''
    if not default_net().plugin_config.rmsnorm_plugin:
        # Non-plugin path: express RMS norm with elementary ops.
        normalized_shape = [normalized_shape] if isinstance(
            normalized_shape, int) else normalized_shape
        # Reduce over the trailing len(normalized_shape) dimensions.
        dim = tuple([-i - 1 for i in range(len(normalized_shape))])

        if default_net().strongly_typed:
            # Strongly-typed networks forbid implicit precision changes, so
            # compute the statistics explicitly in float32 and cast back.
            input_dtype = input.dtype
            fp32_input = cast(input, "float32")
            varx = pow(fp32_input, 2.0)
            varx = varx.mean(dim, keepdim=True)
            denom = varx + eps
            denom = denom.sqrt()
            fp32_y = fp32_input / denom
            y = cast(fp32_y, input_dtype)
        else:
            # Accumulate the variance in float32 for numerical stability.
            with precision("float32"):
                varx = pow(input, 2.0)
                varx = varx.mean(dim, keepdim=True)
                denom = varx + eps
                denom = denom.sqrt()
                y = input / denom

        if weight is not None:
            y = y * weight

        return y
    else:
        # TODO remove the plugin version if rmsnorm operation can be offloaded
        # to Myelin.
        plg_creator = trt.get_plugin_registry().get_plugin_creator(
            'Rmsnorm', '1', TRT_LLM_PLUGIN_NAMESPACE)
        assert plg_creator is not None

        # 'eps' is rebound from a scalar to its trt.PluginField wrapper.
        eps = trt.PluginField("eps", np.array(eps, dtype=np.float32),
                              trt.PluginFieldType.FLOAT32)
        p_dtype = default_net().plugin_config.rmsnorm_plugin
        pf_type = trt.PluginField(
            "type_id", np.array([int(str_dtype_to_trt(p_dtype))], np.int32),
            trt.PluginFieldType.INT32)
        pfc = trt.PluginFieldCollection([eps, pf_type])
        rmsnorm_plug = plg_creator.create_plugin("rmsnorm", pfc)

        normalized_shape = [normalized_shape] if isinstance(
            normalized_shape, int) else normalized_shape
        if weight is None:
            # Fixed: the default gamma must be the identity (ones), matching
            # layer_norm above. The original used np.zeros, which would
            # multiply the normalized output by zero and produce an
            # all-zero result.
            weight = constant(
                np.ones(normalized_shape, dtype=str_dtype_to_np(p_dtype)))

        plug_inputs = [input.trt_tensor, weight.trt_tensor]
        layer = default_trtnet().add_plugin_v2(plug_inputs, rmsnorm_plug)
        return _create_tensor(layer.get_output(0), layer)
class GroupNorm(Module):
    """Group normalization module; see the free function ``group_norm()``.

    When ``affine`` is True, learnable per-channel gamma ('weight') and
    beta ('bias') parameters of shape (num_channels,) are created;
    otherwise both are registered as None.
    """

    def __init__(self,
                 num_groups,
                 num_channels,
                 eps=1e-05,
                 affine=True,
                 dtype=None):
        super().__init__()
        if num_channels % num_groups != 0:
            raise ValueError('num_channels must be divisible by num_groups')
        self.num_groups = num_groups
        self.num_channels = num_channels
        self.affine = affine
        if affine:
            channel_shape = (self.num_channels, )
            self.weight = Parameter(shape=channel_shape, dtype=dtype)
            self.bias = Parameter(shape=channel_shape, dtype=dtype)
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.eps = eps

    def forward(self, x):
        gamma = self.weight.value if self.weight is not None else None
        beta = self.bias.value if self.bias is not None else None
        return group_norm(x, self.num_groups, gamma, beta, self.eps)
def group_norm(input: Tensor,
               num_groups: int,
               weight: Optional[Tensor] = None,
               bias: Optional[Tensor] = None,
               eps: float = 1e-05):
    '''
    Add a group-norm operation on a tensor.

    The channel dimension (dim 1) is split into 'num_groups' groups and each
    group is normalized to zero mean and unit variance over its channels and
    all spatial dimensions. Optional per-channel 'weight' (gamma) and 'bias'
    (beta) are applied afterwards.

    Parameters:
        input : Tensor
            The tensor to normalize, laid out as [B, C, ...] (e.g.
            [B, C, T, D]). The channel dimension must be static.
        num_groups : int
            Number of groups to split the channels into. Must divide C
            evenly (not checked here; the GroupNorm module validates it).
        weight : Optional[Tensor] = None
            Per-channel 'gamma'; broadcast over batch and spatial dims.
        bias : Optional[Tensor] = None
            Per-channel 'beta'; broadcast over batch and spatial dims.
        eps : float
            Epsilon added to the variance before the square root.

    Returns:
        The normalized tensor, with the same shape as 'input'.
    '''
    assert not input.is_dynamic(1)  # [B, C, T, D] or [B, C, ...]
    num_channels = input.size()[1]

    ndim = input.ndim()
    old_shape = shape(input)
    # Split C into [G, C//G] so each group can be reduced independently.
    new_shape = concat([
        input.size(0),
        num_groups,
        num_channels // num_groups,
    ] + [input.size(i) for i in range(2, ndim)])
    x = input.view(new_shape)  # [B, G, C//G, ...]

    # x now has ndim+1 dims; reduce over C//G and all spatial dims.
    reduce_dim = tuple(range(2, ndim + 1))  # (2, ..., ndim)
    ux = x.mean(reduce_dim, keepdim=True)  # [B, G, 1, 1, 1] or [B, G, 1, ..., 1]
    numerator = x - ux
    # Variance computed as E[(x - ux)^2].
    varx = numerator * numerator
    varx = varx.mean(reduce_dim, keepdim=True)

    denom = varx + eps
    denom = denom.sqrt()
    y = numerator / denom
    # Restore the original [B, C, ...] layout.
    y = y.view(old_shape)

    # Reshape gamma/beta from [C] to [C, 1, ..., 1] for broadcasting.
    new_shape = concat([num_channels] + [1 for _ in range(2, ndim)])  #[C,1,...1]
    if weight is not None:
        y = y * weight.view(new_shape)
    if bias is not None:
        y = y + bias.view(new_shape)

    return y  # [B, C, T, D] or [B, C, ...]
conv.py
class Conv2d(Module):
    """2D convolution layer; thin wrapper around the ``conv2d()`` op.

    Holds the convolution hyper-parameters plus a learnable 'weight' of
    shape (out_channels, in_channels // groups, *kernel_size) and, when
    requested, a per-output-channel 'bias'.
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: Tuple[int, int],
            stride: Tuple[int, int] = (1, 1),
            padding: Tuple[int, int] = (0, 0),
            dilation: Tuple[int, int] = (1, 1),
            groups: int = 1,
            bias: bool = True,
            padding_mode: str = 'zeros',  # TODO: refine this type
            dtype=None) -> None:
        super().__init__()
        # Validate the group configuration before storing anything.
        if groups <= 0:
            raise ValueError('groups must be a positive integer')
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.padding_mode = padding_mode

        weight_shape = (out_channels, in_channels // groups, *kernel_size)
        self.weight = Parameter(shape=weight_shape, dtype=dtype)
        if bias:
            self.bias = Parameter(shape=(out_channels, ), dtype=dtype)
        else:
            self.register_parameter('bias', None)

    def forward(self, input):
        bias_value = self.bias.value if self.bias is not None else None
        return conv2d(input, self.weight.value, bias_value, self.stride,
                      self.padding, self.dilation, self.groups)
class ConvTranspose2d(Module):
    """2D transposed convolution layer; wrapper around ``conv_transpose2d()``.

    Holds the hyper-parameters plus a learnable 'weight' of shape
    (in_channels, out_channels // groups, *kernel_size) and, when requested,
    a per-output-channel 'bias'. forward() optionally accepts a desired
    output size, resolved to an output padding by _output_padding().
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: Tuple[int, int],
            stride: Tuple[int, int] = (1, 1),
            padding: Tuple[int, int] = (0, 0),
            output_padding: Tuple[int, int] = (0, 0),
            dilation: Tuple[int, int] = (1, 1),
            groups: int = 1,
            bias: bool = True,
            padding_mode: str = 'zeros',  # TODO: refine this type
            dtype=None) -> None:
        super().__init__()
        if groups <= 0:
            raise ValueError('groups must be a positive integer')
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.output_padding = output_padding
        self.dilation = dilation
        self.groups = groups
        self.padding_mode = padding_mode
        # Note: transposed-conv weight layout is (in, out // groups, kH, kW),
        # the reverse of Conv2d's (out, in // groups, kH, kW).
        self.weight = Parameter(shape=(in_channels, out_channels // groups,
                                       *kernel_size),
                                dtype=dtype)
        if bias:
            self.bias = Parameter(shape=(out_channels, ), dtype=dtype)
        else:
            self.register_parameter('bias', None)

    # Computes the output padding needed to reach a requested output size,
    # or returns the preset self.output_padding when no size is requested.
    def _output_padding(self,
                        input,  # input tensor
                        output_size,  # desired output size (optional)
                        stride,
                        padding,
                        kernel_size,
                        num_spatial_dims: int,  # number of spatial dims (2 for 2D conv)
                        dilation=None):  # dilation rate
        if output_size is None:
            # No explicit output size requested: use the preset value.
            ret = self.output_padding
        else:
            # Detect whether the input carries a batch dimension.
            has_batch_dim = input.dim() == num_spatial_dims + 2
            num_non_spatial_dims = 2 if has_batch_dim else 1
            # Trim output_size down to the spatial dimensions only
            # (height and width), dropping batch/channel entries if present.
            if len(output_size) == num_non_spatial_dims + num_spatial_dims:
                output_size = output_size[num_non_spatial_dims:]
            if len(output_size) != num_spatial_dims:
                raise ValueError(
                    "ConvTranspose{}D: for {}D input, output_size must have {} or {} elements (got {})"
                    .format(num_spatial_dims, input.dim(), num_spatial_dims,
                            num_non_spatial_dims + num_spatial_dims,
                            len(output_size)))

            # Compute, per spatial dimension, the min/max achievable output
            # size from the transposed-convolution size formula:
            # H_out = (H_in - 1) * stride - 2 * padding
            #         + dilation * (kernel_size - 1) + 1
            # (max = min + stride - 1, reachable via output_padding).
            min_sizes = []
            max_sizes = []
            for d in range(num_spatial_dims):
                dim_size = (
                    (input.size(d + num_non_spatial_dims) - 1) * stride[d] -
                    2 * padding[d] +
                    (dilation[d] if dilation is not None else 1) *
                    (kernel_size[d] - 1) + 1)
                min_sizes.append(dim_size)
                max_sizes.append(min_sizes[d] + stride[d] - 1)

            # Reject sizes outside the theoretically reachable range.
            for i in range(len(output_size)):
                size = output_size[i]
                min_size = min_sizes[i]
                max_size = max_sizes[i]
                if size < min_size or size > max_size:
                    raise ValueError((
                        "requested an output size of {}, but valid sizes range "
                        "from {} to {} (for an input of {})").format(
                            output_size, min_sizes, max_sizes,
                            input.size()[2:]))

            # The required output padding is the gap between the requested
            # size and the minimum achievable size.
            res = []
            for d in range(num_spatial_dims):
                res.append(output_size[d] - min_sizes[d])

            ret = res
        return ret

    def forward(self, input, output_size=None):
        num_spatial_dims = 2  # 2D convolution
        # Resolve the output padding (possibly from a requested output size).
        output_padding = self._output_padding(input, output_size, self.stride,
                                              self.padding, self.kernel_size,
                                              num_spatial_dims, self.dilation)
        return conv_transpose2d(input, self.weight.value,
                                None if self.bias is None else self.bias.value,
                                self.stride, self.padding, output_padding,
                                self.dilation, self.groups)
-
1. 解决尺寸歧义问题
转置卷积的一个特点是:在普通卷积中,多个不同的输入尺寸在同一组参数下可能产生相同的输出尺寸,因此转置卷积的输出尺寸并不唯一。_output_padding方法通过允许用户直接指定期望的输出尺寸来解决这种多对一映射的歧义问题。 -
2. 确保尺寸匹配
在编码器-解码器结构(如U-Net)中,精确的尺寸匹配至关重要。这个方法确保了转置卷积(反卷积)后的特征图与编码器中的对应特征图尺寸完全一致,便于后续的拼接或相加操作。 -
3. 输出尺寸计算公式
该方法基于的标准转置卷积输出尺寸公式为:
H_out = (H_in - 1) × stride - 2 × padding + dilation × (kernel_size - 1) + output_padding + 1
-
4. 实际应用示例
假设在U-Net模型中,需要确保解码器的输出与编码器的输入尺寸匹配:
# 编码器中的卷积
conv = nn.Conv2d(1, 1, 3, stride=2, padding=1)
# 输入尺寸: 7x7 → 输出尺寸: 4x4
# 解码器中的转置卷积
deconv = nn.ConvTranspose2d(1, 1, 3, stride=2, padding=1)
# 正常情况: 4x4 → 7x7
# 但如果希望输出8x8,就需要使用output_padding
deconv_with_padding = nn.ConvTranspose2d(1, 1, 3, stride=2, padding=1, output_padding=1)
# 结果: 4x4 → 8x8
这种方法为深度学习中的上采样操作提供了精确的尺寸控制,特别在需要尺寸对称的架构中非常有用。
参考文献
-
• https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/functional.py -
• https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/layers/activation.py -
• https://www.mindspore.cn/tutorials/zh-CN/r2.7.1/model_infer/ms_infer/ms_infer_parallel_infer.html -
• https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/layers/embedding.py -
• https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/layers/linear.py

夜雨聆风