乐于分享
好东西不私藏

TensorRT-LLM 0.5.0 源码之十四

TensorRT-LLM 0.5.0 源码之十四

LLaMADecoderLayer

class LLaMADecoderLayer(Module):
    """A single LLaMA decoder block.

    The block applies RMS normalization, self-attention and a residual add,
    followed by RMS normalization, a gated MLP and a second residual add.
    """

    def __init__(self,
                 layer_id,
                 hidden_size,
                 num_attention_heads,
                 num_kv_heads=None,
                 max_position_embeddings=2048,
                 dtype=None,
                 attention_mask_type=AttentionMaskType.causal,
                 hidden_act='silu',
                 position_embedding_type=PositionEmbeddingType.rope_gpt_neox,
                 rotary_base=10000.0,
                 rotary_scaling=None,
                 mlp_hidden_size=None,
                 tp_group=None,
                 tp_size=1,
                 quant_mode=QuantMode(0),
                 rms_norm_eps=1e-06):
        """Create the normalization, attention and MLP sub-modules.

        Args:
            layer_id: Index of this layer; used to derive the sub-module
                instance ids and to decide whether debug outputs are
                registered.
            hidden_size: Width of the hidden states.
            num_attention_heads: Number of attention heads.
            num_kv_heads: Number of key/value heads, forwarded to Attention.
            max_position_embeddings: Forwarded to Attention.
            dtype: Data type forwarded to every sub-module.
            attention_mask_type: Mask type forwarded to Attention
                (causal by default).
            hidden_act: Activation name forwarded to GatedMLP.
            position_embedding_type: Forwarded to Attention.
            rotary_base: Forwarded to Attention as rotary_embedding_base.
            rotary_scaling: Forwarded to Attention as
                rotary_embedding_scaling.
            mlp_hidden_size: Inner width of the MLP; falls back to
                ``4 * hidden_size`` when falsy.
            tp_group: Tensor-parallel group forwarded to Attention/GatedMLP.
            tp_size: Tensor-parallel size forwarded to Attention/GatedMLP.
            quant_mode: Quantization mode forwarded to Attention/GatedMLP.
            rms_norm_eps: Epsilon used by both RmsNorm layers.
        """
        super().__init__()
        self._layer_id = layer_id  # useful for debugging

        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_kv_heads = num_kv_heads
        self.max_position_embeddings = max_position_embeddings
        self.dtype = dtype
        self.hidden_act = hidden_act
        self.tp_group = tp_group
        self.tp_size = tp_size
        # Default the MLP width to four times the hidden size.
        self.mlp_hidden_size = mlp_hidden_size or hidden_size * 4
        self.attention_mask_type = attention_mask_type
        self.position_embedding_type = position_embedding_type

        self.input_layernorm = RmsNorm(normalized_shape=hidden_size,
                                       eps=rms_norm_eps,
                                       dtype=dtype)

        self.attention = Attention(
            hidden_size,
            num_attention_heads,
            num_kv_heads,
            max_position_embeddings,
            dtype=dtype,
            # Honour the constructor argument instead of silently forcing
            # the causal mask; the default value is still causal.
            attention_mask_type=attention_mask_type,
            bias=False,
            position_embedding_type=position_embedding_type,
            rotary_embedding_base=rotary_base,
            rotary_embedding_scaling=rotary_scaling,
            tp_group=tp_group,
            tp_size=tp_size,
            use_int8_kv_cache=quant_mode.has_int8_kv_cache(),
            quant_mode=quant_mode,
            # Attention takes the even instance id, the MLP the odd one.
            instance_id=2 * layer_id,
        )

        self.mlp = GatedMLP(hidden_size=hidden_size,
                            ffn_hidden_size=self.mlp_hidden_size,
                            hidden_act=hidden_act,
                            dtype=dtype,
                            bias=False,
                            tp_group=tp_group,
                            tp_size=tp_size,
                            quant_mode=quant_mode,
                            instance_id=2 * layer_id + 1)
        self.post_layernorm = RmsNorm(normalized_shape=hidden_size,
                                      eps=rms_norm_eps,
                                      dtype=dtype)

    def forward(self,
                hidden_states,
                attention_mask=None,
                use_cache=False,
                kv_cache_params=None,
                attention_params=None,
                all_reduce_workspace=None):
        """Run the block on ``hidden_states``.

        Returns:
            The updated hidden states, or ``(hidden_states, presents)`` when
            ``use_cache`` is true, where ``presents`` is the second element
            returned by the attention module.
        """
        # Intermediate tensors are only exposed for the layer with id 0.
        is_debug_layer = self._layer_id == 0

        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)
        if is_debug_layer:
            self.register_network_output("norm0", hidden_states)

        attention_output = self.attention(hidden_states,
                                          attention_mask=attention_mask,
                                          use_cache=use_cache,
                                          kv_cache_params=kv_cache_params,
                                          attention_params=attention_params,
                                          workspace=all_reduce_workspace)

        if use_cache:
            # With caching enabled the attention module returns a pair.
            attention_output, presents = attention_output
        if is_debug_layer:
            self.register_network_output("attn", attention_output)

        hidden_states = residual + attention_output

        residual = hidden_states
        hidden_states = self.post_layernorm(hidden_states)
        if is_debug_layer:
            self.register_network_output("norm1", hidden_states)

        hidden_states = self.mlp(hidden_states, all_reduce_workspace)
        if is_debug_layer:
            self.register_network_output("mlp", hidden_states)

        hidden_states = residual + hidden_states
        if use_cache:
            return (hidden_states, presents)
        return hidden_states

LLaMAModel

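流水线并行(pipeline parallelism)在此实现:每个 pp_rank 只构建并执行自己负责的那部分层;首个 rank 负责词嵌入,最后一个 rank 负责最终的 RmsNorm,相邻 rank 之间通过 send/recv 传递 hidden_states。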

class LLaMAModel(Module):
    """LLaMA transformer stack for one pipeline-parallel stage.

    The token embedding is created only on the first pipeline rank and the
    final RmsNorm only on the last one; every rank owns the decoder layers
    returned by ``get_transformer_layers``.

    NOTE: ``get_transformer_layers`` is not defined on this class; it is
    supplied by ``GenerationMixin``, which ``LLaMAForCausalLM`` mixes in.
    """

    def __init__(self,
                 num_layers,
                 num_heads,
                 num_kv_heads,
                 hidden_size,
                 vocab_size,
                 hidden_act,
                 max_position_embeddings,
                 dtype,
                 mlp_hidden_size=None,
                 position_embedding_type=PositionEmbeddingType.rope_gpt_neox,
                 rotary_base=10000.0,
                 rotary_scaling=None,
                 mapping=Mapping(),
                 quant_mode=QuantMode(0),
                 use_parallel_embedding=False,
                 embedding_sharding_dim=0,
                 rms_norm_eps=1e-06):
        """Build the embedding, decoder layers and final norm for this rank.

        Args:
            num_layers: Total number of decoder layers in the model.
            num_heads: Number of attention heads per layer.
            num_kv_heads: Number of key/value heads per layer.
            hidden_size: Width of the hidden states.
            vocab_size: Number of rows of the embedding table.
            hidden_act: Activation name forwarded to each layer.
            max_position_embeddings: Forwarded to each layer.
            dtype: Data type forwarded to every sub-module.
            mlp_hidden_size: Inner MLP width forwarded to each layer.
            position_embedding_type: Forwarded to each layer.
            rotary_base: Forwarded to each layer.
            rotary_scaling: Forwarded to each layer.
            mapping: Parallel mapping (tensor/pipeline ranks and groups).
            quant_mode: Quantization mode forwarded to each layer.
            use_parallel_embedding: Shard the embedding across the
                tensor-parallel group when true.
            embedding_sharding_dim: Sharding dimension of the embedding.
            rms_norm_eps: Epsilon used by the RmsNorm layers.
        """
        super().__init__()
        self.mapping = mapping

        if self.mapping.is_first_pp_rank():
            self.vocab_embedding = Embedding(
                num_embeddings=vocab_size,
                embedding_dim=hidden_size,
                dtype=dtype,
                tp_size=mapping.tp_size if use_parallel_embedding else 1,
                tp_group=mapping.tp_group if use_parallel_embedding else None,
                sharding_dim=embedding_sharding_dim,
                tp_rank=mapping.tp_rank)

        # Only the layers assigned to this pipeline rank are instantiated;
        # layer_id is the index returned by get_transformer_layers.
        self.layers = ModuleList([
            LLaMADecoderLayer(layer_id=i,
                              hidden_size=hidden_size,
                              num_attention_heads=num_heads,
                              num_kv_heads=num_kv_heads,
                              max_position_embeddings=max_position_embeddings,
                              dtype=dtype,
                              hidden_act=hidden_act,
                              mlp_hidden_size=mlp_hidden_size,
                              position_embedding_type=position_embedding_type,
                              rotary_base=rotary_base,
                              rotary_scaling=rotary_scaling,
                              tp_group=mapping.tp_group,
                              tp_size=mapping.tp_size,
                              quant_mode=quant_mode,
                              rms_norm_eps=rms_norm_eps)
            for i in self.get_transformer_layers(self.mapping, num_layers)
        ])

        if self.mapping.is_last_pp_rank():
            self.ln_f = RmsNorm(normalized_shape=hidden_size,
                                eps=rms_norm_eps,
                                dtype=dtype)

    def forward(self,
                input_ids,
                position_ids=None,
                use_cache=False,
                attention_mask=None,
                kv_cache_params=None,
                attention_params=None,
                hidden_states=None,
                all_reduce_workspace=None):
        """Run the layers owned by this pipeline rank.

        ``kv_cache_params`` must be provided: its attributes are read
        unconditionally. ``position_ids`` is accepted but not used here.

        Returns:
            The hidden states, or ``(hidden_states, presents)`` when
            ``use_cache`` is true, with one ``presents`` entry per local
            layer.
        """
        num_local_layers = len(self.layers)

        # Substitute per-layer placeholders when no cache tensors are given.
        # The original code built this tuple and discarded it, so a None
        # value reached zip() below and raised TypeError.
        past_key_values = kv_cache_params.past_key_value
        if past_key_values is None:
            past_key_values = tuple([None] * num_local_layers)
        block_pointers = kv_cache_params.kv_cache_block_pointers
        if block_pointers is None:
            block_pointers = tuple([None] * num_local_layers)

        if use_cache:
            presents = []

        if self.mapping.is_first_pp_rank():
            hidden_states = self.vocab_embedding(input_ids)
        else:
            # Later stages take their input from the previous pipeline rank.
            hidden_states = recv(hidden_states, self.mapping.prev_pp_rank())
        self.register_network_output("embd", hidden_states)

        for layer, past, pointer in zip(self.layers, past_key_values,
                                        block_pointers):
            # Each layer receives only its own cache entry; the remaining
            # cache parameters are shared by all layers.
            layer_kv_cache_params = KeyValueCacheParams(
                past_key_value=[past],
                host_past_key_value_lengths=kv_cache_params.
                host_past_key_value_lengths,
                kv_cache_block_pointers=[pointer],
                cache_indirection=kv_cache_params.cache_indirection)
            hidden_states = layer(hidden_states,
                                  use_cache=use_cache,
                                  attention_mask=attention_mask,
                                  kv_cache_params=layer_kv_cache_params,
                                  attention_params=attention_params,
                                  all_reduce_workspace=all_reduce_workspace)

            if use_cache:
                presents.append(hidden_states[1])
                hidden_states = hidden_states[0]

        if self.mapping.is_last_pp_rank():
            hidden_states = self.ln_f(hidden_states)
        else:
            # Hand the activations over to the next pipeline rank.
            hidden_states = send(hidden_states, self.mapping.next_pp_rank())

        if use_cache:
            return (hidden_states, tuple(presents))
        return hidden_states

GenerationMixin

class GenerationMixin:
    """Mixin that selects the layers of a pipeline stage and declares the
    network input tensors together with their optimization-profile ranges."""

    def get_transformer_layers(self, mapping, num_layers):
        """Return the layer indices owned by ``mapping.pp_rank``.

        Layers are split into ``mapping.pp_size`` contiguous, equally sized
        chunks. NOTE: floor division is used, so when ``num_layers`` is not a
        multiple of ``pp_size`` the trailing remainder layers are assigned to
        no rank.
        """
        layers_per_pipeline_stage = num_layers // mapping.pp_size
        first_layer = mapping.pp_rank * layers_per_pipeline_stage
        return list(range(first_layer,
                          first_layer + layers_per_pipeline_stage))

    @staticmethod
    def _num_tokens_ranges(max_batch_size, max_beam_width, max_input_len,
                           max_num_tokens):
        """Return flat ``[min, opt, max]`` token-count ranges.

        Returns:
            A ``(context_range, generation_range)`` pair. Both are plain
            three-element lists; the caller wraps them per profile.
        """
        if max_num_tokens is not None:
            capped = [1, (max_num_tokens + 1) // 2, max_num_tokens]
            return capped, list(capped)
        # Context phase: all prompt tokens of the batch.
        context = [
            1, (max_input_len * max_batch_size + 1) // 2,
            max_input_len * max_batch_size
        ]
        # Generation phase: one token per sequence and beam.
        generation = [
            1, max_batch_size * max_beam_width,
            max_beam_width * max_batch_size
        ]
        return context, generation

    def prepare_basic_inputs(self,
                             max_batch_size,
                             max_beam_width,
                             max_input_len,
                             max_new_tokens,
                             num_kv_heads,
                             head_size,
                             num_layers,
                             kv_dtype,
                             remove_input_padding=False,
                             use_gpt_attention_plugin=False,
                             use_gemm_plugin=False,
                             use_custom_all_reduce=False,
                             paged_kv_cache=False,
                             tokens_per_block=64,
                             gather_all_token_logits=False,
                             dtype=None,
                             num_heads=None,
                             mapping=None,
                             max_num_tokens=None):
        """Declare the network input tensors and their dynamic-shape ranges.

        Every dynamic dimension gets one ``[min, opt, max]`` entry per
        optimization profile. Two profiles (context and generation) are used
        unless both plugins are enabled or in-flight batching is active.

        Args:
            max_batch_size: Largest batch size to support.
            max_beam_width: Largest beam width to support.
            max_input_len: Largest prompt length to support.
            max_new_tokens: Largest number of generated tokens.
            num_kv_heads: Total number of key/value heads (before the
                tensor-parallel split).
            head_size: Size of one attention head.
            num_layers: Total number of decoder layers.
            kv_dtype: Data type of the KV cache tensors.
            remove_input_padding: Use packed ``[1, num_tokens]`` inputs.
            use_gpt_attention_plugin: Whether the GPT attention plugin is on.
            use_gemm_plugin: Whether the GEMM plugin is on.
            use_custom_all_reduce: Declare the all-reduce workspace input
                when tensor parallelism is active.
            paged_kv_cache: Declare block-pointer inputs instead of
                contiguous KV cache tensors.
            tokens_per_block: Tokens per block of the paged KV cache.
            gather_all_token_logits: When false, ``last_token_ids`` is
                declared on the last pipeline rank.
            dtype: Data type of ``hidden_states_input``; required on
                pipeline ranks other than the first.
            num_heads: Number of attention heads; required on pipeline
                ranks other than the first.
            mapping: Parallel mapping; a default ``Mapping()`` is created
                when omitted.
            max_num_tokens: Optional cap on the packed token count.

        Returns:
            A dict from input name to Tensor. Entries that do not apply to
            the configuration are ``None`` (or lists of ``None`` for the
            per-layer cache entries).
        """
        # Build the default lazily rather than sharing one instance that is
        # created when the function is defined.
        if mapping is None:
            mapping = Mapping()

        max_len = max_input_len + max_new_tokens

        # Dynamic ranges for the context (cxt/ctx) and generation (gen)
        # phases; each item is one profile given as [min, opt, max].
        # bb = batch * beam, bs = batch size.
        bb_range_cxt = [1, (max_batch_size + 1) // 2, max_batch_size]
        bb_range_gen = [
            1, (max_batch_size * max_beam_width + 1) // 2,
            max_batch_size * max_beam_width
        ]
        _bs_range = [1, (max_batch_size + 1) // 2, max_batch_size]
        _beam_width_range = [1, (max_beam_width + 1) // 2, max_beam_width]
        inlen_range_cxt = [1, (max_input_len + 1) // 2, max_input_len]
        inlen_range_gen = [1, 1, 1]
        _mask_len_ctx = [1, (max_input_len + 1) // 2, max_input_len]
        _mask_len_gen = [2, (max_len + 1) // 2 + 1, max_len + 1]
        _kv_cache_range_ctx = [0, 0, 0]
        _kv_cache_range_gen = [1, (max_len + 1) // 2, max_len]
        _max_len_range = [0, (max_len + 1) // 2, max_len]

        # Flat ranges; they are wrapped exactly once per profile below. The
        # previous code pre-wrapped them when max_num_tokens was given, which
        # produced one nesting level too many in the two-profile case.
        num_tokens_range_ctx, num_tokens_range_gen = (
            GenerationMixin._num_tokens_ranges(max_batch_size, max_beam_width,
                                               max_input_len, max_num_tokens))

        enable_two_optimization_profiles = False
        # `== False` is kept on purpose: the flags are compared by value, so
        # None or a non-boolean setting does not count as disabled.
        if use_gpt_attention_plugin == False or use_gemm_plugin == False:
            # In-flight batching is enabled when the attention plugin,
            # remove_input_padding and paged_kv_cache are all on.
            use_in_flight_batching = (use_gpt_attention_plugin
                                      and remove_input_padding
                                      and paged_kv_cache)
            enable_two_optimization_profiles = not use_in_flight_batching

        if enable_two_optimization_profiles:
            # Without in-flight batching: one profile per phase.
            bb_range = [bb_range_cxt, bb_range_gen]
            bs_range = [_bs_range, _bs_range]
            beam_width_range = [_beam_width_range, _beam_width_range]
            inlen_range = [inlen_range_cxt, inlen_range_gen]
            mask_len_range = [_mask_len_ctx, _mask_len_gen]
            if use_gpt_attention_plugin:
                kv_cache_range = [_kv_cache_range_gen, _kv_cache_range_gen]
            else:
                kv_cache_range = [_kv_cache_range_ctx, _kv_cache_range_gen]
            max_len_range = [_max_len_range, _max_len_range]
            num_tokens_range = [num_tokens_range_ctx, num_tokens_range_gen]
        else:
            # A single profile covering both phases.
            bb_range = [bb_range_gen]
            bs_range = [_bs_range]
            beam_width_range = [_beam_width_range]
            inlen_range = [[1, 1, max_input_len]]
            mask_len_range = [[1, (max_len + 1) // 2 + 1, max_len + 1]]
            kv_cache_range = [[0, (max_len + 1) // 2, max_len]]
            max_len_range = [_max_len_range]
            if max_num_tokens is None:
                num_tokens_range = [[
                    1, max_batch_size * max_beam_width,
                    max(max_input_len * max_batch_size,
                        max_beam_width * max_batch_size)
                ]]
            else:
                num_tokens_range = [num_tokens_range_ctx]

        # Static dimensions are still listed once per profile.
        num_profiles = 2 if enable_two_optimization_profiles else 1

        # Model input tensors.
        input_ids = None
        position_ids = None
        hidden_states = None
        if remove_input_padding:
            if mapping.is_first_pp_rank():
                # First pipeline rank: token inputs, packed as
                # [1, num_tokens].
                input_ids = Tensor(
                    name='input_ids',
                    dtype=trt.int32,
                    shape=[1, -1],
                    dim_range=OrderedDict([
                        ('batch_size_fake', [1] * num_profiles),
                        ('num_tokens', num_tokens_range),
                    ]))
                position_ids = Tensor(
                    name='position_ids',
                    dtype=trt.int32,
                    shape=[1, -1],
                    dim_range=OrderedDict([
                        ('batch_size_fake', [1] * num_profiles),
                        ('num_tokens', num_tokens_range),
                    ]))
            else:
                # Later pipeline ranks: hidden states from the previous rank.
                assert dtype is not None
                assert num_heads is not None
                hidden_size = head_size * num_heads
                hidden_states = Tensor(
                    name='hidden_states_input',
                    dtype=dtype,
                    shape=[1, -1, hidden_size],
                    dim_range=OrderedDict([
                        ('batch_size_fake', [1] * num_profiles),
                        ('num_tokens', num_tokens_range),
                        ('hidden_size', [hidden_size] * num_profiles),
                    ]))
        else:
            if mapping.is_first_pp_rank():
                # Padded layout: [batch_size_beam_width, sequence_length].
                input_ids = Tensor(
                    name='input_ids',
                    dtype=trt.int32,
                    shape=[-1, -1],
                    dim_range=OrderedDict([
                        ('batch_size_beam_width', bb_range),
                        ('input_len', inlen_range),
                    ]))
                position_ids = Tensor(
                    name='position_ids',
                    dtype=trt.int32,
                    shape=[-1, -1],
                    dim_range=OrderedDict([
                        ('batch_size_beam_width', bb_range),
                        ('input_len', inlen_range),
                    ]))
            else:
                assert dtype is not None
                assert num_heads is not None
                hidden_size = head_size * num_heads
                hidden_states = Tensor(
                    name='hidden_states_input',
                    dtype=dtype,
                    shape=[-1, -1, hidden_size],
                    dim_range=OrderedDict([
                        ('batch_size_beam_width', bb_range),
                        ('input_len', inlen_range),
                        ('hidden_size', [hidden_size] * num_profiles),
                    ]))

        # KV heads per tensor-parallel rank, rounded up.
        num_kv_heads = (num_kv_heads + mapping.tp_size - 1) // mapping.tp_size
        # Layer indices owned by this pipeline rank.
        layers_range = self.get_transformer_layers(mapping, num_layers)

        past_key_value = []  # one item per layer, used when !paged_kv_cache
        kv_cache_block_pointers_list = []  # one per layer, paged_kv_cache
        if not paged_kv_cache:
            # Contiguous (linear) KV cache: one tensor per layer.
            for i in layers_range:
                kv_dim_range = OrderedDict([
                    ('batch_size_beam_width', bb_range),
                    ('kv', [2] * num_profiles),
                    ('num_heads', [num_kv_heads] * num_profiles),
                    ('past_key_len', kv_cache_range),
                    ('head_size', [head_size] * num_profiles),
                ])
                kv = Tensor(name=f'past_key_value_{i}',
                            dtype=kv_dtype,
                            shape=[-1, 2, num_kv_heads, -1, head_size],
                            dim_range=kv_dim_range)
                past_key_value.append(kv)
                kv_cache_block_pointers_list.append(None)
        else:
            # Paged KV cache: convert each token bound of every profile into
            # a number of blocks, rounding up.
            max_blocks_per_seq_range = [[
                math.ceil(bound / tokens_per_block) for bound in profile
            ] for profile in kv_cache_range]

            for i in layers_range:
                kv_cache_block_pointers = Tensor(
                    name=f'kv_cache_block_pointers_{i}',
                    dtype=trt.int64,
                    shape=[-1, 2, -1],
                    dim_range=OrderedDict([
                        ('batch_size_beam_width', bb_range),
                        ('kv', [2] * num_profiles),
                        ('max_blocks_per_seq', max_blocks_per_seq_range),
                    ]))
                kv_cache_block_pointers_list.append(kv_cache_block_pointers)
                past_key_value.append(None)

        sequence_length = None
        context_lengths = None
        host_context_lengths = None
        host_past_key_value_lengths = None
        attention_mask = None
        host_request_types = None

        if use_gpt_attention_plugin:
            sequence_length = Tensor(
                name='sequence_length',
                dtype=trt.int32,
                shape=[-1],
                dim_range=OrderedDict([('batch_size_beam_width', bb_range)]),
            )
            host_request_types = Tensor(
                name='host_request_types',
                dtype=trt.int32,
                shape=[-1],
                dim_range=OrderedDict([('batch_size_beam_width', bb_range)]),
            )
            host_past_key_value_lengths = Tensor(
                name='host_past_key_value_lengths',
                dtype=trt.int32,
                shape=[-1],
                dim_range=OrderedDict([('batch_size_beam_width', bb_range)]),
            )
            context_lengths = Tensor(
                name='context_lengths',
                dtype=trt.int32,
                shape=[-1],
                dim_range=OrderedDict([('batch_size_beam_width', bb_range)]),
            )
        else:
            attention_mask = Tensor(
                name='attention_mask',
                dtype=trt.int32,
                shape=[-1, -1],
                dim_range=OrderedDict([
                    ('batch_size_beam_width', bb_range),
                    ('mask_len', mask_len_range),
                ]),
            )

        if use_gpt_attention_plugin and remove_input_padding:
            host_context_lengths = Tensor(
                name='host_context_lengths',
                dtype=trt.int32,
                shape=[-1],
                dim_range=OrderedDict([('batch_size_beam_width', bb_range)]),
            )

        last_token_ids = None
        if mapping.is_last_pp_rank() and not gather_all_token_logits:
            # Needed only when just the last token's logits are gathered.
            last_token_ids = Tensor(
                name='last_token_ids',
                dtype=trt.int32,
                shape=[-1],
                dim_range=OrderedDict([
                    ('batch_size_last_token_ids', bb_range),
                ]),
            )

        # Beam-search backtrace map.
        cache_indirection = Tensor(
            name='cache_indirection',
            dtype=trt.int32,
            shape=[-1, -1, -1],
            dim_range=OrderedDict([
                ('batch_size_cache', bs_range),
                ('beam_width', beam_width_range),
                ('max_seq_len', max_len_range),
            ]),
        )

        all_reduce_workspace = None
        if use_custom_all_reduce and mapping.tp_size > 1:
            # 3 (= buffer + signals_in + signals_out) entries per TP rank.
            workspace_size = 3 * mapping.tp_size
            all_reduce_workspace = Tensor(
                name='all_reduce_workspace',
                dtype=trt.int64,
                shape=[workspace_size],
                dim_range=OrderedDict([
                    ('all_reduce_size', [workspace_size] * num_profiles),
                ]))

        return {
            # First pipeline rank only.
            'input_ids': input_ids,
            'position_ids': position_ids,
            # Later pipeline ranks only.
            'hidden_states_input': hidden_states,
            # Only without the GPT attention plugin.
            'attention_mask': attention_mask,
            # Only without the paged KV cache.
            'past_key_value': past_key_value,
            # Only with the paged KV cache.
            'kv_cache_block_pointers_list': kv_cache_block_pointers_list,
            # Only on the last pipeline rank without gather_all_token_logits.
            'last_token_ids': last_token_ids,
            # Only with the GPT attention plugin.
            'sequence_length': sequence_length,
            'host_request_types': host_request_types,
            'host_past_key_value_lengths': host_past_key_value_lengths,
            'context_lengths': context_lengths,
            # Only with the GPT attention plugin and remove_input_padding.
            'host_context_lengths': host_context_lengths,
            'cache_indirection': cache_indirection,
            # Only with use_custom_all_reduce and mapping.tp_size > 1.
            'all_reduce_workspace': all_reduce_workspace,
        }

LLaMAForCausalLM

class LLaMAForCausalLM(LLaMAModel, GenerationMixin):
    """LLaMA stack plus a language-model head on the last pipeline rank."""

    def __init__(self,
                 num_layers,
                 num_heads,
                 num_kv_heads,
                 hidden_size,
                 vocab_size,
                 hidden_act,
                 max_position_embeddings,
                 dtype,
                 logits_dtype="float32",
                 mlp_hidden_size=None,
                 position_embedding_type=PositionEmbeddingType.rope_gpt_neox,
                 rotary_base=10000.0,
                 rotary_scaling=None,
                 mapping=Mapping(),
                 quant_mode=QuantMode(0),
                 use_parallel_embedding=False,
                 embedding_sharding_dim=0,
                 rms_norm_eps=1e-06):
        """Record the model configuration, build the base model, then add
        the ``lm_head`` projection when this is the last pipeline rank.

        ``dtype`` and ``logits_dtype`` may be given either as strings or as
        ``trt.DataType`` values; strings are converted with
        ``str_dtype_to_trt``.
        """
        # Normalize both dtypes to trt.DataType.
        if not isinstance(dtype, str):
            assert isinstance(dtype, trt.DataType)
            self.dtype = dtype
        else:
            self.dtype = str_dtype_to_trt(dtype)

        if not isinstance(logits_dtype, str):
            assert isinstance(logits_dtype, trt.DataType)
            self.logits_dtype = logits_dtype
        else:
            self.logits_dtype = str_dtype_to_trt(logits_dtype)

        self.num_layers = num_layers
        self.num_heads = num_heads
        # A missing or non-positive KV head count means one KV head per
        # attention head.
        if num_kv_heads is None or num_kv_heads <= 0:
            num_kv_heads = num_heads
        self.num_kv_heads = num_kv_heads
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.tp_size = mapping.tp_size

        # The KV cache uses the model dtype unless a quantized cache is on.
        if quant_mode.has_int8_kv_cache():
            kv_dtype = str_dtype_to_trt('int8')
        elif quant_mode.has_fp8_kv_cache():
            kv_dtype = str_dtype_to_trt('fp8')
        else:
            kv_dtype = self.dtype
        self.kv_dtype = kv_dtype

        self.quant_mode = quant_mode
        self.use_parallel_embedding = use_parallel_embedding
        self.embedding_sharding_dim = embedding_sharding_dim

        # The caller-supplied dtype (not the converted one) is forwarded.
        super().__init__(num_layers,
                         num_heads,
                         num_kv_heads,
                         hidden_size,
                         vocab_size,
                         hidden_act,
                         max_position_embeddings,
                         dtype,
                         mlp_hidden_size=mlp_hidden_size,
                         position_embedding_type=position_embedding_type,
                         rotary_base=rotary_base,
                         rotary_scaling=rotary_scaling,
                         mapping=mapping,
                         quant_mode=quant_mode,
                         use_parallel_embedding=use_parallel_embedding,
                         embedding_sharding_dim=embedding_sharding_dim,
                         rms_norm_eps=rms_norm_eps)

        vocab_size_padded = pad_vocab_size(vocab_size, mapping.tp_size)
        if self.mapping.is_last_pp_rank():
            self.lm_head = ColumnLinear(hidden_size,
                                        vocab_size_padded,
                                        bias=False,
                                        dtype=dtype,
                                        tp_group=mapping.tp_group,
                                        tp_size=mapping.tp_size,
                                        gather_output=True)

    def forward(self,
                input_ids,
                position_ids=None,
                use_cache=False,
                last_token_ids=None,
                attention_mask=None,
                kv_cache_params=None,
                attention_params=None,
                hidden_states=None,
                all_reduce_workspace=None):
        """Run the base model and mark the network outputs.

        The last pipeline rank marks ``logits``; other ranks mark
        ``hidden_states_output``. With ``use_cache`` and a non-paged KV
        cache, every local layer's cache is marked as
        ``present_key_value_<layer index>`` and returned with the result.
        """
        base_output = super().forward(
            input_ids,
            position_ids,
            use_cache,
            attention_mask=attention_mask,
            kv_cache_params=kv_cache_params,
            attention_params=attention_params,
            hidden_states=hidden_states,
            all_reduce_workspace=all_reduce_workspace)

        if use_cache:
            hidden_states, presents = base_output
        else:
            hidden_states = base_output

        if self.mapping.is_last_pp_rank():
            # NOTE(review): presumably leaves all tokens in place when
            # last_token_ids is None -- confirm in gather_last_token_logits.
            hidden_states = gather_last_token_logits(
                hidden_states, last_token_ids,
                default_net().plugin_config.remove_input_padding)

            # Project the hidden states onto the (padded) vocabulary.
            lm_logits = self.lm_head(hidden_states)
            # Expose the logits as a network output.
            lm_logits.mark_output('logits', self.logits_dtype)
            result = lm_logits
        else:
            hidden_states.mark_output('hidden_states_output', self.dtype)
            result = hidden_states

        if use_cache and default_net().plugin_config.paged_kv_cache == False:
            # Only the non-paged cache is returned as network outputs.
            local_layers = self.get_transformer_layers(self.mapping,
                                                       self.num_layers)
            for layer_index, present in zip(local_layers, presents):
                present.mark_output(f'present_key_value_{layer_index}',
                                    self.kv_dtype)
            return (result, presents)

        # With a paged KV cache (or without caching) no cache is returned.
        return result

    def prepare_inputs(self,
                       max_batch_size,
                       max_input_len,
                       max_new_tokens,
                       use_cache,
                       max_beam_width,
                       max_num_tokens: int = None):
        """Prepare the input tensors for the model.

        The given sizes determine the ranges of the dynamic dimensions used
        with TensorRT dynamic shapes.

        NOTE(review): ``use_cache`` is not read; the returned tuple always
        carries ``True`` in the ``use_cache`` position.

        Returns:
            A tuple whose items can be passed, in order, to ``forward``.
        """
        plugin_config = default_net().plugin_config
        head_size = self.hidden_size // self.num_heads

        model_inputs = self.prepare_basic_inputs(
            max_batch_size,
            max_beam_width,
            max_input_len,
            max_new_tokens,
            self.num_kv_heads,
            head_size,
            self.num_layers,
            self.kv_dtype,
            remove_input_padding=plugin_config.remove_input_padding,
            use_gpt_attention_plugin=plugin_config.gpt_attention_plugin,
            use_gemm_plugin=plugin_config.gemm_plugin,
            use_custom_all_reduce=plugin_config.use_custom_all_reduce,
            paged_kv_cache=plugin_config.paged_kv_cache,
            tokens_per_block=plugin_config.tokens_per_block,
            dtype=self.dtype,
            num_heads=self.num_heads,
            mapping=self.mapping,
            max_num_tokens=max_num_tokens)

        kv_cache_params = KeyValueCacheParams(
            past_key_value=model_inputs['past_key_value'],
            host_past_key_value_lengths=model_inputs[
                'host_past_key_value_lengths'],
            kv_cache_block_pointers=model_inputs[
                'kv_cache_block_pointers_list'],
            cache_indirection=model_inputs['cache_indirection'],
        )
        attention_params = AttentionParams(
            sequence_length=model_inputs['sequence_length'],
            context_lengths=model_inputs['context_lengths'],
            host_context_lengths=model_inputs['host_context_lengths'],
            max_context_length=max_input_len,
            host_request_types=model_inputs['host_request_types'])

        return (model_inputs['input_ids'], model_inputs['position_ids'], True,
                model_inputs['last_token_ids'], model_inputs['attention_mask'],
                kv_cache_params, attention_params,
                model_inputs['hidden_states_input'],
                model_inputs['all_reduce_workspace'])

参考文献

  • https://github.com/NVIDIA/TensorRT-LLM/blob/v0.5.0/tensorrt_llm/models/llama/model.py
点个「赞」+「在看」❤️
让我们知道这份文字有温暖到你,也是我们持续创作的最大动力!
推荐
Lock-Free 队列实现原理
Shared Memory 的 Bank Conflict
告别高成本!TensorRT-LLM实战:如何将LLM推理速度提升数倍
使用LoRA对LLM进行微调的实用技巧
强化学习小白必看:PTX Loss 到底是个啥?
GPT-5 Prompt Migration and Improvement Using the New Optimizer
Task 异步流 coroutine 实现
C++ coroutine 介绍
搭建 VSCode 离线开发环境
nlohmann/json 库简介
Intro to C++ Coroutines: Concept
Hugging Face BPE Tokenizer 的资源文件
移动语义 std::move 和完美转发 std::forward
ACEBench: Who Wins the Match Point in Tool Usage?
什么是 GN
RULER: Relative Universal LLM-Elicited Rewards
SFT和RFT的区别
CosyVoice 3: 面向真实场景的大规模零样本语音生成模型
CosyVoice 3: Towards In-the-wild Speech Generation
语音合成(TTS)中文自然度:问题、成因、解决方案
上下文工程如何实现
上下文工程(Context Engineering)
新手必看!LangGraph 101:手把手教你搭一个深度研究 Agent
LangGraph 简介
SFT 泛化新解读:强化学习 + 奖励修正,一文读懂
程序员狂喜!Self-Instruct 框架全解析:无限生成高质量指令集,从此告别标注噩梦!
Evol-Instruct 竟能精准生成领域专属数据?实操技巧速看!
指令微调数据-少即是多
LLM generate 参数怎么用?
语音合成(TTS)跳跃与重复问题的解析:成因、机制及解决方案
大模型训练新思路:GEPA 靠 “反思” 赢过 RL,看完秒懂
F5-TTS:用 Flow Matching 玩转语音,流畅度和真实感都 “拉满” 了
E2 TTS:令人尴尬地简单、完全非自回归、零样本的语音合成技术
Voicebox:大规模文本引导的多语言通用语音生成技术
为什么都在聊 Kimi K2?Open Agentic Intelligence 藏着哪些新惊喜
Step-Audio-AQAA 端到端音频模型
DPO、PPO、GRPO的原理,区别与联系
OPENCSG 中文语料库:一系列高质量的中文数据集,用于语言模型训练
什么是 Classifier-Free Guidance?
Conditional Flow Matching : 连续标准流 Continuous Normalizing Flow
CFM 与 OT-CFM:条件流匹配与最优传输的碰撞
DPO损失实现
Conditional Flow Matching : 常微分方程ODE、欧拉方法和Neural ODE
当 Normalizing flow 遇上语音生成:AI 说话变 “真人” 的秘密在这里!
深度剖析:Kimi - Audio 中 BigVGAN 的神奇作用
为什么说分布变换是 Normalizing flow 的「灵魂操作」?
MATCHA-TTS 来了!条件流匹配让文本转语音效率飙升
从知识增长的角度提升RAG上下文的质量
MiniMax-Speech,零样本语音合成新突破,32 种语言轻松拿捏!
手把手教你创建 evol-instruct 数据集!附完整流程~
社交类聊天的 Query 分析与应答策略
SFT 中指令选择和响应选择哪个更重要?
角色扮演大模型技术分享2-超拟人模型的困境
最新!SpeechLLM 综述:架构、能力、挑战与未来全揭秘
如何低成本生成高质量指令微调数据?
从数量到质量:通过自引导数据选择来提升语言模型性能以实现指令调优
Kimi-Audio:开源音频基础模型全面解析
Kimi-Audio 的 TTS 效果如何?
Qwen 的训练数据是怎么做的?
GeForce RTX 3090, 4090, A10, A40, A100, A800, L20, L40 显卡性能对比
如何低成本生成高质量指令微调数据?
掌握RAG:投入生产前要评估的8个场景
掌握RAG:如何评估RAG的LLM
掌握RAG:如何在部署后观察您的RAG
掌握RAG:如何选择嵌入模型
基础模型中的新范式:为什么o1是不同的,以及它将如何改变LLM应用
Semantic token和连续特征在SLLM下的对比
从数量到质量:通过自引导数据选择来提升语言模型性能以实现指令调优
RLHF及其变体:进展和实际工程见解
Freeze-Omni: 低延迟语音对话模型
Fully Sharded Data Parallelism (FSDP)
什么是置信度?置信度模型怎么做?
晦涩难懂的 Flow matching!图形化理解
中文指令微调数据,质量就是一切!
基于 LLM 的文本泛化
CosyVoice 2:基于大型语言模型的可扩展流式语音合成技术
Mini-Omni2: with Vision, Speech and Duplex Capabilities
FSQ的原理与VQ-VAE的区别和联系
大模型并行训练的一些知识——极简版
亲测有效!如何用 Address Sanitizer 精准定位内存漏洞?附保姆级操作指南
要用 AI 裁员 50% 的千亿独角兽,公开认错,重启招聘!
single codebook和dual codebook在LLM中向量量化上有什么区别?
一些文档去重算法
最佳的指令数据应当是什么样的?
Prefill-Decode分离
亲测有效!如何用 Address Sanitizer 精准定位内存漏洞?附保姆级操作指南
Simhash-文档去重算法简介
RLHF 入门,高手勿进!
最佳的指令数据应当是什么样的?
CosyVoice:一种基于监督式语义标记的可扩展多语言 Zero-Shot 语音合成器
Model Context Protocol (MCP)
MCP(模型上下文协议)是什么以及它是如何运作的
压力测试LLMs——大海捞针实现
基本 文件 流程 错误 SQL 调试
  1. 请求信息 : 2026-05-09 05:10:55 HTTP/1.1 GET : https://www.yeyulingfeng.com/a/590840.html
  2. 运行时间 : 0.104447s [ 吞吐率:9.57req/s ] 内存消耗:4,970.10kb 文件加载:145
  3. 缓存信息 : 0 reads,0 writes
  4. 会话信息 : SESSION_ID=2e53cacb514906e3da71030ae93d9b35
  1. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/public/index.php ( 0.79 KB )
  2. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/autoload.php ( 0.17 KB )
  3. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/composer/autoload_real.php ( 2.49 KB )
  4. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/composer/platform_check.php ( 0.90 KB )
  5. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/composer/ClassLoader.php ( 14.03 KB )
  6. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/composer/autoload_static.php ( 6.05 KB )
  7. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-helper/src/helper.php ( 8.34 KB )
  8. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-validate/src/helper.php ( 2.19 KB )
  9. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/ralouphie/getallheaders/src/getallheaders.php ( 1.60 KB )
  10. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/helper.php ( 1.47 KB )
  11. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/stubs/load_stubs.php ( 0.16 KB )
  12. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Exception.php ( 1.69 KB )
  13. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-container/src/Facade.php ( 2.71 KB )
  14. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/symfony/deprecation-contracts/function.php ( 0.99 KB )
  15. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/symfony/polyfill-mbstring/bootstrap.php ( 8.26 KB )
  16. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/symfony/polyfill-mbstring/bootstrap80.php ( 9.78 KB )
  17. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/symfony/var-dumper/Resources/functions/dump.php ( 1.49 KB )
  18. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-dumper/src/helper.php ( 0.18 KB )
  19. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/symfony/var-dumper/VarDumper.php ( 4.30 KB )
  20. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/guzzlehttp/guzzle/src/functions_include.php ( 0.16 KB )
  21. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/guzzlehttp/guzzle/src/functions.php ( 5.54 KB )
  22. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/App.php ( 15.30 KB )
  23. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-container/src/Container.php ( 15.76 KB )
  24. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/psr/container/src/ContainerInterface.php ( 1.02 KB )
  25. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/provider.php ( 0.19 KB )
  26. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Http.php ( 6.04 KB )
  27. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-helper/src/helper/Str.php ( 7.29 KB )
  28. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Env.php ( 4.68 KB )
  29. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/common.php ( 0.03 KB )
  30. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/helper.php ( 18.78 KB )
  31. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Config.php ( 5.54 KB )
  32. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/alipay.php ( 3.59 KB )
  33. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/facade/Env.php ( 1.67 KB )
  34. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/app.php ( 0.95 KB )
  35. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/cache.php ( 0.78 KB )
  36. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/console.php ( 0.23 KB )
  37. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/cookie.php ( 0.56 KB )
  38. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/database.php ( 2.48 KB )
  39. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/filesystem.php ( 0.61 KB )
  40. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/lang.php ( 0.91 KB )
  41. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/log.php ( 1.35 KB )
  42. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/middleware.php ( 0.19 KB )
  43. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/route.php ( 1.89 KB )
  44. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/session.php ( 0.57 KB )
  45. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/trace.php ( 0.34 KB )
  46. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/config/view.php ( 0.82 KB )
  47. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/event.php ( 0.25 KB )
  48. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Event.php ( 7.67 KB )
  49. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/service.php ( 0.13 KB )
  50. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/AppService.php ( 0.26 KB )
  51. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Service.php ( 1.64 KB )
  52. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Lang.php ( 7.35 KB )
  53. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/lang/zh-cn.php ( 13.70 KB )
  54. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/initializer/Error.php ( 3.31 KB )
  55. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/initializer/RegisterService.php ( 1.33 KB )
  56. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/services.php ( 0.14 KB )
  57. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/service/PaginatorService.php ( 1.52 KB )
  58. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/service/ValidateService.php ( 0.99 KB )
  59. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/service/ModelService.php ( 2.04 KB )
  60. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-trace/src/Service.php ( 0.77 KB )
  61. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Middleware.php ( 6.72 KB )
  62. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/initializer/BootService.php ( 0.77 KB )
  63. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/Paginator.php ( 11.86 KB )
  64. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-validate/src/Validate.php ( 63.20 KB )
  65. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/Model.php ( 23.55 KB )
  66. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/model/concern/Attribute.php ( 21.05 KB )
  67. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/model/concern/AutoWriteData.php ( 4.21 KB )
  68. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/model/concern/Conversion.php ( 6.44 KB )
  69. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/model/concern/DbConnect.php ( 5.16 KB )
  70. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/model/concern/ModelEvent.php ( 2.33 KB )
  71. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/model/concern/RelationShip.php ( 28.29 KB )
  72. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-helper/src/contract/Arrayable.php ( 0.09 KB )
  73. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-helper/src/contract/Jsonable.php ( 0.13 KB )
  74. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/model/contract/Modelable.php ( 0.09 KB )
  75. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Db.php ( 2.88 KB )
  76. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/DbManager.php ( 8.52 KB )
  77. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Log.php ( 6.28 KB )
  78. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Manager.php ( 3.92 KB )
  79. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/psr/log/src/LoggerTrait.php ( 2.69 KB )
  80. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/psr/log/src/LoggerInterface.php ( 2.71 KB )
  81. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Cache.php ( 4.92 KB )
  82. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/psr/simple-cache/src/CacheInterface.php ( 4.71 KB )
  83. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-helper/src/helper/Arr.php ( 16.63 KB )
  84. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/cache/driver/File.php ( 7.84 KB )
  85. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/cache/Driver.php ( 9.03 KB )
  86. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/contract/CacheHandlerInterface.php ( 1.99 KB )
  87. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/Request.php ( 0.09 KB )
  88. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Request.php ( 55.78 KB )
  89. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/middleware.php ( 0.25 KB )
  90. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Pipeline.php ( 2.61 KB )
  91. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-trace/src/TraceDebug.php ( 3.40 KB )
  92. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/middleware/SessionInit.php ( 1.94 KB )
  93. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Session.php ( 1.80 KB )
  94. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/session/driver/File.php ( 6.27 KB )
  95. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/contract/SessionHandlerInterface.php ( 0.87 KB )
  96. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/session/Store.php ( 7.12 KB )
  97. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Route.php ( 23.73 KB )
  98. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/route/RuleName.php ( 5.75 KB )
  99. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/route/Domain.php ( 2.53 KB )
  100. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/route/RuleGroup.php ( 22.43 KB )
  101. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/route/Rule.php ( 26.95 KB )
  102. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/route/RuleItem.php ( 9.78 KB )
  103. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/route/app.php ( 3.94 KB )
  104. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/facade/Route.php ( 4.70 KB )
  105. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/route/dispatch/Controller.php ( 4.74 KB )
  106. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/route/Dispatch.php ( 10.44 KB )
  107. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/controller/Index.php ( 9.87 KB )
  108. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/BaseController.php ( 2.05 KB )
  109. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/facade/Db.php ( 0.93 KB )
  110. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/connector/Mysql.php ( 5.44 KB )
  111. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/PDOConnection.php ( 52.47 KB )
  112. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/Connection.php ( 8.39 KB )
  113. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/ConnectionInterface.php ( 4.57 KB )
  114. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/builder/Mysql.php ( 16.58 KB )
  115. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/Builder.php ( 24.06 KB )
  116. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/BaseBuilder.php ( 27.50 KB )
  117. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/Query.php ( 15.71 KB )
  118. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/BaseQuery.php ( 45.13 KB )
  119. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/TimeFieldQuery.php ( 7.43 KB )
  120. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/AggregateQuery.php ( 3.26 KB )
  121. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/ModelRelationQuery.php ( 20.07 KB )
  122. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/ParamsBind.php ( 3.66 KB )
  123. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/ResultOperation.php ( 7.01 KB )
  124. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/WhereQuery.php ( 19.37 KB )
  125. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/JoinAndViewQuery.php ( 7.11 KB )
  126. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/TableFieldInfo.php ( 2.63 KB )
  127. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-orm/src/db/concern/Transaction.php ( 2.77 KB )
  128. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/log/driver/File.php ( 5.96 KB )
  129. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/contract/LogHandlerInterface.php ( 0.86 KB )
  130. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/log/Channel.php ( 3.89 KB )
  131. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/event/LogRecord.php ( 1.02 KB )
  132. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-helper/src/Collection.php ( 16.47 KB )
  133. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/facade/View.php ( 1.70 KB )
  134. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/View.php ( 4.39 KB )
  135. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/app/controller/Es.php ( 3.30 KB )
  136. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Response.php ( 8.81 KB )
  137. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/response/View.php ( 3.29 KB )
  138. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/Cookie.php ( 6.06 KB )
  139. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-view/src/Think.php ( 8.38 KB )
  140. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/framework/src/think/contract/TemplateHandlerInterface.php ( 1.60 KB )
  141. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-template/src/Template.php ( 46.61 KB )
  142. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-template/src/template/driver/File.php ( 2.41 KB )
  143. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-template/src/template/contract/DriverInterface.php ( 0.86 KB )
  144. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/runtime/temp/c935550e3e8a3a4c27dd94e439343fdf.php ( 31.50 KB )
  145. /yingpanguazai/ssd/ssd1/www/wwww.yeyulingfeng.com/vendor/topthink/think-trace/src/Html.php ( 4.42 KB )
  1. CONNECT:[ UseTime:0.000537s ] mysql:host=127.0.0.1;port=3306;dbname=wenku;charset=utf8mb4
  2. SHOW FULL COLUMNS FROM `fenlei` [ RunTime:0.000745s ]
  3. SELECT * FROM `fenlei` WHERE `fid` = 0 [ RunTime:0.000289s ]
  4. SELECT * FROM `fenlei` WHERE `fid` = 63 [ RunTime:0.000279s ]
  5. SHOW FULL COLUMNS FROM `set` [ RunTime:0.000454s ]
  6. SELECT * FROM `set` [ RunTime:0.000200s ]
  7. SHOW FULL COLUMNS FROM `article` [ RunTime:0.000514s ]
  8. SELECT * FROM `article` WHERE `id` = 590840 LIMIT 1 [ RunTime:0.000592s ]
  9. UPDATE `article` SET `lasttime` = 1778274655 WHERE `id` = 590840 [ RunTime:0.005695s ]
  10. SELECT * FROM `fenlei` WHERE `id` = 64 LIMIT 1 [ RunTime:0.000341s ]
  11. SELECT * FROM `article` WHERE `id` < 590840 ORDER BY `id` DESC LIMIT 1 [ RunTime:0.000498s ]
  12. SELECT * FROM `article` WHERE `id` > 590840 ORDER BY `id` ASC LIMIT 1 [ RunTime:0.002648s ]
  13. SELECT * FROM `article` WHERE `id` < 590840 ORDER BY `id` DESC LIMIT 10 [ RunTime:0.000781s ]
  14. SELECT * FROM `article` WHERE `id` < 590840 ORDER BY `id` DESC LIMIT 10,10 [ RunTime:0.000683s ]
  15. SELECT * FROM `article` WHERE `id` < 590840 ORDER BY `id` DESC LIMIT 20,10 [ RunTime:0.001102s ]
0.108544s