30 Matrix3D<float> prepare_decoder_attention_mask(
int length,
int past_length);
// Integer model parameters. Their meaning is inferred from the names only
// (vocabulary size, embedding width, padding token index, hidden width,
// attention head count) -- TODO confirm where they are assigned.
// Defaulted to 0 so that no member is ever read with an indeterminate value.
int voc_size = 0;
int embed_dim = 0;
int padding_idx = 0;
int hidden_dim = 0;
int num_heads = 0;
35 std::vector<Int4OPTDecoderLayer> layers;
// Label string for this object.
// NOTE(review): presumably used as the profiling tag -- confirm at use sites.
std::string profile_name = "Int4OPTDecoder";
// Raw float buffers. Allocation, sizing and ownership are not visible in this
// part of the file -- TODO confirm who allocates and releases them.
// Defaulted to nullptr so an unassigned buffer is detectable instead of
// holding an indeterminate address.
float* attention_mask_buf = nullptr;
float* pos_embeds_buf = nullptr;
float* last_hidden_states_buf = nullptr;
float* hidden_states_buf = nullptr;