// Declaration only; the definition is not in this part of the file.
// Takes two integer arguments and returns a Matrix3D<float> by value.
// NOTE(review): judging by the names, `length` is presumably the number of
// positions being processed now and `past_length` the number of positions
// already processed earlier -- confirm against the definition.
30 Matrix3D<float> prepare_decoder_attention_mask(
int length,
int past_length);
// Integer configuration fields. They have no in-class initializers, so they
// must be assigned elsewhere before being read.
// NOTE(review): names suggest vocabulary size, embedding width, padding token
// index, hidden width and attention head count -- confirm where they are set.
34 int voc_size, embed_dim, padding_idx, hidden_dim, num_heads;
// The decoder layers held by this object, stored by value in a vector.
// NOTE(review): order is presumably execution order -- confirm at the use site.
35 std::vector<Fp32OPTDecoderLayer> layers;
// Name string, defaulting to "Fp32OPTDecoder".
// NOTE(review): presumably the label passed to the profiler for this
// component -- confirm where it is read.
37 std::string profile_name =
"Fp32OPTDecoder";
// Raw float buffers used by this object. No initializer is given here, so
// each pointer is indeterminate until it is assigned elsewhere.
// NOTE(review): allocation, size and ownership are not visible from these
// declarations -- confirm who allocates and frees them, and consider
// `= nullptr` in-class initializers so an unassigned buffer is detectable.
40 float* attention_mask_buf;
41 float* pos_embeds_buf;
42 float* last_hidden_states_buf;
43 float* hidden_states_buf;