1#include "Int8OPTAttention.h"
12 hidden_states = hidden_states_;
13 attentions = attentions_;
14 past_key_value = past_key_value_;
21 bool has_past_key_value =
false;
24 hidden_states = hidden_states_;
25 attention_mask = attention_mask_;
26 has_past_key_value =
false;
31 hidden_states = hidden_states_;
32 attention_mask = attention_mask_;
34 past_value = past_value_;
35 has_past_key_value =
true;
48 int embed_dim, num_attention_heads, hidden_dim, layer_idx;
49 LayerNormQ self_attn_layer_norm, final_layer_norm;
53 std::string profile_name =
"Int8OPTDecoderLayer";
Definition BMM_S8T_S8N_F32T.h:7
Definition BMM_S8T_S8N_S8T.h:7
Definition Int8OPTAttention.h:31
Definition Int8OPTDecoderLayer.h:39
Definition LayerNormQ.h:8
Definition W8A8B8O8LinearReLU.h:10
Definition W8A8B8O8Linear.h:10
Definition W8A8BFP32OFP32Linear.h:9
Definition Int8OPTDecoderLayer.h:5