1#include "Int8OPTAttention.h" 
   12        hidden_states = hidden_states_;
 
   13        attentions = attentions_;
 
   14        past_key_value = past_key_value_;
 
 
   21    bool has_past_key_value = 
false;
 
   24        hidden_states = hidden_states_;
 
   25        attention_mask = attention_mask_;
 
   26        has_past_key_value = 
false;
 
   31        hidden_states = hidden_states_;
 
   32        attention_mask = attention_mask_;
 
   34        past_value = past_value_;
 
   35        has_past_key_value = 
true;
 
 
   48    int embed_dim, num_attention_heads, hidden_dim, layer_idx;
 
   49    LayerNormQ self_attn_layer_norm, final_layer_norm;  
 
   53    std::string profile_name = 
"Int8OPTDecoderLayer";
 
 
Definition BMM_S8T_S8N_F32T.h:7
 
Definition BMM_S8T_S8N_S8T.h:7
 
Definition Int8OPTAttention.h:31
 
Definition Int8OPTDecoderLayer.h:39
 
Definition LayerNormQ.h:8
 
Definition W8A8B8O8LinearReLU.h:10
 
Definition W8A8B8O8Linear.h:10
 
Definition W8A8BFP32OFP32Linear.h:9
 
Definition Int8OPTDecoderLayer.h:5