18#include <unordered_map>
21#include "Fp32OPTForCausalLM.h"
22#include "Fp32llamaForCausalLM.h"
23#include "Fp32GPTBigCodeForCausalLM.h"
24#include "Int4OPTForCausalLM.h"
25#include "Int4llamaForCausalLM.h"
26#include "Int4GPTBigCodeForCausalLM.h"
27#include "Fp32CLIPVisionTransformer.h"
28#include "OPTForCausalLM.h"
29#include "OPTTokenizer.h"
/// File-local Mersenne Twister engine.
///
/// It is default-constructed, so until something reseeds it the sequence is
/// the one produced by the standard library's default seed.
// NOTE(review): `static` at namespace scope gives internal linkage. If this
// file is included as a header, every translation unit gets its own
// independent engine — confirm that is intended.
static std::mt19937 OPT_rng;
50 int32_t n_threads = 1;
51 int32_t n_predict = 128;
54 int32_t n_batch = 512;
56 int32_t n_vocab = 50272;
59 std::unordered_map<int, float> logit_bias;
63 float typical_p = 1.00f;
65 float repeat_penalty = 1.10f;
66 int32_t repeat_last_n = 64;
67 float frequency_penalty = 0.00f;
68 float presence_penalty = 0.00f;
70 float mirostat_tau = 5.00f;
71 float mirostat_eta = 0.10f;
74void sample_repetition_penalty(
OPT_token_data_array* candidates,
const int* last_tokens,
size_t last_tokens_size,
77void sample_frequency_and_presence_penalties(
OPT_token_data_array* candidates,
const int* last_tokens_p,
78 size_t last_tokens_size,
float alpha_frequency,
float alpha_presence);
90int sample_token_mirostat(
const int n_vocab,
OPT_token_data_array* candidates,
float tau,
float eta,
int m,
float* mu);
/// Declaration only; the definition is not part of this view.
///
/// @param model        Type-erased pointer to a model object. The concrete type
///                     is presumably selected by `model_type` — confirm.
/// @param model_type   Integer model selector.
/// @param input_ids    Token ids, taken by value (the vector is copied or moved
///                     into the call).
/// @param interactive  Optional flag, defaults to `false`.
/// @param voicechat    Optional flag, defaults to `false`.
/// @return A vector of ints; presumably the generated token ids — confirm
///         against the definition.
std::vector<int> OPTGenerate(void* model,
                             int model_type,
                             std::vector<int> input_ids,
                             bool interactive = false,
                             bool voicechat = false);
/// Model / precision identifiers.
///
/// The enumerators take implicit consecutive values starting at 0, so their
/// order is part of the contract: append new entries at the end only.
// NOTE(review): presumably these are the values passed as the `model_type`
// argument of the *Generate functions — confirm against the callers.
enum {
    OPT_INT8,         // 0
    LLaMA_FP32,       // 1
    LLaMA_INT4,       // 2
    OPT_FP32,         // 3
    OPT_INT4,         // 4
    StarCoder_FP32,   // 5
    StarCoder_INT4,   // 6
    LLaVA_FP32,       // 7
    LLaVA_INT4,       // 8
    VILA_FP32,        // 9
    VILA_INT4         // 10
};
/// Declaration only; the definition is not part of this view.
///
/// All string arguments and `generation_config` are taken by value.
///
/// @param param_path         Path string (by name, a parameter/weights
///                           location — confirm).
/// @param model              Type-erased pointer to a model object; the
///                           concrete type is presumably selected by
///                           `model_type`.
/// @param model_type         Integer model selector.
/// @param text               Input text.
/// @param generation_config  Generation settings (`opt_params`), copied in.
/// @param voc_path           Path string (by name, a vocabulary location —
///                           confirm).
/// @param interactive        Flag; no default, callers must pass it.
/// @param voicechat          Flag; no default, callers must pass it.
/// @return A std::string; presumably the generated text — confirm against the
///         definition.
std::string LLaMAGenerate(std::string param_path,
                          void* model,
                          int model_type,
                          std::string text,
                          const struct opt_params generation_config,
                          std::string voc_path,
                          bool interactive,
                          bool voicechat);
/// Declaration only; the definition is not part of this view.
///
/// Unlike the other string-returning generators declared in this file, this
/// one takes no `voicechat` flag.
///
/// @param param_path         Path string (by name, a parameter/weights
///                           location — confirm).
/// @param model_ptr          Type-erased pointer to a model object; the
///                           concrete type is presumably selected by
///                           `model_type`.
/// @param model_type         Integer model selector.
/// @param text               Input text, taken by value.
/// @param generation_config  Generation settings (`opt_params`), copied in.
/// @param voc_path           Path string (by name, a vocabulary location —
///                           confirm).
/// @param interactive        Flag; no default, callers must pass it.
/// @return A std::string; presumably the generated text — confirm against the
///         definition.
std::string GPTBigCodeGenerate(std::string param_path,
                               void* model_ptr,
                               int model_type,
                               std::string text,
                               const struct opt_params generation_config,
                               std::string voc_path,
                               bool interactive);
/// Declaration only; the definition is not part of this view.
///
/// Takes two path/model pairs (one named for llama, one for clip) plus an
/// image path.
///
/// @param llama_param_path   Path string associated with `llama_model_ptr`.
/// @param llama_model_ptr    Type-erased pointer to the llama-side model
///                           object.
/// @param clip_param_path    Path string associated with `clip_model_ptr`.
/// @param clip_model_ptr     Type-erased pointer to the clip-side model object.
/// @param model_type         Integer model selector.
/// @param text               Input text, taken by value.
/// @param img_path           Path string for the image input (by name).
/// @param generation_config  Generation settings (`opt_params`), copied in.
/// @param voc_path           Path string (by name, a vocabulary location —
///                           confirm).
/// @param interactive        Flag; no default, callers must pass it.
/// @param voicechat          Flag; no default, callers must pass it.
/// @param is_vila            Flag; presumably selects the VILA variant —
///                           confirm against the definition.
/// @return A std::string; presumably the generated text — confirm against the
///         definition.
std::string LLaVAGenerate(std::string llama_param_path,
                          void* llama_model_ptr,
                          std::string clip_param_path,
                          void* clip_model_ptr,
                          int model_type,
                          std::string text,
                          std::string img_path,
                          const struct opt_params generation_config,
                          std::string voc_path,
                          bool interactive,
                          bool voicechat,
                          bool is_vila);
/// Declaration only; the definition is not part of this view.
///
/// The parameter list matches `LLaMAGenerate` exactly.
///
/// @param param_path         Path string (by name, a parameter/weights
///                           location — confirm).
/// @param model              Type-erased pointer to a model object; the
///                           concrete type is presumably selected by
///                           `model_type`.
/// @param model_type         Integer model selector.
/// @param text               Input text, taken by value.
/// @param generation_config  Generation settings (`opt_params`), copied in.
/// @param voc_path           Path string (by name, a vocabulary location —
///                           confirm).
/// @param interactive        Flag; no default, callers must pass it.
/// @param voicechat          Flag; no default, callers must pass it.
/// @return A std::string; presumably the generated text — confirm against the
///         definition.
std::string MistralGenerate(std::string param_path,
                            void* model,
                            int model_type,
                            std::string text,
                            const struct opt_params generation_config,
                            std::string voc_path,
                            bool interactive,
                            bool voicechat);
/// Declaration only; the definition is not part of this view.
///
/// The parameter list matches `LLaMAGenerate` exactly.
///
/// @param param_path         Path string (by name, a parameter/weights
///                           location — confirm).
/// @param model              Type-erased pointer to a model object; the
///                           concrete type is presumably selected by
///                           `model_type`.
/// @param model_type         Integer model selector.
/// @param text               Input text, taken by value.
/// @param generation_config  Generation settings (`opt_params`), copied in.
/// @param voc_path           Path string (by name, a vocabulary location —
///                           confirm).
/// @param interactive        Flag; no default, callers must pass it.
/// @param voicechat          Flag; no default, callers must pass it.
/// @return A std::string; presumably the generated text — confirm against the
///         definition.
std::string LLaMA3Generate(std::string param_path,
                           void* model,
                           int model_type,
                           std::string text,
                           const struct opt_params generation_config,
                           std::string voc_path,
                           bool interactive,
                           bool voicechat);
// Cross-reference: definition at OPTTokenizer.h:35