Spaces:
Build error
Build error
File size: 2,357 Bytes
3e5595b 46c2bfc 6ba25f7 9938c27 6ba25f7 3e5595b 81bf9b4 edc20ac 3e5595b 9938c27 3e5595b b439a8f 3e5595b 6ba25f7 9938c27 6ba25f7 3e5595b 9938c27 f57d7c6 3e5595b f57d7c6 46c2bfc 3e5595b edc20ac 3e5595b dc53b3a 6ba25f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
#pragma once
const int stop_token_max = 16;
const int ban_token_max = 16;
const int tensor_split_max = 16;
// match kobold's sampler list and order
enum samplers
{
KCPP_SAMPLER_TOP_K=0,
KCPP_SAMPLER_TOP_A=1,
KCPP_SAMPLER_TOP_P=2,
KCPP_SAMPLER_TFS=3,
KCPP_SAMPLER_TYP=4,
KCPP_SAMPLER_TEMP=5,
KCPP_SAMPLER_REP_PEN=6,
KCPP_SAMPLER_MAX
};
enum stop_reason
{
INVALID=-1,
OUT_OF_TOKENS=0,
EOS_TOKEN=1,
CUSTOM_STOPPER=2,
};
struct load_model_inputs
{
const int threads;
const int blasthreads;
const int max_context_length;
const int batch_size;
const bool f16_kv;
const bool low_vram;
const bool use_mmq;
const char * executable_path;
const char * model_filename;
const char * lora_filename;
const char * lora_base;
const bool use_mmap;
const bool use_mlock;
const bool use_smartcontext;
const int clblast_info = 0;
const int cublas_info = 0;
const int blasbatchsize = 512;
const int debugmode = 0;
const int forceversion = 0;
const int gpulayers = 0;
const float rope_freq_scale = 1.0f;
const float rope_freq_base = 10000.0f;
const char * banned_tokens[ban_token_max];
const float tensor_split[tensor_split_max];
};
struct generation_inputs
{
const int seed;
const char *prompt;
const int max_context_length;
const int max_length;
const float temperature;
const int top_k;
const float top_a = 0.0f;
const float top_p;
const float typical_p;
const float tfs;
const float rep_pen;
const int rep_pen_range;
const int mirostat = 0;
const float mirostat_eta;
const float mirostat_tau;
const samplers sampler_order[KCPP_SAMPLER_MAX];
const int sampler_len;
const bool unban_tokens_rt;
const char * stop_sequence[stop_token_max];
const bool stream_sse;
const char * grammar;
const bool grammar_retain_state;
};
struct generation_outputs
{
int status = -1;
char text[24576]; //24kb should be enough for any response
};
extern std::string executable_path;
extern std::string lora_filename;
extern std::string lora_base;
extern std::vector<std::string> generated_tokens;
extern bool generation_finished;
extern float last_eval_time;
extern float last_process_time;
extern int last_token_count;
extern stop_reason last_stop_reason;
|