[tokens]
; Enter one or all of your API keys here.
; E.g., OPENAI_API_KEY = sk-xxxxxxx
OPENAI_API_KEY = sk-xxxxxxx
ANTHROPIC_API_KEY = xxxxx
TOGETHER_API_KEY = xxxxx
; If you use Meta-Llama models, you may need a Hugging Face token for access.
HUGGINGFACE_TOKEN = xxxxx
VERSION = 1.0.1

[directory]
; Directory for source files.
DOCS_DIR = ./data
; Directory to store embeddings and Langchain documents.
DB_DIR = ./database_store
LOCAL_MODEL_DIR = ./models

; The parameters below are optional to modify:
; --------------------------------------------
[parameters]
; Model name schema: Model Provider|Model Name|Model File. Model File applies only to the GGUF format; set it to None for other formats.
; For example:
; OpenAI|gpt-3.5-turbo|None
; OpenAI|gpt-4|None
; Anthropic|claude-2.0|None
; Together|togethercomputer/llama-2-70b-chat|None
; HuggingFace|TheBloke/Llama-2-70B-chat-GGUF|llama-2-70b-chat.q4_K_M.gguf
; HuggingFace|meta-llama/Llama-2-70b-chat-hf|None
; The full Together.AI model list can be found at the end of this file. For local LLMs, we currently support only quantized GGUF models and full Hugging Face models.
MODEL_NAME = OpenAI|gpt-4-1106-preview|None
; LLM temperature.
TEMPERATURE = 0
; Maximum tokens for storing chat history.
MAX_CHAT_HISTORY = 800
; Maximum tokens of LLM context for retrieved information.
MAX_LLM_CONTEXT = 1200
; Maximum tokens for LLM generation.
MAX_LLM_GENERATION = 1000
; Supported embeddings: openAIEmbeddings and hkunlpInstructorLarge.
EMBEDDING_NAME = openAIEmbeddings
; This depends on your GPU type.
N_GPU_LAYERS = 100
; This depends on your GPU and CPU RAM when using open-source LLMs.
N_BATCH = 512
; The base (small) chunk size for first-stage document retrieval.
BASE_CHUNK_SIZE = 100
; Set to 0 for no overlap.
CHUNK_OVERLAP = 0
; The final retrieval (medium) chunk size will be BASE_CHUNK_SIZE * CHUNK_SCALE.
CHUNK_SCALE = 3
WINDOW_STEPS = 3
; The number of tokens in a window chunk will be BASE_CHUNK_SIZE * WINDOW_SCALE.
WINDOW_SCALE = 18
; Weights of the BM25 retriever vs. the Chroma vector store retriever.
RETRIEVER_WEIGHTS = 0.5, 0.5
; The number of retrieved chunks will range from FIRST_RETRIEVAL_K to 2*FIRST_RETRIEVAL_K because of the ensemble retriever.
FIRST_RETRIEVAL_K = 3
; The number of retrieved chunks will range from SECOND_RETRIEVAL_K to 2*SECOND_RETRIEVAL_K because of the ensemble retriever.
SECOND_RETRIEVAL_K = 3
; Number of windows (large chunks) for the third retriever.
NUM_WINDOWS = 2
; The third retrieval produces the final chunks passed to the LLM QA chain; its 'k' value is dynamic (bounded by MAX_LLM_CONTEXT) and depends on the number of rephrased questions and retrieved documents.
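; Worked example of the chunking arithmetic above (a sketch using the default
; values in this file; exact token counts depend on the tokenizer in use):
;   small chunk  = BASE_CHUNK_SIZE                = 100 tokens
;   medium chunk = BASE_CHUNK_SIZE * CHUNK_SCALE  = 100 * 3  = 300 tokens
;   window chunk = BASE_CHUNK_SIZE * WINDOW_SCALE = 100 * 18 = 1800 tokens
; With FIRST_RETRIEVAL_K = 3, the ensemble retriever therefore returns between
; 3 and 6 small chunks in the first stage.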
[logging]
; If you do not want to enable logging, set enabled to False.
enabled = True
level = INFO
filename = IncarnaMind.log
format = %(asctime)s [%(levelname)s] %(name)s: %(message)s

; Together.AI supported models:
; 0 Austism/chronos-hermes-13b
; 1 EleutherAI/pythia-12b-v0
; 2 EleutherAI/pythia-1b-v0
; 3 EleutherAI/pythia-2.8b-v0
; 4 EleutherAI/pythia-6.9b
; 5 Gryphe/MythoMax-L2-13b
; 6 HuggingFaceH4/starchat-alpha
; 7 NousResearch/Nous-Hermes-13b
; 8 NousResearch/Nous-Hermes-Llama2-13b
; 9 NumbersStation/nsql-llama-2-7B
; 10 OpenAssistant/llama2-70b-oasst-sft-v10
; 11 OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
; 12 OpenAssistant/stablelm-7b-sft-v7-epoch-3
; 13 Phind/Phind-CodeLlama-34B-Python-v1
; 14 Phind/Phind-CodeLlama-34B-v2
; 15 SG161222/Realistic_Vision_V3.0_VAE
; 16 WizardLM/WizardCoder-15B-V1.0
; 17 WizardLM/WizardCoder-Python-34B-V1.0
; 18 WizardLM/WizardLM-70B-V1.0
; 19 bigcode/starcoder
; 20 databricks/dolly-v2-12b
; 21 databricks/dolly-v2-3b
; 22 databricks/dolly-v2-7b
; 23 defog/sqlcoder
; 24 garage-bAInd/Platypus2-70B-instruct
; 25 huggyllama/llama-13b
; 26 huggyllama/llama-30b
; 27 huggyllama/llama-65b
; 28 huggyllama/llama-7b
; 29 lmsys/fastchat-t5-3b-v1.0
; 30 lmsys/vicuna-13b-v1.3
; 31 lmsys/vicuna-13b-v1.5-16k
; 32 lmsys/vicuna-13b-v1.5
; 33 lmsys/vicuna-7b-v1.3
; 34 prompthero/openjourney
; 35 runwayml/stable-diffusion-v1-5
; 36 stabilityai/stable-diffusion-2-1
; 37 stabilityai/stable-diffusion-xl-base-1.0
; 38 togethercomputer/CodeLlama-13b-Instruct
; 39 togethercomputer/CodeLlama-13b-Python
; 40 togethercomputer/CodeLlama-13b
; 41 togethercomputer/CodeLlama-34b-Instruct
; 42 togethercomputer/CodeLlama-34b-Python
; 43 togethercomputer/CodeLlama-34b
; 44 togethercomputer/CodeLlama-7b-Instruct
; 45 togethercomputer/CodeLlama-7b-Python
; 46 togethercomputer/CodeLlama-7b
; 47 togethercomputer/GPT-JT-6B-v1
; 48 togethercomputer/GPT-JT-Moderation-6B
; 49 togethercomputer/GPT-NeoXT-Chat-Base-20B
; 50 togethercomputer/Koala-13B
; 51 togethercomputer/LLaMA-2-7B-32K
; 52 togethercomputer/Llama-2-7B-32K-Instruct
; 53 togethercomputer/Pythia-Chat-Base-7B-v0.16
; 54 togethercomputer/Qwen-7B-Chat
; 55 togethercomputer/Qwen-7B
; 56 togethercomputer/RedPajama-INCITE-7B-Base
; 57 togethercomputer/RedPajama-INCITE-7B-Chat
; 58 togethercomputer/RedPajama-INCITE-7B-Instruct
; 59 togethercomputer/RedPajama-INCITE-Base-3B-v1
; 60 togethercomputer/RedPajama-INCITE-Chat-3B-v1
; 61 togethercomputer/RedPajama-INCITE-Instruct-3B-v1
; 62 togethercomputer/alpaca-7b
; 63 togethercomputer/codegen2-16B
; 64 togethercomputer/codegen2-7B
; 65 togethercomputer/falcon-40b-instruct
; 66 togethercomputer/falcon-40b
; 67 togethercomputer/falcon-7b-instruct
; 68 togethercomputer/falcon-7b
; 69 togethercomputer/guanaco-13b
; 70 togethercomputer/guanaco-33b
; 71 togethercomputer/guanaco-65b
; 72 togethercomputer/guanaco-7b
; 73 togethercomputer/llama-2-13b-chat
; 74 togethercomputer/llama-2-13b
; 75 togethercomputer/llama-2-70b-chat
; 76 togethercomputer/llama-2-70b
; 77 togethercomputer/llama-2-7b-chat
; 78 togethercomputer/llama-2-7b
; 79 togethercomputer/mpt-30b-chat
; 80 togethercomputer/mpt-30b-instruct
; 81 togethercomputer/mpt-30b
; 82 togethercomputer/mpt-7b-chat
; 83 togethercomputer/mpt-7b
; 84 togethercomputer/replit-code-v1-3b
; 85 upstage/SOLAR-0-70b-16bit
; 86 wavymulder/Analog-Diffusion
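; For reference, a minimal sketch of reading these settings from Python with
; the standard-library configparser module (kept as comments so this file
; stays valid INI; the filename "configparser.ini" is an assumption, adjust
; the path to wherever your copy of this file lives):
;   import configparser
;
;   config = configparser.ConfigParser()
;   config.read("configparser.ini")
;
;   # Split the MODEL_NAME schema: Model Provider|Model Name|Model File
;   provider, model, model_file = config["parameters"]["MODEL_NAME"].split("|")
;
;   temperature = config.getfloat("parameters", "TEMPERATURE")
;   base_chunk_size = config.getint("parameters", "BASE_CHUNK_SIZE")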