import os

import gradio as gr
from huggingface_hub import login
from langchain_community.llms.ctransformers import CTransformers

# Log in to the Hugging Face Hub only if a token was provided via the environment.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Alternative: load the model directly with ctransformers instead of via LangChain.
# from ctransformers import AutoConfig, AutoModelForCausalLM
# config = AutoConfig.from_pretrained("TheBloke/Mistral-7B-v0.1-GGUF")
# config.config.max_new_tokens = 2000
# config.config.context_length = 6000
# llm = AutoModelForCausalLM.from_pretrained(
#     "TheBloke/Mistral-7B-v0.1-GGUF",
#     model_file="mistral-7b-v0.1.Q5_K_M.gguf",
#     model_type="mistral",
#     gpu_layers=0,
#     config=config,
# )

# Generation settings for the quantized Mistral instruct model.
MODEL_TYPE = "mistral"
MODEL_BIN_PATH = "mistral-7b-instruct-v0.1.Q3_K_S.gguf"  # local GGUF weights file
MAX_NEW_TOKENS = 600
TEMPERATURE = 0.01
CONTEXT_LENGTH = 6000

llm = CTransformers(
    model=MODEL_BIN_PATH,
    model_type=MODEL_TYPE,
    config={
        "max_new_tokens": MAX_NEW_TOKENS,
        "temperature": TEMPERATURE,
        "context_length": CONTEXT_LENGTH,
    },
)


def generate_text(input_text):
    """Run the prompt through the model and return the completion."""
    output = llm.invoke(input_text)
    print(output)
    return output


# Minimal text-in / text-out Gradio UI around the model.
text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
)
text_generation_interface.launch()
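
# ---------------------------------------------------------------------------
# Note: MODEL_BIN_PATH above assumes the .gguf file already sits next to this
# script. A minimal sketch for fetching it from the Hub at startup instead,
# assuming the weights live in TheBloke/Mistral-7B-Instruct-v0.1-GGUF (the
# repo id is an assumption, not taken from this script; the filename matches
# MODEL_BIN_PATH). Place this before the CTransformers(...) call:
#
#     from huggingface_hub import hf_hub_download
#
#     MODEL_BIN_PATH = hf_hub_download(
#         repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",  # assumed repo
#         filename="mistral-7b-instruct-v0.1.Q3_K_S.gguf",
#         token=hf_token,
#     )
# ---------------------------------------------------------------------------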