import os

import gradio as gr
from huggingface_hub import login
from langchain_community.llms.ctransformers import CTransformers

# Authenticate against the Hugging Face Hub using the token from the environment.
hf_token = os.environ.get('HF_TOKEN')
login(token=hf_token)

# Alternative loader via ctransformers' AutoModelForCausalLM, kept for reference:
# config = AutoConfig.from_pretrained("Mistral-7B-v0.1-GGUF")
# config.config.max_new_tokens = 2000
# config.config.context_length = 6000
# llm = AutoModelForCausalLM.from_pretrained(
#     "TheBloke/Mistral-7B-v0.1-GGUF",
#     model_file="mistral-7b-v0.1.Q5_K_M.gguf",
#     model_type="mistral",
#     gpu_layers=0,
#     config=config,
# )

MODEL_TYPE = 'mistral'
MODEL_BIN_PATH = "mistral-7b-instruct-v0.1.Q3_K_S.gguf"
MAX_NEW_TOKENS = 600
TEMPERATURE = 0.01
CONTEXT_LENGTH = 6000

llm = CTransformers(
    model=MODEL_BIN_PATH,
    model_type=MODEL_TYPE,
    config={
        'max_new_tokens': MAX_NEW_TOKENS,
        'temperature': TEMPERATURE,
        'context_length': CONTEXT_LENGTH,
    },
)


def generate_text(input_text):
    """Run the prompt through the model and strip the echoed prompt from the output."""
    output = llm(input_text)
    print(output)
    cleaned_output = output.replace(input_text, "")
    return cleaned_output


text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
)
text_generation_interface.launch()
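# ---------------------------------------------------------------------------
# Optional sketch (an assumption, not part of the original script): instead of
# shipping the GGUF file alongside this script, the weights can be fetched
# from the Hub at startup. The repo id below is inferred from the model
# filename and should be verified before use.
#
# from huggingface_hub import hf_hub_download
# MODEL_BIN_PATH = hf_hub_download(
#     repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",  # assumed source repo
#     filename="mistral-7b-instruct-v0.1.Q3_K_S.gguf",
# )
#
# Usage sketch ("app.py" is a hypothetical filename): export the Hub token and
# run the script; Gradio serves the UI on http://127.0.0.1:7860 by default.
#
#   export HF_TOKEN=hf_xxx
#   python app.py
# ---------------------------------------------------------------------------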