from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import os
import torch
import gradio as gr

# check if cuda is available
torch.cuda.is_available()
# define the model id
# model_id = "tiiuae/falcon-40b-instruct"
model_id = "tiiuae/falcon-7b-instruct"

# load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# load the model
## params:
## cache_dir: path to a directory in which a downloaded pretrained model should be cached if the standard cache should not be used
## device_map: ensures the model is moved to your GPU(s)
cache_dir = "./workspace/"
torch_dtype = torch.bfloat16
trust_remote_code = True
device_map = "auto"
offload_folder = "offload"
model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, torch_dtype=torch_dtype,
                                             trust_remote_code=trust_remote_code, device_map=device_map,
                                             offload_folder=offload_folder)
# set pt model to inference mode
model.eval()

# build the hf transformers pipeline
task = "text-generation"
max_length = 400
do_sample = True
top_k = 10
num_return_sequences = 1
eos_token_id = tokenizer.eos_token_id
pipeline = transformers.pipeline(task, model=model, tokenizer=tokenizer,
                                 device_map=device_map, max_length=max_length,
                                 do_sample=do_sample, top_k=top_k,
                                 num_return_sequences=num_return_sequences,
                                 eos_token_id=eos_token_id)
# setup prompt template
template = PromptTemplate(input_variables=['input'], template='{input}')

# pass hf pipeline to langchain class
llm = HuggingFacePipeline(pipeline=pipeline)

# build stacked llm chain, ie prompt-formatting + llm
chain = LLMChain(llm=llm, prompt=template)
# create generate function
def generate(prompt):
    # the prompt gets passed to the llm chain, which returns the response
    return chain.run(prompt)
| title = "Falcon 40-b-Instruct" | |
| description = "Web app application using the open-source `Falcon-40b-Instruct` LLM" | |
# build gradio interface
gr.Interface(fn=generate,
             inputs=["text"],
             outputs=["text"],
             title=title,
             description=description).launch()