Commit 787eb2b
Parent(s): f9b4304

Add application file

- app.py +74 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,74 @@
+from langchain import HuggingFacePipeline
+from langchain import PromptTemplate, LLMChain
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+import transformers
+import os
+import torch
+import gradio as gr
+
+# check whether CUDA is available
+torch.cuda.is_available()
+
+# define the model id
+# model_id = "tiiuae/falcon-40b-instruct"
+model_id = "tiiuae/falcon-7b-instruct"
+
+# load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+# load the model
+## params:
+## cache_dir: path to a directory in which the downloaded pretrained model is cached if the standard cache should not be used
+## device_map: ensures the model is placed on the available GPU(s)
+cache_dir = "./workspace/"
+torch_dtype = torch.bfloat16
+trust_remote_code = True
+device_map = "auto"
+offload_folder = "offload"
+
+model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, torch_dtype=torch_dtype,
+                                             trust_remote_code=trust_remote_code, device_map=device_map,
+                                             offload_folder=offload_folder)
+# set the PyTorch model to inference mode
+model.eval()
+
+# build the HF transformers pipeline
+task = "text-generation"
+max_length = 400
+do_sample = True
+top_k = 10
+num_return_sequences = 1
+eos_token_id = tokenizer.eos_token_id
+
+pipeline = transformers.pipeline(task, model=model, tokenizer=tokenizer,
+                                 device_map=device_map, max_length=max_length,
+                                 do_sample=do_sample, top_k=top_k,
+                                 num_return_sequences=num_return_sequences,
+                                 eos_token_id=eos_token_id)
+
+# set up the prompt template (passes the user input through unchanged)
+template = PromptTemplate(input_variables=["input"], template="{input}")
+
+# pass the HF pipeline to the langchain wrapper class
+llm = HuggingFacePipeline(pipeline=pipeline)
+
+# build the stacked LLM chain, i.e. prompt formatting + llm
+chain = LLMChain(llm=llm, prompt=template)
+
+
+# create the generate function
+def generate(prompt):
+    # the prompt is passed to the llm chain, which returns the model's response
+    return chain.run(prompt)
+
+
+title = "Falcon-7B-Instruct"
+description = "Web application using the open-source `Falcon-7B-Instruct` LLM"
+
+# build the gradio interface
+gr.Interface(fn=generate,
+             inputs=["text"],
+             outputs=["text"],
+             title=title,
+             description=description).launch()
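One detail worth flagging in the diff above: the prompt template has to be the literal string "{input}" rather than an f-string, so that LangChain substitutes the user's prompt at run time. A minimal standalone sketch of that behaviour (illustrative only, not part of this commit):

from langchain import PromptTemplate

# a literal "{input}" placeholder is filled in by LangChain when the chain runs
template = PromptTemplate(input_variables=["input"], template="{input}")
print(template.format(input="Hello, Falcon!"))  # -> Hello, Falcon!

# by contrast, an f-string such as f"{input}" is evaluated immediately at definition
# time, baking the repr of Python's built-in input() into the template string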
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+langchain
+gradio==3.39.0
+transformers==4.24.0
+torch