import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from peft import PeftModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

base_model_id = "mistralai/Mistral-7B-v0.1"
ft_model_id = "asusevski/mistraloo-sft"

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_bos_token=True
)

# 4-bit NF4 quantization with nested (double) quantization; compute in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
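
# Sketch (an assumption, not part of the original Space): on GPUs without
# bfloat16 support, float16 is the usual fallback compute dtype.
# if not torch.cuda.is_bf16_supported():
#     bnb_config.bnb_4bit_compute_dtype = torch.float16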

# Load the quantized base model; token=True reads the Hugging Face token saved
# by `huggingface-cli login`.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    token=True
)

# device_map="auto" already placed the quantized base model; calling .to(device)
# on a 4-bit bitsandbytes model raises an error, so the adapter is attached in place.
model = PeftModel.from_pretrained(base_model, ft_model_id)
model.eval()
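
# Optional sketch (an assumption, not used here): with a full-precision base
# model, PEFT's merge_and_unload() would fold the LoRA weights into the base
# for faster inference; it is not generally applicable to a 4-bit base.
# model = model.merge_and_unload()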

def uwaterloo_output(post_title, post_text):
    # Alpaca-style prompt matching the fine-tuning format.
    prompt = f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Respond to the reddit post in the style of a University of Waterloo student.
### Input:
{post_title}
{post_text}
### Response:
"""
    model_input = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        model_output = model.generate(**model_input, max_new_tokens=256, repetition_penalty=1.15)[0]
    output = tokenizer.decode(model_output, skip_special_tokens=True)
    # Return only the generated completion, dropping the echoed prompt.
    return output.split('### Response:\n')[-1]
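
# Quick smoke test before launching the UI (hypothetical inputs):
# print(uwaterloo_output("Is CS 135 hard?", "First year here, wondering how much time to budget."))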

iface = gr.Interface(
    fn=uwaterloo_output,
    inputs=[
        gr.Textbox("", label="Post Title"),
        gr.Textbox("", label="Post Text"),
    ],
    outputs=gr.Textbox("", label="Mistraloo-SFT")
)
iface.launch()
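
# On Spaces, Gradio's request queue helps with long generations (a sketch;
# max_size is illustrative):
# iface.queue(max_size=16).launch()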