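# Gradio Space that chats with the abdeljalilELmajjodi/alatlas_instruct_lora
# LoRA adapter loaded on top of its base causal LM; the example prompts below
# are Moroccan Darija.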
import os

import gradio as gr
import spaces
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_NAME = "abdeljalilELmajjodi/alatlas_instruct_lora"

if torch.cuda.is_available():
    print(f"bf16 available: {torch.cuda.is_bf16_supported()}")

# Every hub call passes the access token from the TOKEN secret, so the
# adapter repo can stay private.
config = PeftConfig.from_pretrained(MODEL_NAME, token=os.environ["TOKEN"])

# Load the base model in bf16 and attach the LoRA adapter on top of it.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto",
    token=os.environ["TOKEN"],
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(model, MODEL_NAME, torch_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=os.environ["TOKEN"])
if tokenizer.pad_token is None:
    # Fall back to the EOS token when the tokenizer defines no pad token.
    tokenizer.pad_token = tokenizer.eos_token

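# On ZeroGPU Spaces, the @spaces.GPU decorator requests a GPU only for the
# duration of each decorated call.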
@spaces.GPU
def generate(prompt, temperature=0.7, top_k=50, repetition_penalty=1.2):
    # Wrap the raw prompt in the model's chat template;
    # add_generation_prompt=True ends the prompt with the assistant header
    # so the model starts its reply directly.
    messages = [{"role": "user", "content": prompt}]
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    # Keep only the newly generated tokens, then decode them.
    output_ids = output_ids[0][inputs.input_ids.shape[1]:]
    output = tokenizer.decode(output_ids, skip_special_tokens=True)
    # The decoded text can still begin with the template's assistant role
    # marker; strip it if present.
    assistant_marker = "ﭺassistant"
    if output.startswith(assistant_marker):
        output = output[len(assistant_marker):].strip()
    return output

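# Gradio widgets: prompt input, model response, and sampling controls.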
prompt_input = gr.Textbox(label="Enter your prompt", lines=5, rtl=True)
model_response = gr.Textbox(label="Model Response", lines=5, interactive=False, rtl=True)
temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.7, label="Temperature")
top_k = gr.Slider(1, 10000, value=10, label="Top-k")
repetition_penalty = gr.Slider(0.1, 100.0, value=1.2, label="Repetition Penalty")

# Each example row is [prompt, temperature, top_k, repetition_penalty].
examples = [
    # "Please, I want to travel in Morocco this summer but I don't know
    # where to go. Can you help me?"
    ["عافاك بغيت نسافر فالمغرب فالصيف ولكن معرفتش فين نمشي. ممكن تعاوني؟", 0.1, 90, 1.2],
    # "Please, I'd like to know the best places I can visit in Morocco in the summer."
    ["عافاك، بغيت نعرف شنو هي أحسن الأماكن لي نقدر نزورها فالمغرب فالصيف؟", 0.1, 100, 1.2],
    # "Explain artificial intelligence to me, please."
    ["شرح ليا الذكاء الاصطناعي عفاك", 0.1, 1, 1.2],
]

demo = gr.Interface(
    fn=generate,
    inputs=[prompt_input, temperature, top_k, repetition_penalty],
    outputs=model_response,
    flagging_mode="never",
    examples=examples,
    # Pre-compute and cache outputs for the example prompts at startup.
    cache_examples=True,
)

demo.launch()