File size: 4,866 Bytes
550f132
ad41ac1
 
 
 
 
0c6cf3e
 
ad41ac1
 
 
0c6cf3e
 
 
 
 
ad41ac1
0c6cf3e
 
 
ea0c6be
ebbfcba
ad41ac1
55ca4f6
0c6cf3e
ea0c6be
ae2f848
 
0c6cf3e
ad41ac1
0c6cf3e
 
 
 
 
 
 
ea0c6be
0c6cf3e
ad41ac1
0c6cf3e
 
ad41ac1
0c6cf3e
 
 
ae2f848
8ea52ad
 
0c6cf3e
 
 
 
 
 
 
ea0c6be
8ea52ad
 
 
ea0c6be
0c6cf3e
ea0c6be
0c6cf3e
ae2f848
 
0c6cf3e
34fbe71
ad41ac1
0c6cf3e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import spaces
import torch
import sys
import html
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
import gradio as gr
from gradio_rich_textbox import RichTextbox



# Markdown banner rendered at the top of the demo page (passed to gr.Markdown in main()).
# The inline <a>/<img> badge is raw HTML embedded in the Markdown; a stray closing
# heading tag that had no matching opener was removed from the end of the badge line.
title = """# 🙋🏻‍♂️Welcome to🌟Tonic's🫡📉MetaMath
this is Tencent's mistral DPO finetune for mathematics. You can build with this endpoint using🫡📉MetaMath available here : [TencentARC/Mistral_Pro_8B_v0.1](https://huggingface.co/TencentARC/Mistral_Pro_8B_v0.1). We're using 🤖[introspector/unimath](https://huggingface.co/datasets/introspector/unimath) for cool examples, check it out below ! The demo is still a work in progress and we're looking forward to build downstream tasks that showcase outstanding mathematical reasoning. Have any ideas ? join us below !
You can also use 🫡📉MetaMath by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/MetaMath?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) Math with [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/scitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""

# Hub checkpoint id shared by the tokenizer and the model loaded below.
model_name = 'TencentARC/Mistral_Pro_8B_v0.1'

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are requested in bfloat16; device placement is delegated to
# device_map="auto" rather than chosen explicitly here.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# NOTE: no GenerationConfig is loaded or patched at import time;
# predict_math_bot supplies pad_token_id to generate() on every call instead.

@torch.inference_mode()
@spaces.GPU
def predict_math_bot(user_message, system_message="", max_new_tokens=125, temperature=0.1, top_p=0.9, repetition_penalty=1.9, do_sample=False):
    """Run the module-level model on a math query and return the decoded text.

    Args:
        user_message: The query text. ``None`` is treated as an empty string.
        system_message: Optional instructions. When non-empty, the prompt is
            built as ``<|user|>…\\n<|system|>…\\n<|assistant|>\\n``; otherwise
            the user message is used verbatim as the prompt.
        max_new_tokens: Upper bound on generated tokens. UI sliders may deliver
            this as a float, so it is coerced to ``int``.
        temperature: Sampling temperature; only forwarded when ``do_sample``.
        top_p: Nucleus-sampling threshold; only forwarded when ``do_sample``.
        repetition_penalty: Repetition penalty forwarded to ``generate``.
        do_sample: Sample when true, decode greedily otherwise.

    Returns:
        The first sequence returned by ``model.generate``, decoded with special
        tokens skipped. The whole returned sequence is decoded (nothing is
        sliced off), so for a causal LM the text begins with the prompt.
    """
    user_message = user_message or ""
    if system_message:
        prompt = f"<|user|>{user_message}\n<|system|>{system_message}\n<|assistant|>\n"
    else:
        prompt = user_message

    inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=True)
    input_ids = inputs["input_ids"].to(model.device)

    generation_kwargs = {
        # Equivalent to the former max_length = prompt length + max_new_tokens,
        # but always an integer even when the slider hands over a float.
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": repetition_penalty,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": bool(do_sample),
    }

    # pad_token_id equals eos_token_id here, so generate() cannot infer the
    # mask from the ids on its own; hand over the tokenizer's mask explicitly.
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        generation_kwargs["attention_mask"] = attention_mask.to(model.device)

    # temperature / top_p have no effect under greedy decoding and only
    # produce configuration warnings there, so pass them only when sampling.
    if do_sample:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    output_ids = model.generate(input_ids, **generation_kwargs)

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

def main():
    """Build the Gradio Blocks UI for the demo and launch it.

    The page shows the Markdown banner, a code editor for the query, a system
    prompt box, an accordion with the generation settings, and a rich-text
    output; the button wires all inputs to ``predict_math_bot``.
    """
    with gr.Blocks() as demo:
        gr.Markdown(title)
        with gr.Row():
            # Raw string: in a normal literal "\f" (from \frac) is a form-feed
            # escape and "\i" an invalid escape, which corrupted the example.
            user_message = gr.Code(label="🫡Enter your math query here...", language="r", lines=3, value=r"""F(x) &= \int^a_b \frac{1}{3}x^3""")
            system_message = gr.Textbox(label="📉System Prompt", lines=2, placeholder="Optional: give precise instructions to resolve the problem provided above, produce complete answer in Latex format:")

        with gr.Accordion("Advanced Settings"):
            with gr.Row():
                max_new_tokens = gr.Slider(label="Max new tokens", value=125, minimum=25, maximum=1250)
                temperature = gr.Slider(label="Temperature", value=0.1, minimum=0.05, maximum=1.0)
                top_p = gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99)
                repetition_penalty = gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0)
                do_sample = gr.Checkbox(label="Uncheck for faster inference", value=False)

        output_text = RichTextbox(label="🫡📉MetaMath", interactive=True)

        # Input order must match predict_math_bot's positional parameters.
        gr.Button("Try🫡📉MetaMath").click(
            predict_math_bot,
            inputs=[user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample],
            outputs=output_text
        )

    demo.launch()

if __name__ == "__main__":
    main()