Update app.py
app.py CHANGED
@@ -9,7 +9,6 @@ dtype = torch.bfloat16
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
-@spaces.GPU
 def load_model():
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
@@ -22,6 +21,7 @@ def load_model():
 
 tokenizer, model = load_model()
 
+@spaces.GPU
 def moderate(user_input, assistant_response):
     chat = [
         {"role": "user", "content": user_input},
@@ -32,11 +32,8 @@ def moderate(user_input, assistant_response):
     prompt_len = input_ids.shape[-1]
     return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
 
-def gradio_moderate(user_input, assistant_response):
-    return moderate(user_input, assistant_response)
-
 iface = gr.Interface(
-    fn=gradio_moderate,
+    fn=moderate,
     inputs=[
         gr.Textbox(lines=3, label="User Input"),
         gr.Textbox(lines=3, label="Assistant Response")
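In short, the commit moves the @spaces.GPU decorator off load_model() and onto moderate(), so the ZeroGPU runtime attaches a GPU per inference call rather than at model-load time, and it drops the pass-through gradio_moderate wrapper in favor of passing moderate to gr.Interface directly. Below is a minimal sketch of what app.py plausibly looks like after this commit. Only the changed hunks are visible in the diff, so the imports, the model_id value, the from_pretrained keyword arguments, the generate() call, the outputs component, and the launch() call are assumptions reconstructed from the visible context, not the Space's actual code.

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "some-org/some-moderation-model"  # hypothetical; the real model_id is outside the diff
dtype = torch.bfloat16

quantization_config = BitsAndBytesConfig(load_in_8bit=True)

def load_model():
    # No @spaces.GPU here anymore: loading runs once at startup.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,                      # assumed kwargs; the diff cuts off
        quantization_config=quantization_config,
        device_map="auto",
    )
    return tokenizer, model

tokenizer, model = load_model()

# The decorator now sits on the per-request function, so ZeroGPU attaches a
# GPU for the duration of each call instead of wrapping model loading.
@spaces.GPU
def moderate(user_input, assistant_response):
    chat = [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": assistant_response},
    ]
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(model.device)
    output = model.generate(input_ids=input_ids, max_new_tokens=100)  # assumed generation step
    prompt_len = input_ids.shape[-1]
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

iface = gr.Interface(
    fn=moderate,  # calls moderate directly; the gradio_moderate wrapper is gone
    inputs=[
        gr.Textbox(lines=3, label="User Input"),
        gr.Textbox(lines=3, label="Assistant Response"),
    ],
    outputs=gr.Textbox(label="Moderation Result"),  # assumed output component
)

iface.launch()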