Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,22 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import joblib
|
|
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
from openai import OpenAI
|
7 |
-
from huggingface_hub import login
|
8 |
-
from huggingface_hub import hf_hub_download
|
9 |
-
import logfire
|
10 |
from pydantic import BaseModel
|
11 |
|
12 |
-
|
13 |
logfire.configure(token=os.getenv("LOGFIRE_API_KEY"))
|
14 |
logfire.instrument_pydantic()
|
15 |
|
16 |
-
# Load
|
17 |
-
# model_path = hf_hub_download(repo_id="govtech/zoo-entry-001", filename="model.joblib", use_auth_token=True)
|
18 |
model_data = joblib.load("model.joblib")
|
19 |
-
model = model_data[
|
20 |
-
label_names = model_data[
|
21 |
|
22 |
-
class
|
23 |
text: str
|
24 |
hateful: float
|
25 |
insults: float
|
@@ -28,63 +25,53 @@ class results(BaseModel):
|
|
28 |
self_harm: float
|
29 |
aom: float
|
30 |
|
31 |
-
|
32 |
# Initialize OpenAI client
|
33 |
client = OpenAI()
|
34 |
|
35 |
-
def get_embedding(text, embedding_model="text-embedding-3-large"):
|
36 |
"""
|
37 |
Get embedding for the input text from OpenAI.
|
38 |
-
|
39 |
"""
|
40 |
text = text.replace("\n", " ")
|
41 |
-
response = client.embeddings.create(
|
42 |
-
input=[text],
|
43 |
-
model=embedding_model
|
44 |
-
)
|
45 |
-
# Extract embedding vector from response
|
46 |
embedding = response.data[0].embedding
|
47 |
return np.array(embedding)
|
48 |
|
49 |
-
def classify_text(text):
|
50 |
"""
|
51 |
Get the OpenAI embedding for the provided text, classify it using your model,
|
52 |
-
and return
|
53 |
"""
|
54 |
embedding = get_embedding(text)
|
55 |
-
|
56 |
-
X = np.array(embedding)[None, :]
|
57 |
-
# Get probabilities from the model
|
58 |
probabilities = model.predict(X)
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
'Prediction': (probabilities[0] > 0.5).astype(int)
|
64 |
-
})
|
65 |
-
# Return an update to the DataFrame component to make it visible with the results
|
66 |
-
results(
|
67 |
text=text,
|
68 |
-
hateful=
|
69 |
-
insults=
|
70 |
-
sexual=
|
71 |
-
violence=
|
72 |
-
self_harm=
|
73 |
-
aom=
|
74 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
return gr.update(value=df, visible=True)
|
76 |
|
77 |
with gr.Blocks(title="Zoo Entry 001") as iface:
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
with gr.Row():
|
83 |
-
submit_btn = gr.Button("Submit")
|
84 |
-
|
85 |
-
# Initialize the table as hidden
|
86 |
-
with gr.Row():
|
87 |
-
output_table = gr.DataFrame(label="Classification Results", visible=False)
|
88 |
|
89 |
submit_btn.click(fn=classify_text, inputs=input_text, outputs=output_table)
|
90 |
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import joblib
|
4 |
+
import logfire
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
from openai import OpenAI
|
|
|
|
|
|
|
8 |
from pydantic import BaseModel
|
9 |
|
10 |
+
# Logfire setup: the token is read from the LOGFIRE_API_KEY environment
# variable (None when the variable is unset), then Pydantic instrumentation
# is switched on.
logfire.configure(token=os.environ.get("LOGFIRE_API_KEY"))
logfire.instrument_pydantic()

# Load the serialized bundle from the working directory and pull out the two
# entries the app uses: the classifier and the label names.
# NOTE: joblib.load unpickles the file, so model.joblib must come from a
# trusted source.
model_data = joblib.load("model.joblib")
model = model_data["model"]
label_names = model_data["label_names"]
|
18 |
|
19 |
+
class Results(BaseModel):
|
20 |
text: str
|
21 |
hateful: float
|
22 |
insults: float
|
|
|
25 |
self_harm: float
|
26 |
aom: float
|
27 |
|
|
|
28 |
# Initialize OpenAI client
|
29 |
client = OpenAI()
|
30 |
|
31 |
+
def get_embedding(text: str, embedding_model: str = "text-embedding-3-large") -> np.ndarray:
    """
    Fetch the OpenAI embedding vector for ``text``.

    Newline characters are swapped for spaces before the request is made.
    The text is submitted as a one-element batch to ``embedding_model`` and
    the embedding of the first returned item is handed back as a NumPy array.
    """
    single_line = text.replace("\n", " ")
    api_result = client.embeddings.create(input=[single_line], model=embedding_model)
    first_item = api_result.data[0]
    return np.array(first_item.embedding)
|
40 |
|
41 |
+
def classify_text(text: str):
    """
    Classify ``text`` and return a Gradio update that reveals the results table.

    The text is embedded with ``get_embedding``, reshaped into a single-row
    matrix and passed to ``model.predict``; the first row of the output is
    used as one score per label.

    Args:
        text: Input text to classify.

    Returns:
        ``gr.update(value=df, visible=True)`` where ``df`` has three columns:
        ``Label`` (taken from ``label_names``), ``Probability`` (scores
        rounded to 4 decimals) and ``Prediction`` (1 when the unrounded score
        is greater than 0.5, otherwise 0).
    """
    embedding = get_embedding(text)
    X = embedding.reshape(1, -1)

    # NOTE(review): the output of model.predict is used as per-label
    # probabilities -- confirm the stored model returns scores, not hard labels.
    probabilities = model.predict(X)
    raw_probs = np.asarray(probabilities[0])

    # Rounding is for display/logging only; the decision below uses raw_probs.
    rounded_probs = np.round(raw_probs, 4)

    # Build a Results instance purely for its validation side effect; the
    # instance itself is discarded and does not affect the returned table.
    # Presumably this is what gets recorded through the Pydantic
    # instrumentation enabled at module level -- TODO confirm.
    # NOTE(review): positions 0-5 are assumed to follow the order
    # hateful, insults, sexual, violence, self_harm, aom -- verify against
    # label_names.
    Results(
        text=text,
        hateful=rounded_probs[0],
        insults=rounded_probs[1],
        sexual=rounded_probs[2],
        violence=rounded_probs[3],
        self_harm=rounded_probs[4],
        aom=rounded_probs[5],
    )

    # Threshold the unrounded scores: a score such as 0.50004 rounds to 0.5
    # and would otherwise be reported as 0 even though it exceeds the cutoff.
    df = pd.DataFrame({
        "Label": label_names,
        "Probability": rounded_probs,
        "Prediction": (raw_probs > 0.5).astype(int),
    })

    return gr.update(value=df, visible=True)
|
70 |
|
71 |
with gr.Blocks(title="Zoo Entry 001") as iface:
|
72 |
+
input_text = gr.Textbox(lines=5, label="Input Text")
|
73 |
+
submit_btn = gr.Button("Submit")
|
74 |
+
output_table = gr.DataFrame(label="Classification Results", visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
submit_btn.click(fn=classify_text, inputs=input_text, outputs=output_table)
|
77 |
|