gabrielchua committed on
Commit
71b879a
·
verified ·
1 Parent(s): 92f7782

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -46
app.py CHANGED
@@ -1,25 +1,22 @@
1
  import os
2
  import gradio as gr
3
  import joblib
 
4
  import numpy as np
5
  import pandas as pd
6
  from openai import OpenAI
7
- from huggingface_hub import login
8
- from huggingface_hub import hf_hub_download
9
- import logfire
10
  from pydantic import BaseModel
11
 
12
-
13
  logfire.configure(token=os.getenv("LOGFIRE_API_KEY"))
14
  logfire.instrument_pydantic()
15
 
16
- # Load your pre-trained model and label names
17
- # model_path = hf_hub_download(repo_id="govtech/zoo-entry-001", filename="model.joblib", use_auth_token=True)
18
  model_data = joblib.load("model.joblib")
19
- model = model_data['model']
20
- label_names = model_data['label_names']
21
 
22
- class results(BaseModel):
23
  text: str
24
  hateful: float
25
  insults: float
@@ -28,63 +25,53 @@ class results(BaseModel):
28
  self_harm: float
29
  aom: float
30
 
31
-
32
  # Initialize OpenAI client
33
  client = OpenAI()
34
 
35
- def get_embedding(text, embedding_model="text-embedding-3-large"):
36
  """
37
  Get embedding for the input text from OpenAI.
38
- Replace newlines in the text, then call the API.
39
  """
40
  text = text.replace("\n", " ")
41
- response = client.embeddings.create(
42
- input=[text],
43
- model=embedding_model
44
- )
45
- # Extract embedding vector from response
46
  embedding = response.data[0].embedding
47
  return np.array(embedding)
48
 
49
- def classify_text(text):
50
  """
51
  Get the OpenAI embedding for the provided text, classify it using your model,
52
- and return an updated DataFrame component with the predictions and probabilities.
53
  """
54
  embedding = get_embedding(text)
55
- # Add batch dimension
56
- X = np.array(embedding)[None, :]
57
- # Get probabilities from the model
58
  probabilities = model.predict(X)
59
- # Create a DataFrame with probabilities, labels, and binary predictions
60
- df = pd.DataFrame({
61
- 'Label': label_names,
62
- 'Probability': probabilities[0],
63
- 'Prediction': (probabilities[0] > 0.5).astype(int)
64
- })
65
- # Return an update to the DataFrame component to make it visible with the results
66
- results(
67
  text=text,
68
- hateful=probabilities[0][0].round(4),
69
- insults=probabilities[0][1].round(4),
70
- sexual=probabilities[0][2].round(4),
71
- violence=probabilities[0][3].round(4),
72
- self_harm=probabilities[0][4].round(4),
73
- aom=probabilities[0][5].round(4),
74
  )
 
 
 
 
 
 
 
 
75
  return gr.update(value=df, visible=True)
76
 
77
  with gr.Blocks(title="Zoo Entry 001") as iface:
78
-
79
- with gr.Row():
80
- input_text = gr.Textbox(lines=5, label="Input Text")
81
-
82
- with gr.Row():
83
- submit_btn = gr.Button("Submit")
84
-
85
- # Initialize the table as hidden
86
- with gr.Row():
87
- output_table = gr.DataFrame(label="Classification Results", visible=False)
88
 
89
  submit_btn.click(fn=classify_text, inputs=input_text, outputs=output_table)
90
 
 
1
  import os
2
  import gradio as gr
3
  import joblib
4
+ import logfire
5
  import numpy as np
6
  import pandas as pd
7
  from openai import OpenAI
 
 
 
8
  from pydantic import BaseModel
9
 
10
+ # Configure logging
11
  logfire.configure(token=os.getenv("LOGFIRE_API_KEY"))
12
  logfire.instrument_pydantic()
13
 
14
+ # Load pre-trained model and label names
 
15
  model_data = joblib.load("model.joblib")
16
+ model = model_data["model"]
17
+ label_names = model_data["label_names"]
18
 
19
+ class Results(BaseModel):
20
  text: str
21
  hateful: float
22
  insults: float
 
25
  self_harm: float
26
  aom: float
27
 
 
28
  # Initialize OpenAI client
29
  client = OpenAI()
30
 
31
+ def get_embedding(text: str, embedding_model: str = "text-embedding-3-large") -> np.ndarray:
32
  """
33
  Get embedding for the input text from OpenAI.
34
+ Replaces newlines with spaces before calling the API.
35
  """
36
  text = text.replace("\n", " ")
37
+ response = client.embeddings.create(input=[text], model=embedding_model)
 
 
 
 
38
  embedding = response.data[0].embedding
39
  return np.array(embedding)
40
 
41
+ def classify_text(text: str):
42
  """
43
  Get the OpenAI embedding for the provided text, classify it using your model,
44
+ and return a DataFrame with the rounded probabilities and binary predictions.
45
  """
46
  embedding = get_embedding(text)
47
+ X = embedding.reshape(1, -1)
 
 
48
  probabilities = model.predict(X)
49
+ rounded_probs = np.round(probabilities[0], 4)
50
+
51
+ # Build a Results instance so the logfire pydantic instrumentation can record it (the instance is discarded and does not affect the output)
52
+ Results(
 
 
 
 
53
  text=text,
54
+ hateful=rounded_probs[0],
55
+ insults=rounded_probs[1],
56
+ sexual=rounded_probs[2],
57
+ violence=rounded_probs[3],
58
+ self_harm=rounded_probs[4],
59
+ aom=rounded_probs[5],
60
  )
61
+
62
+ # Create DataFrame with rounded probabilities and binary predictions
63
+ df = pd.DataFrame({
64
+ "Label": label_names,
65
+ "Probability": rounded_probs,
66
+ "Prediction": (rounded_probs > 0.5).astype(int)
67
+ })
68
+
69
  return gr.update(value=df, visible=True)
70
 
71
  with gr.Blocks(title="Zoo Entry 001") as iface:
72
+ input_text = gr.Textbox(lines=5, label="Input Text")
73
+ submit_btn = gr.Button("Submit")
74
+ output_table = gr.DataFrame(label="Classification Results", visible=False)
 
 
 
 
 
 
 
75
 
76
  submit_btn.click(fn=classify_text, inputs=input_text, outputs=output_table)
77