Spaces:

nebiyu29
/

exp

Runtime error

App Files Files Community

nebiyu29 committed on Feb 12, 2024

Commit

16a1d49

verified ·

1 Parent(s): 71d31ee

Create app.py

Browse files

Files changed (1) hide show

app.py +111 -0

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import gradio as gr
+# Load model directly
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import transformers
+# Load the tokenizer and the sequence-classification model from the same checkpoint id.
+# NOTE(review): both calls run at import time; presumably they download the
+# checkpoint from the Hugging Face Hub when it is not cached - confirm.
+tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
+model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
+# Alternative checkpoint, left commented out for reference:
+# model = transformers.AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
+# tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
+# Split a text into segments that fit the model's input window
+def split_text(text, max_tokens=512):
+    """Split ``text`` into consecutive segments the model can take in one pass.
+
+    The text is tokenized with the module-level ``tokenizer``, cut into
+    fixed-size slices by position, and each slice is converted back to a
+    string.
+
+    Args:
+        text: Input string to split.
+        max_tokens: Upper bound on the encoded length of a segment,
+            including the special tokens the tokenizer adds when the segment
+            is encoded again for the model. Defaults to 512.
+
+    Returns:
+        A list of segment strings in original order; an empty list when the
+        text yields no tokens.
+    """
+    tokens = tokenizer.tokenize(text)
+    # Leave room for the special tokens added at encoding time, otherwise a
+    # full segment would exceed max_tokens once it is fed to the model.
+    reserved = tokenizer.num_special_tokens_to_add(pair=False)
+    chunk_size = max(1, max_tokens - reserved)
+    # Slice by position. Comparing each token with tokens[-1] would close a
+    # segment at every earlier token that merely has the same value as the
+    # last one (for example every "." in a text that ends with ".").
+    return [
+        tokenizer.convert_tokens_to_string(tokens[start:start + chunk_size])
+        for start in range(0, len(tokens), chunk_size)
+    ]
+# Classify a text segment by segment and return a readable summary
+def classify_text(text):
+    """Classify ``text`` in model-sized segments and summarise the results.
+
+    The text is cut into segments by ``split_text``. Each segment is encoded
+    with the module-level ``tokenizer``, run through the module-level
+    ``model`` without gradient tracking, and reduced to its top class by
+    ``extract_predictions``.
+
+    Args:
+        text: Input string to classify.
+
+    Returns:
+        A string with one line per segment in the form
+        ``Segment <n>: <label> (<probability>)``, or a short notice when the
+        input produced no segments.
+    """
+    segments = split_text(text)
+    if not segments:
+        return "No text to classify."
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Module.to() moves the parameters in place. The name ``model`` must not be
+    # rebound here: an assignment would make it local to this function and the
+    # read on the right-hand side would raise UnboundLocalError.
+    model.to(device)
+    # Class names come from the loaded checkpoint's config so they always match
+    # the classification head that produced the logits.
+    id2label = model.config.id2label
+    predictions = []
+    for segment in segments:
+        # truncation=True is a safety net: re-encoding a decoded segment can
+        # yield a few more tokens than the slice it was built from.
+        inputs = tokenizer([segment], padding=True, truncation=True, return_tensors="pt")
+        input_ids = inputs["input_ids"].to(device)
+        attention_mask = inputs["attention_mask"].to(device)
+        with torch.no_grad():
+            outputs = model(input_ids, attention_mask=attention_mask)
+        probs, preds = extract_predictions(outputs)
+        # The batch holds exactly one segment, so row 0 carries its result.
+        label_id = int(preds[0])
+        predictions.append({
+            "segment_text": segment,
+            "label": id2label.get(label_id, str(label_id)),
+            "probability": float(probs[0, label_id]),
+        })
+    # The Gradio output component is "text", so return a plain string.
+    return "\n".join(
+        f"Segment {index}: {item['label']} ({item['probability']:.1%})"
+        for index, item in enumerate(predictions, start=1)
+    )
+# Turn raw classifier output into class probabilities and predicted indices
+def extract_predictions(outputs):
+    """Return softmax probabilities and arg-max class indices for ``outputs``.
+
+    Args:
+        outputs: Object exposing a ``logits`` tensor; the softmax and the
+            arg-max are both taken along dimension 1.
+
+    Returns:
+        A ``(probabilities, predicted_indices)`` tuple of tensors.
+    """
+    class_probabilities = torch.softmax(outputs.logits, dim=1)
+    predicted_indices = class_probabilities.argmax(dim=1)
+    return class_probabilities, predicted_indices
+# def classify_text(text):
+#   """
+#   This function preprocesses, feeds text to the model, and outputs the predicted class.
+#   """
+#   inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
+#   outputs = model(**inputs)
+#   logits = outputs.logits  # Access logits instead of pipeline output
+#   predictions = torch.argmax(logits, dim=-1)  # Apply argmax for prediction
+#   return model.config.id2label[predictions.item()]  # Map index to class label
+# Gradio front end: one text box in, the result of classify_text out as text.
+interface = gr.Interface(
+    title="Text Classification Demo",
+    description="Enter some text, and the model will classify it.",
+    fn=classify_text,
+    inputs="text",
+    outputs="text",
+    # Disabled draft of a label list, kept for reference:
+    #choices=["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]  # Adjust class names
+)
+#interface.launch()