Osama-Ahmed-27 committed on
Commit
a97143b
·
verified ·
1 Parent(s): d367abd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import torch
4
+ import nltk
5
+ import speech_recognition as sr
6
+ from fastapi import FastAPI, UploadFile, File, HTTPException
7
+ from pydantic import BaseModel
8
+ from transformers import pipeline, BertForSequenceClassification, BertTokenizer
9
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
10
+
11
# ---------------- INIT ----------------
# Hub identifiers of the two transformer checkpoints loaded below.
_EMOTION_MODEL_ID = "tabularisai/multilingual-sentiment-analysis"
_FINBERT_MODEL_ID = "yiyanghkust/finbert-tone"

# VADER needs its lexicon on disk; download it only when NLTK cannot find it.
try:
    nltk.data.find("sentiment/vader_lexicon")
except LookupError:
    nltk.download("vader_lexicon")
vader = SentimentIntensityAnalyzer()

# Emotion model
emotion_model = pipeline("sentiment-analysis", model=_EMOTION_MODEL_ID)

# FinBERT Tone
finbert_tokenizer = BertTokenizer.from_pretrained(_FINBERT_MODEL_ID)
finbert = BertForSequenceClassification.from_pretrained(_FINBERT_MODEL_ID, num_labels=3)
# Indexed by the argmax class id in score_tone; the order presumably mirrors
# the checkpoint's id-to-label mapping -- confirm against the model card.
tone_labels = ["Neutral", "Positive", "Negative"]

# FastAPI
app = FastAPI(title="Sentiment • Emotion • Tone API", version="2.0.0")
28
+
29
+
30
+ # ---------------- HELPERS ----------------
31
+ def _label3(label_str: str) -> str:
32
+ l = label_str.lower()
33
+ if "pos" in l:
34
+ return "Positive"
35
+ if "neg" in l:
36
+ return "Negative"
37
+ return "Neutral"
38
+
39
+ def _signed_score(label: str, score01: float) -> float:
40
+ if label == "Positive":
41
+ return +abs(float(score01))
42
+ if label == "Negative":
43
+ return -abs(float(score01))
44
+ return 0.0
45
+
46
def score_sentiment(text: str) -> float:
    """Return a signed sentiment score from VADER's compound value.

    Compound values of at least 0.05 are reported as positive and values
    of at most -0.05 as negative; anything in between is reported as 0.0.
    """
    compound = vader.polarity_scores(text)["compound"]
    if compound >= 0.05:
        polarity = "Positive"
    elif compound <= -0.05:
        polarity = "Negative"
    else:
        return 0.0
    return _signed_score(polarity, abs(compound))
54
+
55
def score_emotion(text: str) -> float:
    """Return a signed score from the top prediction of the emotion pipeline.

    The predicted label is reduced to Positive/Negative/Neutral by
    ``_label3`` and combined with the prediction's score by
    ``_signed_score``, so a Neutral prediction yields 0.0.
    """
    # truncation=True is forwarded to the pipeline's tokenizer so that long
    # inputs are clipped instead of exceeding the model's maximum sequence
    # length and raising; this matches the truncation used in score_tone.
    top = emotion_model(text, truncation=True)[0]
    polarity = _label3(top["label"])
    return _signed_score(polarity, float(top["score"]))
59
+
60
def score_tone(text: str) -> float:
    """Return a signed tone score from the FinBERT tone classifier.

    The class with the highest softmax probability is looked up in
    ``tone_labels`` and its probability is signed by ``_signed_score``,
    so a Neutral prediction yields 0.0.
    """
    inputs = finbert_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        # Explicit bound: truncation only takes effect when a maximum length
        # is known, and the tokenizer may not carry one in its own config.
        max_length=finbert.config.max_position_embeddings,
    )
    with torch.no_grad():
        logits = finbert(**inputs).logits
    # A single text is encoded, so squeeze() drops the batch axis.
    probs = torch.softmax(logits, dim=1).squeeze()
    best = torch.argmax(probs).item()
    return _signed_score(tone_labels[best], float(probs[best].item()))
69
+
70
def analyze_text_core(text: str):
    """Run all three scorers on ``text``.

    Returns a one-element list holding a dict with the keys "sentiment",
    "emotion" and "tone", each rounded to four decimal places.
    """
    scorers = (
        ("sentiment", score_sentiment),
        ("emotion", score_emotion),
        ("tone", score_tone),
    )
    return [{key: round(scorer(text), 4) for key, scorer in scorers}]
76
+
77
+
78
+ # ---------------- SCHEMAS ----------------
79
# Request body of POST /analyze-text.
class TextIn(BaseModel):
    # Raw text to score; the route rejects empty or whitespace-only values.
    text: str
81
+
82
+
83
+ # ---------------- ROUTES ----------------
84
# Index route: reports that the service is up and lists the analysis endpoints.
@app.get("/")
def root():
    available = ["/analyze-text", "/analyze-voice"]
    return {"ok": True, "endpoints": available}
87
+
88
# Scores the posted text; responds with HTTP 400 when the text is empty or
# consists only of whitespace.
@app.post("/analyze-text")
def analyze_text(payload: TextIn):
    cleaned = payload.text.strip() if payload.text else ""
    if cleaned:
        return analyze_text_core(cleaned)
    raise HTTPException(status_code=400, detail="Text cannot be empty.")
94
+
95
# Transcribes an uploaded WAV/AIFF recording and scores the transcript.
#
# Responses:
#   400 - the filename does not end in .wav/.aiff/.aif, or the payload cannot
#         be decoded as audio.
#   500 - the transcription request failed.
#
# NOTE(review): decoding and the transcription request are blocking calls made
# inside an async handler; consider moving them to a worker thread.
@app.post("/analyze-voice")
async def analyze_voice(file: UploadFile = File(...)):
    # The client-supplied filename is used only for the extension check. It is
    # never turned into a filesystem path, so it cannot be used to write
    # outside a temp directory or to collide with another upload.
    fname = (file.filename or "audio").lower()
    if not fname.endswith((".wav", ".aiff", ".aif")):
        raise HTTPException(status_code=400, detail="Please upload WAV/AIFF file (MP3 not supported by speech_recognition without ffmpeg).")

    data = await file.read()

    # Decode straight from memory: no temp file to create, clean up, or leak.
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(io.BytesIO(data)) as source:
            audio = recognizer.record(source)
    except ValueError as e:
        # Raised when the payload is not readable audio (e.g. empty or corrupt
        # upload); that is a client error rather than a server fault.
        raise HTTPException(status_code=400, detail=f"Could not read audio file: {e}") from e

    # SpeechRecognition with Google Web Speech API (free, no key)
    try:
        transcript = recognizer.recognize_google(audio, language="en-US")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Transcription failed: {e}") from e

    return analyze_text_core(transcript)