Spaces: Running
Mitesh Koshiya committed · Commit 5f52527 · Parent(s): 43168d6
Update space 1st time
Files changed:
- .gitignore +28 -0
- .huggingface.yaml +3 -0
- good-main.py +118 -0
- index.html +0 -19
- main.py +291 -0
- old-main.py +67 -0
- requirements.txt +8 -0
- style.css +0 -28
- with-english-name-spacy.py +121 -0
.gitignore
ADDED
@@ -0,0 +1,28 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Virtual environment
+venv
+
+# Environment variables and secrets
+.env
+
+# VSCode settings (optional, if not shared across devs)
+.vscode/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# Python distribution / packaging
+build/
+dist/
+*.egg-info/
+
+# Jupyter Notebook checkpoints (if you use Jupyter)
+.ipynb_checkpoints/
.huggingface.yaml
ADDED
@@ -0,0 +1,3 @@
+# .huggingface.yaml
+sdk: "fastapi"
+python_file: "main.py"
good-main.py
ADDED
@@ -0,0 +1,118 @@
+import re
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification
+import dateparser
+from datetime import datetime
+import spacy
+
+app = FastAPI()
+
+# Load classification and summarization models
+classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+# Load Indic NER (or any general one)
+tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
+model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
+ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+
+# Labels for classification
+labels = [
+    "task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "status_update",
+    "sick_notice", "out_of_office", "travel_plan", "celebration", "emotion", "other"
+]
+
+class TextInput(BaseModel):
+    text: str
+
+def extract_dates(text):
+    time_expressions = re.findall(
+        r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
+        text, flags=re.IGNORECASE)
+    parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+    return list(set(parsed)), list(set(time_expressions))
+
+def detect_tense(parsed_dates):
+    now = datetime.now()
+    tenses = set()
+    for d in parsed_dates:
+        dt = dateparser.parse(d)
+        if not dt:
+            continue
+        if dt < now:
+            tenses.add("past")
+        elif dt > now:
+            tenses.add("future")
+        else:
+            tenses.add("present")
+    return list(tenses) if tenses else ["unknown"]
+
+def generate_summary(text):
+    input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+    output_ids = summarizer_model.generate(input_ids, max_length=60, num_beams=4, early_stopping=True)
+    return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+
+def extract_people(text):
+    ner_results = ner_pipeline(text)
+    return list(set(ent['word'] for ent in ner_results if ent['entity_group'] == 'PER'))
+
+def estimate_mood(text):
+    text_lower = text.lower()
+    mood_map = {
+        "happy": ["happy", "excited", "joy", "grateful"],
+        "sad": ["sad", "upset", "crying", "lonely"],
+        "angry": ["angry", "annoyed", "frustrated", "irritated"],
+        "nervous": ["nervous", "anxious", "scared"],
+        "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
+        "neutral": []
+    }
+
+    for mood, keywords in mood_map.items():
+        for kw in keywords:
+            if kw in text_lower:
+                return mood
+    return "neutral"
+
+def generate_tags(label, text):
+    base_tags = [label]
+    keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
+    force_tags = []
+
+    if any(w in text.lower() for w in ["sick", "unwell", "not feeling well", "fever"]):
+        force_tags += ["sick", "leave"]
+    if "work" in text.lower():
+        force_tags.append("work")
+
+    return list(set(base_tags + force_tags + keywords))
+
+
+@app.post("/analyze")
+async def analyze(input: TextInput):
+    text = input.text
+
+    classification = classifier(text, labels)
+    best_label = classification['labels'][0]
+    scores = dict(zip(classification['labels'], classification['scores']))
+
+    parsed_dates, time_mentions = extract_dates(text)
+    tenses = detect_tense(parsed_dates)
+    summary = generate_summary(text)
+    people = extract_people(text)
+    mood = estimate_mood(text)
+    tags = generate_tags(best_label, text)
+
+    return {
+        "type": best_label,
+        "confidence_scores": scores,
+        "time_mentions": time_mentions,
+        "parsed_dates": parsed_dates,
+        "tense": tenses,
+        "summary": summary,
+        "people": people,
+        "mood": mood,
+        "tags": tags
+    }
+
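
Note: the /analyze endpoint above leans entirely on the zero-shot classifier to type the text. A minimal standalone sketch of that call, not part of the commit (assumes transformers and torch are installed; the first run downloads facebook/bart-large-mnli, which is large):

    # check_classifier.py — hypothetical sanity check, mirroring the pipeline config in good-main.py
    from transformers import pipeline

    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    result = classifier("Remind me to call the dentist tomorrow", ["task", "reminder", "event", "note"])
    # candidate labels come back sorted by score, highest first
    print(result["labels"][0], round(result["scores"][0], 3))
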
index.html
DELETED
@@ -1,19 +0,0 @@
-<!doctype html>
-<html>
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width" />
-    <title>My static Space</title>
-    <link rel="stylesheet" href="style.css" />
-  </head>
-  <body>
-    <div class="card">
-      <h1>Welcome to your static Space!</h1>
-      <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-      <p>
-        Also don't forget to check the
-        <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-      </p>
-    </div>
-  </body>
-</html>
main.py
ADDED
@@ -0,0 +1,291 @@
+import re
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification
+import dateparser
+from datetime import datetime
+from langdetect import detect
+from textblob import TextBlob
+from dateparser.search import search_dates
+import uuid
+import time
+
+app = FastAPI()
+
+# Load classification and summarization models
+classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+# Load Indic NER (or any general one)
+tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
+model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
+ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+
+# Labels for classification
+labels = [
+    "task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "status_update",
+    "sick_notice", "out_of_office", "travel_plan", "celebration", "emotion", "news", "information", "other"
+]
+
+class TextInput(BaseModel):
+    text: str
+
+# Extract date/time mentions with a regex (groups are non-capturing so findall returns whole matches)
+def extract_dates(text):
+    time_expressions = re.findall(
+        r'\b(?:\d{1,2}(?:st|nd|rd|th)?\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)(?:\s+\d{4})?|\d{1,2}:\d{2}\s?(?:AM|PM|am|pm)?)\b',
+        text, flags=re.IGNORECASE)
+    parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+    return list(set(parsed)), list(set(time_expressions))
+
+# More accurate extraction using dateparser's search_dates
+def extract_dates_with_accuracy(text):
+    settings = {
+        "PREFER_DATES_FROM": "future",      # Bias future
+        "RELATIVE_BASE": datetime.now(),    # Anchor to now
+        "RETURN_AS_TIMEZONE_AWARE": False,  # Use naive datetime
+    }
+
+    results = search_dates(text, settings=settings)
+    time_mentions, parsed = [], []
+
+    if results:
+        for mention, dt in results:
+            if len(mention.strip()) <= 3:
+                continue  # skip vague/short mentions like "on", "to"
+            if dt:
+                # Convert to clean ISO format (e.g. "2025-07-14T11:00:00")
+                parsed.append(dt.isoformat())
+                time_mentions.append(mention.strip())
+
+    return list(set(parsed)), list(set(time_mentions))
+
+def detect_tense(parsed_dates):
+    now = datetime.now()
+    tenses = set()
+    for d in parsed_dates:
+        dt = dateparser.parse(d)
+        if not dt:
+            continue
+        if dt < now:
+            tenses.add("past")
+        elif dt > now:
+            tenses.add("future")
+        else:
+            tenses.add("present")
+    return list(tenses) if tenses else ["unknown"]
+
+def generate_summary(text):
+    input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+    output_ids = summarizer_model.generate(input_ids, max_length=60, num_beams=4, early_stopping=True)
+    return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+
+def extract_people(text):
+    ner_results = ner_pipeline(text)
+    return list(set(ent['word'] for ent in ner_results if ent['entity_group'] == 'PER'))
+
+def estimate_mood(text):
+    text_lower = text.lower()
+    mood_map = {
+        "happy": ["happy", "excited", "joy", "grateful"],
+        "sad": ["sad", "upset", "crying", "lonely"],
+        "angry": ["angry", "annoyed", "frustrated", "irritated"],
+        "nervous": ["nervous", "anxious", "scared"],
+        "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
+        "neutral": []
+    }
+
+    for mood, keywords in mood_map.items():
+        for kw in keywords:
+            if kw in text_lower:
+                return mood
+    return "neutral"
+
+def generate_tags(label, text):
+    base_tags = [label]
+    keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
+    force_tags = []
+
+    if any(w in text.lower() for w in ["sick", "unwell", "not feeling well", "fever"]):
+        force_tags += ["sick", "leave"]
+    if "work" in text.lower():
+        force_tags.append("work")
+
+    return list(set(base_tags + force_tags + keywords))
+
+# Detect language using langdetect
+def detect_language(text):
+    try:
+        return detect(text)
+    except Exception:
+        return "unknown"
+
+# Detect sentiment using TextBlob
+def get_sentiment_score(text):
+    try:
+        blob = TextBlob(text)
+        return round(blob.sentiment.polarity, 3)  # Range: -1 to 1
+    except Exception:
+        return 0.0
+
+# Infer intent based on label
+def infer_intent(label, text):
+    label_to_intent = {
+        "out_of_office": "taking_leave",
+        "sick_notice": "taking_leave",
+        "reminder": "set_reminder",
+        "event": "log_event",
+        "meeting": "schedule_meeting",
+        "note": "log_note",
+        "journal": "log_memory",
+        "memory": "log_memory",
+        "status_update": "status_update",
+        "task": "create_task",
+        "celebration": "log_event"
+    }
+    return label_to_intent.get(label, "other")
+
+# Extract entities using NER
+def extract_entities(text):
+    ner_results = ner_pipeline(text)
+    entities = {"people": [], "places": [], "organizations": [], "dates": [], "misc": []}
+
+    for ent in ner_results:
+        word = ent["word"].replace("##", "")
+        if len(word) <= 2 or not word.isalpha():
+            continue  # skip single-letter non-words
+        group = ent["entity_group"]
+        if group == "PER":
+            entities["people"].append(word)
+        elif group == "LOC":
+            entities["places"].append(word)
+        elif group == "ORG":
+            entities["organizations"].append(word)
+        elif group == "DATE":
+            entities["dates"].append(word)
+        else:
+            entities["misc"].append(word)
+
+    # ✅ Fallback: add known days/dates if not already captured
+    day_keywords = re.findall(r'\b(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\b', text, re.IGNORECASE)
+    for day in day_keywords:
+        if day not in entities["dates"]:
+            entities["dates"].append(day)
+
+    # ✅ Fallback: add phrases like "product launch", "project", etc. to misc
+    lower_text = text.lower()
+    if "product launch" in lower_text:
+        entities["misc"].append("product launch")
+    if "birthday" in lower_text:
+        entities["misc"].append("birthday")
+    if "project" in lower_text:
+        entities["misc"].append("project")
+
+    # ✅ Deduplicate and return
+
+    return {k: list(set(v)) for k, v in entities.items()}
+
+# Calculate an urgency score from keywords and parsed dates
+def get_urgency_score(text, parsed_dates):
+    urgency_keywords = ["urgent", "asap", "immediate", "must", "need to", "important", "don’t forget", "right away"]
+    text_lower = text.lower()
+
+    score = 0.0
+
+    # 1. Keyword-based boost
+    if any(word in text_lower for word in urgency_keywords):
+        score = 0.7
+
+    # 2. Time-based boost
+    now = datetime.now()
+    for d in parsed_dates:
+        dt = dateparser.parse(d)
+        if dt:
+            hours = (dt - now).total_seconds() / 3600
+            if 0 <= hours <= 24:
+                score = max(score, 1.0)
+            elif 24 < hours <= 72:
+                score = max(score, 0.8)
+            elif 72 < hours <= 168:
+                score = max(score, 0.5)
+
+    return round(score, 2)
+
+# Collect meta information about the text and request time
+def get_meta_info(text: str):
+    now = datetime.now()
+    return {
+        "word_count": len(text.strip().split()),
+        "day_of_week": now.strftime('%A'),  # e.g., "Thursday"
+        "hour_of_day": now.hour,  # 0 to 23
+        "month": now.strftime('%B'),  # e.g., "July"
+        "year": now.year
+    }
+
+
+@app.post("/analyze")
+async def analyze(input: TextInput):
+    start_time = time.time()  # ⏱️ start
+
+    text = input.text
+
+    classification = classifier(text, labels)
+    best_label = classification['labels'][0]
+
+    # Reporting verbs usually signal news rather than a personal task
+    if "reported" in text or "announced" in text or "collapsed" in text:
+        if best_label in ["task", "reminder", "event"]:
+            best_label = "news"
+
+    scores = dict(zip(classification['labels'], classification['scores']))
+
+    parsed_dates, time_mentions = extract_dates_with_accuracy(text)
+    tenses = detect_tense(parsed_dates)
+    summary = generate_summary(text).removeprefix("summary:").strip()
+    people = extract_people(text)
+    mood = estimate_mood(text)
+    tags = generate_tags(best_label, text)
+    language_detected = detect_language(text)
+    sentiment_score = get_sentiment_score(text)
+    entities = extract_entities(text)
+    intent = infer_intent(best_label, text)
+    urgency_score = get_urgency_score(text, parsed_dates)
+
+    # Define action triggers
+    ACTION_TRIGGERS = ["plan", "organize", "schedule", "remember", "book", "call", "follow up", "need to"]
+    action_required = any(word in text.lower() for word in ACTION_TRIGGERS)
+
+    action_required = urgency_score >= 0.6 or action_required
+    meta = get_meta_info(text)
+
+    end_time = time.time()  # ⏱️ end
+    processing_time_ms = round((end_time - start_time) * 1000)
+
+    return {
+        "uuid": str(uuid.uuid4()),  # Unique identifier for the request
+        "raw_text": text,
+        "word_count": meta["word_count"],
+        "day_of_week": meta["day_of_week"],
+        "hour_of_day": meta["hour_of_day"],
+        "month": meta["month"],
+        "year": meta["year"],
+        "type": best_label,
+        "intent": intent,
+        "confidence_scores": scores,
+        "urgency_score": urgency_score,
+        "time_mentions": time_mentions,
+        "parsed_dates": parsed_dates,
+        "tense": tenses,
+        "summary": summary,
+        "people": people,
+        "mood": mood,
+        "language": language_detected,
+        "sentiment_score": sentiment_score,
+        "tags": tags,
+        "action_required": action_required,
+        "entities": entities,
+        "processing_time_ms": processing_time_ms
+    }
+
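
Note: a hypothetical end-to-end smoke test for the endpoint above (not part of the commit). Start the app locally, e.g. `uvicorn main:app --port 7860`, then post a sentence. The `requests` client used here is an assumed extra dependency; it is not listed in requirements.txt.

    # smoke_test.py — hypothetical client sketch against a locally running instance
    import requests

    resp = requests.post(
        "http://localhost:7860/analyze",
        json={"text": "Need to schedule a meeting with Priya tomorrow at 11:00 AM"},
    )
    data = resp.json()
    # a few of the fields main.py returns
    print(data["type"], data["intent"], data["urgency_score"], data["parsed_dates"])
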
old-main.py
ADDED
@@ -0,0 +1,67 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+import dateparser
+from datetime import datetime
+import re
+
+app = FastAPI()
+
+# Load classification model
+classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+
+# Load summarization model
+summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+# Labels
+labels = ["task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "other"]
+
+class TextInput(BaseModel):
+    text: str
+
+def extract_dates(text):
+    time_expressions = re.findall(
+        r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
+        text, flags=re.IGNORECASE)
+    parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+    return list(set(parsed)), list(set(time_expressions))
+
+def detect_tense(parsed_dates):
+    now = datetime.now()
+    tenses = set()
+    for d in parsed_dates:
+        dt = dateparser.parse(d)
+        if not dt:
+            continue
+        if dt < now:
+            tenses.add("past")
+        elif dt > now:
+            tenses.add("future")
+        else:
+            tenses.add("present")
+    return list(tenses) if tenses else ["unknown"]
+
+def generate_summary(text):
+    input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+    output_ids = summarizer_model.generate(input_ids, max_length=50, num_beams=4, early_stopping=True)
+    return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+@app.post("/analyze")
+async def analyze(input: TextInput):
+    text = input.text
+    classification = classifier(text, labels)
+    best_label = classification['labels'][0]
+    scores = dict(zip(classification['labels'], classification['scores']))
+    parsed_dates, time_mentions = extract_dates(text)
+    tenses = detect_tense(parsed_dates)
+    summary = generate_summary(text)
+
+    return {
+        "type": best_label,
+        "confidence_scores": scores,
+        "time_mentions": time_mentions,
+        "parsed_dates": parsed_dates,
+        "tense": tenses,
+        "summary": summary
+    }
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+fastapi==0.110.0
+uvicorn==0.29.0
+transformers==4.40.0
+torch>=2.0.0
+dateparser==1.2.0
+# spacy
+langdetect
+textblob
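
Note: spacy is commented out here even though good-main.py and with-english-name-spacy.py import it; only main.py, which avoids spaCy, runs cleanly against this list. To try the Space locally, one plausible path (an assumption, not documented in the commit) is `pip install -r requirements.txt` followed by a launcher like:

    # run_local.py — hypothetical launcher, not part of the commit.
    # Port 7860 is the conventional Hugging Face Spaces port; treat it as an assumption.
    import uvicorn

    if __name__ == "__main__":
        uvicorn.run("main:app", host="0.0.0.0", port=7860)
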
style.css
DELETED
@@ -1,28 +0,0 @@
-body {
-  padding: 2rem;
-  font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
-}
-
-h1 {
-  font-size: 16px;
-  margin-top: 0;
-}
-
-p {
-  color: rgb(107, 114, 128);
-  font-size: 15px;
-  margin-bottom: 10px;
-  margin-top: 5px;
-}
-
-.card {
-  max-width: 620px;
-  margin: 0 auto;
-  padding: 16px;
-  border: 1px solid lightgray;
-  border-radius: 16px;
-}
-
-.card p:last-child {
-  margin-bottom: 0;
-}
with-english-name-spacy.py
ADDED
@@ -0,0 +1,121 @@
+import re
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification
+import dateparser
+from datetime import datetime
+import spacy
+
+app = FastAPI()
+
+# Load classification and summarization models
+classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+# Load spaCy English model for name/entity detection
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:  # model not downloaded yet
+    import subprocess
+    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
+    nlp = spacy.load("en_core_web_sm")
+
+# Labels for classification
+labels = [
+    "task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "status_update",
+    "sick_notice", "out_of_office", "travel_plan", "celebration", "emotion", "other"
+]
+
+class TextInput(BaseModel):
+    text: str
+
+def extract_dates(text):
+    time_expressions = re.findall(
+        r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
+        text, flags=re.IGNORECASE)
+    parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+    return list(set(parsed)), list(set(time_expressions))
+
+def detect_tense(parsed_dates):
+    now = datetime.now()
+    tenses = set()
+    for d in parsed_dates:
+        dt = dateparser.parse(d)
+        if not dt:
+            continue
+        if dt < now:
+            tenses.add("past")
+        elif dt > now:
+            tenses.add("future")
+        else:
+            tenses.add("present")
+    return list(tenses) if tenses else ["unknown"]
+
+def generate_summary(text):
+    input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+    output_ids = summarizer_model.generate(input_ids, max_length=60, num_beams=4, early_stopping=True)
+    return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+
+def extract_people(text):
+    doc = nlp(text)
+    return list(set(ent.text for ent in doc.ents if ent.label_ in ["PERSON"]))
+
+def estimate_mood(text):
+    text_lower = text.lower()
+    mood_map = {
+        "happy": ["happy", "excited", "joy", "grateful"],
+        "sad": ["sad", "upset", "crying", "lonely"],
+        "angry": ["angry", "annoyed", "frustrated", "irritated"],
+        "nervous": ["nervous", "anxious", "scared"],
+        "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
+        "neutral": []
+    }
+
+    for mood, keywords in mood_map.items():
+        for kw in keywords:
+            if kw in text_lower:
+                return mood
+    return "neutral"
+
+def generate_tags(label, text):
+    base_tags = [label]
+    keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
+    force_tags = []
+
+    if any(w in text.lower() for w in ["sick", "unwell", "not feeling well", "fever"]):
+        force_tags += ["sick", "leave"]
+    if "work" in text.lower():
+        force_tags.append("work")
+
+    return list(set(base_tags + force_tags + keywords))
+
+
+@app.post("/analyze")
+async def analyze(input: TextInput):
+    text = input.text
+
+    classification = classifier(text, labels)
+    best_label = classification['labels'][0]
+    scores = dict(zip(classification['labels'], classification['scores']))
+
+    parsed_dates, time_mentions = extract_dates(text)
+    tenses = detect_tense(parsed_dates)
+    summary = generate_summary(text)
+    people = extract_people(text)
+    mood = estimate_mood(text)
+    tags = generate_tags(best_label, text)
+
+    return {
+        "type": best_label,
+        "confidence_scores": scores,
+        "time_mentions": time_mentions,
+        "parsed_dates": parsed_dates,
+        "tense": tenses,
+        "summary": summary,
+        "people": people,
+        "mood": mood,
+        "tags": tags
+    }
+
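
Note: a hypothetical standalone check of the spaCy-based extract_people path above (not part of the commit; assumes en_core_web_sm is already downloaded, and the names are made-up inputs whose extraction may vary by model version):

    # check_people.py — hypothetical sketch of the PERSON-entity extraction
    import spacy

    nlp = spacy.load("en_core_web_sm")
    doc = nlp("Lunch with Priya and Rahul on Friday at noon.")
    print([ent.text for ent in doc.ents if ent.label_ == "PERSON"])  # e.g. ['Priya', 'Rahul']
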