# demospace / with-english-name-spacy.py
# Author: Mitesh Koshiya — "Update space 1st time" (commit 5f52527)
import re
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification
import dateparser
from datetime import datetime
import spacy
# FastAPI application instance; routes are registered on it below.
app = FastAPI()
# Load classification and summarization models once at import time so every
# request reuses them (first startup downloads the weights if not cached).
# Zero-shot classifier: scores the input text against the `labels` list.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# flan-t5-small tokenizer/model pair used by generate_summary().
summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
# Load spaCy English model for name/entity detection, downloading it on first run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model package not installed yet.  Catch only OSError (what spacy.load
    # raises for a missing model) instead of a bare except that would also
    # swallow KeyboardInterrupt/SystemExit.  Download with the interpreter
    # actually running this app (sys.executable) — a bare "python" may point
    # at a different environment — and let check=True surface a failed
    # download instead of masking it behind a confusing second load error.
    import subprocess
    import sys

    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
# Candidate labels handed to the zero-shot classifier in /analyze; the
# highest-scoring one becomes the response's "type" field.
labels = [
    "task",
    "event",
    "reminder",
    "meeting",
    "relationship",
    "note",
    "journal",
    "memory",
    "status_update",
    "sick_notice",
    "out_of_office",
    "travel_plan",
    "celebration",
    "emotion",
    "other",
]
class TextInput(BaseModel):
    """Request body for the /analyze endpoint: the raw text to analyze."""

    text: str
def extract_dates(text):
    """Find date/time expressions in *text* and parse the resolvable ones.

    Matches a fixed vocabulary: Hindi/Hinglish time words ("kal", "aaj",
    "N baje", ...), English relative words and weekday names, and d/m/y
    numeric dates.

    Returns:
        (parsed, mentions): *parsed* is a de-duplicated list of
        ``str(datetime)`` values for every expression dateparser could
        resolve; *mentions* is the de-duplicated list of raw matched
        expressions.  Both lists come from sets, so order is arbitrary.
    """
    time_expressions = re.findall(
        r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
        text, flags=re.IGNORECASE)
    # Parse each expression once.  The original parsed every expression twice
    # (once to filter, once to format) — dateparser.parse is an expensive
    # call, so hold the result in a local instead.
    parsed = []
    for expression in time_expressions:
        dt = dateparser.parse(expression)
        if dt:
            parsed.append(str(dt))
    return list(set(parsed)), list(set(time_expressions))
def detect_tense(parsed_dates):
    """Classify parsed date strings as past/present/future relative to now.

    Each entry that dateparser can re-parse contributes one tense; the
    de-duplicated list is returned, or ["unknown"] when nothing parsed.
    """
    reference = datetime.now()
    found = set()
    for date_str in parsed_dates:
        moment = dateparser.parse(date_str)
        if moment is None:
            # Unparseable entry — skip rather than guess.
            continue
        if moment < reference:
            found.add("past")
        elif moment > reference:
            found.add("future")
        else:
            found.add("present")
    return list(found) if found else ["unknown"]
def generate_summary(text):
    """Summarize *text* with flan-t5-small (beam search, at most 60 tokens)."""
    prompt = "summarize: " + text
    encoded = summarizer_tokenizer(prompt, return_tensors="pt").input_ids
    generated = summarizer_model.generate(
        encoded, max_length=60, num_beams=4, early_stopping=True
    )
    return summarizer_tokenizer.decode(generated[0], skip_special_tokens=True)
def extract_people(text):
    """Return the unique PERSON entity strings spaCy finds in *text*."""
    document = nlp(text)
    people = {entity.text for entity in document.ents if entity.label_ == "PERSON"}
    return list(people)
def estimate_mood(text):
    """Return a coarse mood label via keyword spotting (substring match).

    The first mood, in declaration order, with any keyword found in the
    lower-cased text wins; "neutral" is the fallback when nothing matches.
    """
    lowered = text.lower()
    keyword_table = (
        ("happy", ("happy", "excited", "joy", "grateful")),
        ("sad", ("sad", "upset", "crying", "lonely")),
        ("angry", ("angry", "annoyed", "frustrated", "irritated")),
        ("nervous", ("nervous", "anxious", "scared")),
        ("unwell", ("sick", "unwell", "not feeling well", "fever", "cold", "headache")),
    )
    for mood, keywords in keyword_table:
        if any(keyword in lowered for keyword in keywords):
            return mood
    return "neutral"
def generate_tags(label, text):
    """Build a de-duplicated tag list for a classified text.

    Combines the classification *label*, forced tags triggered by sickness/
    work phrases, and every word of 4+ letters from the lower-cased text.
    Set-derived, so order is arbitrary.
    """
    lowered = text.lower()
    word_tags = re.findall(r'\b[a-zA-Z]{4,}\b', lowered)
    forced = []
    if any(marker in lowered for marker in ("sick", "unwell", "not feeling well", "fever")):
        forced.extend(["sick", "leave"])
    if "work" in lowered:
        forced.append("work")
    return list({label, *forced, *word_tags})
@app.post("/analyze")
async def analyze(input: TextInput):
    """Run the full analysis pipeline over the submitted text.

    Combines zero-shot classification, date/tense extraction, summarization,
    person detection, mood estimation and tag generation into one JSON
    response.
    """
    text = input.text

    classification = classifier(text, labels)
    best_label = classification['labels'][0]
    confidence = dict(zip(classification['labels'], classification['scores']))

    parsed_dates, time_mentions = extract_dates(text)

    # Dict literals evaluate values in order, so the helper-call sequence
    # matches the original: tense, summary, people, mood, tags.
    return {
        "type": best_label,
        "confidence_scores": confidence,
        "time_mentions": time_mentions,
        "parsed_dates": parsed_dates,
        "tense": detect_tense(parsed_dates),
        "summary": generate_summary(text),
        "people": extract_people(text),
        "mood": estimate_mood(text),
        "tags": generate_tags(best_label, text),
    }