"""FastAPI service that classifies a short free-text note and extracts metadata.

A single POST endpoint, ``/analyze``, returns the zero-shot label, the time
expressions found in the text, a coarse tense, a generated summary, the people
mentioned, a keyword-based mood and a set of tags.
"""

import re
import subprocess
import sys
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional, Tuple

import dateparser
import spacy
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)

app = FastAPI()

# Load classification and summarization models
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

# Load spaCy English model for name/entity detection
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spaCy reports a missing model package as OSError. Download it with the
    # interpreter that is running this module (not whatever "python" is on
    # PATH) and fail loudly if the download itself does not succeed.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Labels for classification
labels = [
    "task", "event", "reminder", "meeting", "relationship", "note",
    "journal", "memory", "status_update", "sick_notice", "out_of_office",
    "travel_plan", "celebration", "emotion", "other"
]

# Time expressions recognised in the text: a few romanised Hindi words,
# English relative days, weekday names and numeric d/m/y dates.
_TIME_EXPRESSION_RE = re.compile(
    r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
    flags=re.IGNORECASE,
)

# Words of four or more ASCII letters; each one becomes a tag.
_KEYWORD_RE = re.compile(r'\b[a-zA-Z]{4,}\b')

# Mood -> trigger terms. Moods are checked in this order; the first hit wins.
_MOOD_KEYWORDS = {
    "happy": ["happy", "excited", "joy", "grateful"],
    "sad": ["sad", "upset", "crying", "lonely"],
    "angry": ["angry", "annoyed", "frustrated", "irritated"],
    "nervous": ["nervous", "anxious", "scared"],
    "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
    "neutral": [],
}

# Terms that force the "sick" and "leave" tags.
_SICK_TERMS = ["sick", "unwell", "not feeling well", "fever"]


class TextInput(BaseModel):
    """Request body for ``/analyze``."""

    # Raw text to analyse.
    text: str


def _mentions_any(text_lower: str, terms: Iterable[str]) -> bool:
    """Return True if any term occurs in *text_lower* at the start of a word.

    Anchoring on a leading word boundary keeps inflected forms ("working",
    "sickness") while rejecting hits buried inside unrelated words
    ("unhappy" for "happy", "scold" for "cold", "network" for "work").
    """
    return any(
        re.search(r"\b" + re.escape(term), text_lower) for term in terms
    )


def _to_datetime(value: str) -> Optional[datetime]:
    """Turn a date string back into a datetime, or None if it cannot be parsed.

    ``extract_dates`` emits ``str(datetime)``, which ``datetime.fromisoformat``
    reads back exactly; anything else is handed to ``dateparser``.
    """
    try:
        return datetime.fromisoformat(value)
    except (TypeError, ValueError):
        return dateparser.parse(value)


def extract_dates(text: str) -> Tuple[List[str], List[str]]:
    """Find time expressions in *text* and resolve them with dateparser.

    Returns:
        ``(parsed_dates, time_mentions)``. ``time_mentions`` holds the distinct
        matched substrings in first-seen order. ``parsed_dates`` holds the
        distinct ``str(datetime)`` values for the mentions dateparser could
        resolve, in the same order.
    """
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    time_mentions = list(dict.fromkeys(_TIME_EXPRESSION_RE.findall(text)))

    parsed_dates = []
    for mention in time_mentions:
        resolved = dateparser.parse(mention)  # parse once per distinct mention
        if resolved:
            parsed_dates.append(str(resolved))

    return list(dict.fromkeys(parsed_dates)), time_mentions


def detect_tense(parsed_dates: Iterable[str]) -> List[str]:
    """Classify date strings as "past", "present" and/or "future".

    Dates are compared with the current day at calendar-day granularity: a date
    that falls on today is "present", earlier days are "past", later days are
    "future". Strings that cannot be parsed are skipped.

    Returns:
        The distinct tenses found, sorted alphabetically, or ``["unknown"]``
        when no date could be classified.
    """
    tenses = set()
    for raw in parsed_dates:
        moment = _to_datetime(raw)
        if not moment:
            continue
        # Take "now" in the parsed value's own timezone (None for naive
        # values) so aware and naive datetimes are never mixed.
        today = datetime.now(moment.tzinfo).date()
        day = moment.date()
        if day < today:
            tenses.add("past")
        elif day > today:
            tenses.add("future")
        else:
            tenses.add("present")
    return sorted(tenses) if tenses else ["unknown"]


def generate_summary(text: str) -> str:
    """Summarise *text* with the flan-t5 model using a "summarize: " prompt.

    Generation uses beam search with 4 beams and at most 60 output tokens.
    """
    # truncation=True caps the prompt at the tokenizer's configured maximum
    # length, so a very long request body cannot produce an unbounded input.
    encoded = summarizer_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        truncation=True,
    )
    output_ids = summarizer_model.generate(
        encoded.input_ids,
        max_length=60,
        num_beams=4,
        early_stopping=True,
    )
    return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)


def extract_people(text: str) -> List[str]:
    """Return the distinct PERSON entities spaCy finds in *text*, sorted."""
    doc = nlp(text)
    return sorted({ent.text for ent in doc.ents if ent.label_ == "PERSON"})


def estimate_mood(text: str) -> str:
    """Guess a mood from keyword hits; "neutral" when nothing matches.

    Moods are tried in the order they appear in ``_MOOD_KEYWORDS`` and the
    first mood with a matching term is returned.
    """
    text_lower = text.lower()
    for mood, keywords in _MOOD_KEYWORDS.items():
        if _mentions_any(text_lower, keywords):
            return mood
    return "neutral"


def generate_tags(label: str, text: str) -> List[str]:
    """Build the tag list for *text*.

    Tags are the classification *label*, every word of four or more letters
    in the lower-cased text, plus "sick"/"leave" when an illness term is
    mentioned and "work" when a word starting with "work" is mentioned.

    Returns:
        The distinct tags, sorted alphabetically.
    """
    text_lower = text.lower()  # lower-case once and reuse

    tags = {label}
    tags.update(_KEYWORD_RE.findall(text_lower))
    if _mentions_any(text_lower, _SICK_TERMS):
        tags.update(("sick", "leave"))
    if _mentions_any(text_lower, ["work"]):
        tags.add("work")
    return sorted(tags)


@app.post("/analyze")
async def analyze(input: TextInput) -> Dict[str, Any]:
    """Classify the submitted text and return all extracted metadata.

    The parameter keeps the name ``input`` (shadowing the builtin) so the
    function signature stays unchanged.
    """
    # NOTE(review): every call below is synchronous, so this coroutine holds
    # the event loop for the full duration of model inference.
    text = input.text

    classification = classifier(text, labels)
    # The first label is used as the best match; presumably the pipeline
    # returns labels ordered by descending score.
    best_label = classification['labels'][0]
    scores = dict(zip(classification['labels'], classification['scores']))

    parsed_dates, time_mentions = extract_dates(text)

    return {
        "type": best_label,
        "confidence_scores": scores,
        "time_mentions": time_mentions,
        "parsed_dates": parsed_dates,
        "tense": detect_tense(parsed_dates),
        "summary": generate_summary(text),
        "people": extract_people(text),
        "mood": estimate_mood(text),
        "tags": generate_tags(best_label, text),
    }