Spaces:
Running
Running
from fastapi import FastAPI | |
from pydantic import BaseModel | |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
import dateparser | |
from datetime import datetime | |
import re | |
# Hugging Face checkpoint identifiers, named once so the tokenizer and the
# model below are always loaded from the same checkpoint.
_CLASSIFIER_CHECKPOINT = "facebook/bart-large-mnli"
_SUMMARIZER_CHECKPOINT = "google/flan-t5-small"

app = FastAPI()

# Zero-shot classification pipeline; loaded once at import time.
classifier = pipeline("zero-shot-classification", model=_CLASSIFIER_CHECKPOINT)

# Seq2seq tokenizer/model pair used by `generate_summary`.
summarizer_tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZER_CHECKPOINT)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZER_CHECKPOINT)

# Candidate labels handed to the zero-shot classifier.
labels = [
    "task",
    "event",
    "reminder",
    "meeting",
    "relationship",
    "note",
    "journal",
    "memory",
    "other",
]
class TextInput(BaseModel):
    """Input model for `analyze`: wraps the raw text to be processed."""

    # The free-form text that gets classified, date-scanned and summarized.
    text: str
# Time expressions recognised in the input text (matched case-insensitively).
_TIME_EXPRESSION_RE = re.compile(
    r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
    flags=re.IGNORECASE,
)


def extract_dates(text):
    """Find time expressions in *text* and resolve them to timestamps.

    Args:
        text: Free-form input string to scan.

    Returns:
        A ``(parsed_dates, time_mentions)`` tuple of two lists:

        * ``parsed_dates`` -- ``str(datetime)`` for every mention that
          ``dateparser.parse`` could resolve, without duplicates.
        * ``time_mentions`` -- the distinct matched expressions exactly as
          they appear in *text* (deduplication is case-sensitive).

        Both lists are in first-occurrence order, so the result is stable
        from one call (and one process) to the next.
    """
    # dict.fromkeys dedupes while keeping first-seen order; a plain set would
    # yield an arbitrary order that changes between interpreter runs.
    mentions = list(dict.fromkeys(_TIME_EXPRESSION_RE.findall(text)))

    parsed = []
    for mention in mentions:
        # Parse each distinct mention exactly once: relative expressions are
        # resolved against the current clock, so parsing the same mention
        # repeatedly can produce timestamps that differ by microseconds.
        moment = dateparser.parse(mention)
        if moment is not None:
            parsed.append(str(moment))

    return list(dict.fromkeys(parsed)), mentions
def detect_tense(parsed_dates):
    """Classify timestamps as past, present or future relative to now.

    Args:
        parsed_dates: Iterable of date strings, typically the
            ``str(datetime)`` values produced by ``extract_dates``.

    Returns:
        A list containing whichever of ``"past"``, ``"present"`` and
        ``"future"`` apply, always in that order, or ``["unknown"]`` when no
        entry could be interpreted as a date.
    """
    naive_now = datetime.now()
    found = set()

    for raw in parsed_dates:
        try:
            # str(datetime) is ISO-like, so parse it directly; this avoids
            # re-interpreting it with a natural-language parser.
            moment = datetime.fromisoformat(raw)
        except ValueError:
            # Not ISO formatted: fall back to the lenient parser.
            moment = dateparser.parse(raw)
        if moment is None:
            continue

        # Naive and aware datetimes cannot be compared with each other, so
        # pick a "now" that matches the awareness of the parsed value.
        if moment.utcoffset() is None:
            now = naive_now
        else:
            now = datetime.now(moment.tzinfo)

        if moment < now:
            found.add("past")
        elif moment > now:
            found.add("future")
        else:
            # Only reached when the timestamp equals the current instant.
            found.add("present")

    ordered = [tense for tense in ("past", "present", "future") if tense in found]
    return ordered or ["unknown"]
def generate_summary(text):
    """Produce a short summary of *text* with the seq2seq summarizer model.

    Args:
        text: The input string to summarize.

    Returns:
        The decoded summary string (special tokens removed).
    """
    # truncation=True caps the prompt at the tokenizer's configured maximum
    # length, so an arbitrarily long request body cannot produce an
    # arbitrarily long input sequence for the model.
    encoded = summarizer_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        truncation=True,
    )
    generated = summarizer_model.generate(
        encoded.input_ids,
        max_length=50,
        num_beams=4,
        early_stopping=True,
    )
    # generate() returns a batch; a single prompt was passed, so take row 0.
    return summarizer_tokenizer.decode(generated[0], skip_special_tokens=True)
async def analyze(input: TextInput):
    """Classify, date-scan and summarize the submitted text.

    Args:
        input: Model whose ``text`` attribute holds the text to analyze.

    Returns:
        A dict with the keys ``type`` (first label returned by the
        classifier), ``confidence_scores`` (label -> score mapping),
        ``time_mentions``, ``parsed_dates``, ``tense`` and ``summary``.
    """
    # NOTE(review): no route decorator is visible on this handler in the
    # section under review -- confirm it is registered with `app`.
    raw_text = input.text

    result = classifier(raw_text, labels)
    ranked_labels = result['labels']
    top_label = ranked_labels[0]
    score_by_label = {
        label: score for label, score in zip(ranked_labels, result['scores'])
    }

    dates, mentions = extract_dates(raw_text)

    return {
        "type": top_label,
        "confidence_scores": score_by_label,
        "time_mentions": mentions,
        "parsed_dates": dates,
        "tense": detect_tense(dates),
        "summary": generate_summary(raw_text),
    }