# demospace / old-main.py
# Mitesh Koshiya
# Update space 1st time
# 5f52527
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import dateparser
from datetime import datetime
import re
# FastAPI application object; route handlers in this file register on it.
app = FastAPI()

# Zero-shot classifier used to pick one of `labels` for a piece of text.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Summarization tokenizer and seq2seq model, both loaded from one checkpoint.
_SUMMARIZER_CHECKPOINT = "google/flan-t5-small"
summarizer_tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZER_CHECKPOINT)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZER_CHECKPOINT)

# Candidate labels handed to the zero-shot classifier.
labels = [
    "task",
    "event",
    "reminder",
    "meeting",
    "relationship",
    "note",
    "journal",
    "memory",
    "other",
]
# Request model with a single required string field, `text`.
# Documented with comments rather than a docstring so that the schema
# generated for the model stays exactly as it was.
class TextInput(BaseModel):
    # Free-form text to classify, scan for time expressions and summarize.
    text: str
# Compiled once at import time instead of on every call. The alternation is
# the same as before, only split across lines for readability.
_TIME_EXPRESSION_RE = re.compile(
    r"\b("
    r"kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje"
    r"|next week|tomorrow|today|yesterday"
    r"|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday"
    r"|[\d]{1,2}/[\d]{1,2}/[\d]{2,4}"
    r")\b",
    flags=re.IGNORECASE,
)


def extract_dates(text):
    """Find time expressions in *text* and resolve those dateparser understands.

    Args:
        text: Free-form text to scan.

    Returns:
        A ``(parsed_dates, time_mentions)`` tuple of two lists.
        ``time_mentions`` holds the distinct matched expressions exactly as
        they appear in *text*, in order of first occurrence.
        ``parsed_dates`` holds the distinct ``str(datetime)`` values for the
        mentions that ``dateparser.parse`` could resolve, in the same order.
        Both lists are empty when nothing matches.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is deterministic (converting through a set is not).
    mentions = list(dict.fromkeys(_TIME_EXPRESSION_RE.findall(text)))

    resolved = {}
    for mention in mentions:
        # Each distinct mention is parsed exactly once. Relative expressions
        # such as "tomorrow" resolve against the current instant, so parsing
        # a repeated mention again would yield timestamps that differ by
        # microseconds and could never be de-duplicated.
        moment = dateparser.parse(mention)
        if moment is not None:
            resolved[str(moment)] = None

    return list(resolved), mentions
def detect_tense(parsed_dates):
    """Classify date strings as past, present or future relative to now.

    Args:
        parsed_dates: Iterable of date strings. ISO-style strings (the format
            ``str(datetime)`` produces) are read with
            ``datetime.fromisoformat``; anything else falls back to
            ``dateparser.parse``. Strings that cannot be parsed are skipped.

    Returns:
        A list containing each of ``"past"``, ``"present"`` and ``"future"``
        that applies, always in that order, or ``["unknown"]`` when no input
        could be classified.
    """
    found = set()
    for raw in parsed_dates:
        try:
            # Exact and unambiguous for ISO strings; no heuristic re-parsing.
            moment = datetime.fromisoformat(raw)
        except ValueError:
            moment = dateparser.parse(raw)
        if moment is None:
            continue

        # Take "now" in the value's own timezone (naive when tzinfo is None)
        # so aware and naive datetimes are never compared with each other.
        today = datetime.now(moment.tzinfo).date()
        day = moment.date()

        # Compare at calendar-day granularity. Exact-instant equality with
        # "now" practically never holds, which made "present" unreachable and
        # caused a date resolved as "today" to be reported as past.
        if day < today:
            found.add("past")
        elif day > today:
            found.add("future")
        else:
            found.add("present")

    ordered = [tense for tense in ("past", "present", "future") if tense in found]
    return ordered or ["unknown"]
def generate_summary(text):
    """Return the summarization model's decoded output for *text*.

    The text is prefixed with ``"summarize: "``, tokenized, passed through
    beam-search generation (4 beams, ``max_length=50``) and the first
    generated sequence is decoded with special tokens removed.
    """
    prompt = "summarize: " + text
    encoded = summarizer_tokenizer(prompt, return_tensors="pt")
    generated = summarizer_model.generate(
        encoded.input_ids,
        max_length=50,
        num_beams=4,
        early_stopping=True,
    )
    # Only the first generated sequence is decoded.
    first_sequence = generated[0]
    return summarizer_tokenizer.decode(first_sequence, skip_special_tokens=True)
# POST /analyze: classify the submitted text, pull out time expressions,
# derive tenses from the resolved dates and attach a generated summary.
# Comments are used instead of a docstring so the endpoint's generated API
# description stays as it was.
@app.post("/analyze")
async def analyze(input: TextInput):
    # NOTE(review): none of the calls below are awaited, so the whole request
    # is processed synchronously inside this coroutine.
    body_text = input.text

    zero_shot = classifier(body_text, labels)
    ranked_labels = zero_shot['labels']
    score_by_label = {
        name: score for name, score in zip(ranked_labels, zero_shot['scores'])
    }

    dates, mentions = extract_dates(body_text)

    return {
        # First label returned by the classifier; presumably the top-scoring
        # one -- confirm against the pipeline's output ordering.
        "type": ranked_labels[0],
        "confidence_scores": score_by_label,
        "time_mentions": mentions,
        "parsed_dates": dates,
        "tense": detect_tense(dates),
        "summary": generate_summary(body_text),
    }