Mitesh Koshiya committed on
Commit 5f52527 · 1 Parent(s): 43168d6

Update space 1st time

Files changed (9)
  1. .gitignore +28 -0
  2. .huggingface.yaml +3 -0
  3. good-main.py +118 -0
  4. index.html +0 -19
  5. main.py +291 -0
  6. old-main.py +67 -0
  7. requirements.txt +8 -0
  8. style.css +0 -28
  9. with-english-name-spacy.py +121 -0
.gitignore ADDED
@@ -0,0 +1,28 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # Virtual environment
+ venv
+
+ # Environment variables and secrets
+ .env
+
+ # VSCode settings (optional, if not shared across devs)
+ .vscode/
+
+ # OS files
+ .DS_Store
+ Thumbs.db
+
+ # Logs
+ *.log
+
+ # Python distribution / packaging
+ build/
+ dist/
+ *.egg-info/
+
+ # Jupyter Notebook checkpoints (if you use Jupyter)
+ .ipynb_checkpoints/
.huggingface.yaml ADDED
@@ -0,0 +1,3 @@
+ # .huggingface.yaml
+ sdk: "fastapi"
+ python_file: "main.py"
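The sdk: "fastapi" entry tells the Space to launch main.py directly. For trying the same app outside Hugging Face, a minimal local runner might look like this sketch (an assumption for local testing only, not part of this commit; port 7860 mirrors the Spaces default):

# Hypothetical local entry point; the Space itself starts main.py via the fastapi SDK.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860)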
good-main.py ADDED
@@ -0,0 +1,118 @@
+ import re
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification
+ import dateparser
+ from datetime import datetime
+ import spacy
+
+ app = FastAPI()
+
+ # Load classification and summarization models
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+ summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+ summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+ # Load Indic NER (or any general one)
+ tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
+ model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
+ ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+
+ # Labels for classification
+ labels = [
+     "task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "status_update",
+     "sick_notice", "out_of_office", "travel_plan", "celebration", "emotion", "other"
+ ]
+
+ class TextInput(BaseModel):
+     text: str
+
+ def extract_dates(text):
+     time_expressions = re.findall(
+         r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
+         text, flags=re.IGNORECASE)
+     parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+     return list(set(parsed)), list(set(time_expressions))
+
+ def detect_tense(parsed_dates):
+     now = datetime.now()
+     tenses = set()
+     for d in parsed_dates:
+         dt = dateparser.parse(d)
+         if not dt:
+             continue
+         if dt < now:
+             tenses.add("past")
+         elif dt > now:
+             tenses.add("future")
+         else:
+             tenses.add("present")
+     return list(tenses) if tenses else ["unknown"]
+
+ def generate_summary(text):
+     input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+     output_ids = summarizer_model.generate(input_ids, max_length=60, num_beams=4, early_stopping=True)
+     return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+
+ def extract_people(text):
+     ner_results = ner_pipeline(text)
+     return list(set(ent['word'] for ent in ner_results if ent['entity_group'] == 'PER'))
+
+ def estimate_mood(text):
+     text_lower = text.lower()
+     mood_map = {
+         "happy": ["happy", "excited", "joy", "grateful"],
+         "sad": ["sad", "upset", "crying", "lonely"],
+         "angry": ["angry", "annoyed", "frustrated", "irritated"],
+         "nervous": ["nervous", "anxious", "scared"],
+         "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
+         "neutral": []
+     }
+
+     for mood, keywords in mood_map.items():
+         for kw in keywords:
+             if kw in text_lower:
+                 return mood
+     return "neutral"
+
+ def generate_tags(label, text):
+     base_tags = [label]
+     keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
+     force_tags = []
+
+     if any(w in text.lower() for w in ["sick", "unwell", "not feeling well", "fever"]):
+         force_tags += ["sick", "leave"]
+     if "work" in text.lower():
+         force_tags.append("work")
+
+     return list(set(base_tags + force_tags + keywords))
+
+
+ @app.post("/analyze")
+ async def analyze(input: TextInput):
+     text = input.text
+
+     classification = classifier(text, labels)
+     best_label = classification['labels'][0]
+     scores = dict(zip(classification['labels'], classification['scores']))
+
+     parsed_dates, time_mentions = extract_dates(text)
+     tenses = detect_tense(parsed_dates)
+     summary = generate_summary(text)
+     people = extract_people(text)
+     mood = estimate_mood(text)
+     tags = generate_tags(best_label, text)
+
+     return {
+         "type": best_label,
+         "confidence_scores": scores,
+         "time_mentions": time_mentions,
+         "parsed_dates": parsed_dates,
+         "tense": tenses,
+         "summary": summary,
+         "people": people,
+         "mood": mood,
+         "tags": tags
+     }
+
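As a quick illustration of the keyword heuristics in good-main.py, a small sketch (the sample text is hypothetical and assumes estimate_mood and generate_tags are in scope):

# Hypothetical exercise of the mood and tag heuristics defined above.
text = "I am not feeling well, taking sick leave from work today"
print(estimate_mood(text))  # "unwell" ("sick" is in the unwell keyword list)
print(sorted(generate_tags("sick_notice", text)))
# includes the forced tags "sick", "leave", "work" plus 4+ letter words such as "feeling", "today"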
index.html DELETED
@@ -1,19 +0,0 @@
- <!doctype html>
- <html>
-   <head>
-     <meta charset="utf-8" />
-     <meta name="viewport" content="width=device-width" />
-     <title>My static Space</title>
-     <link rel="stylesheet" href="style.css" />
-   </head>
-   <body>
-     <div class="card">
-       <h1>Welcome to your static Space!</h1>
-       <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-       <p>
-         Also don't forget to check the
-         <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-       </p>
-     </div>
-   </body>
- </html>
main.py ADDED
@@ -0,0 +1,291 @@
+ import re
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification
+ import dateparser
+ from datetime import datetime
+ from langdetect import detect
+ from textblob import TextBlob
+ from dateparser.search import search_dates
+ import uuid
+ import time
+
+ app = FastAPI()
+
+ # Load classification and summarization models
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+ summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+ summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+ # Load Indic NER (or any general one)
+ tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
+ model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
+ ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+
+ # Labels for classification
+ labels = [
+     "task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "status_update",
+     "sick_notice", "out_of_office", "travel_plan", "celebration", "emotion", "news", "information", "other"
+ ]
+
+ class TextInput(BaseModel):
+     text: str
+
+ # Extract dates and time mentions based on regex patterns
+ # (non-capturing groups so re.findall returns whole matches, not group tuples)
+ def extract_dates(text):
+     time_expressions = re.findall(
+         r'\b(?:\d{1,2}(?:st|nd|rd|th)?\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)(?:\s+\d{4})?|\d{1,2}:\d{2}\s?(?:AM|PM|am|pm)?)\b',
+         text, flags=re.IGNORECASE)
+     parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+     return list(set(parsed)), list(set(time_expressions))
+
+ # Extract dates more robustly with dateparser's search_dates
+ def extract_dates_with_accuracy(text):
+     settings = {
+         "PREFER_DATES_FROM": "future",       # Bias ambiguous dates toward the future
+         "RELATIVE_BASE": datetime.now(),     # Anchor relative phrases to now
+         "RETURN_AS_TIMEZONE_AWARE": False,   # Use naive datetimes
+     }
+
+     results = search_dates(text, settings=settings)
+     time_mentions, parsed = [], []
+
+     if results:
+         for mention, dt in results:
+             if len(mention.strip()) <= 3:
+                 continue  # skip vague/short mentions like "on", "to"
+             if dt:
+                 # Convert to clean ISO format (e.g. "2025-07-14T11:00:00")
+                 parsed.append(dt.isoformat())
+                 time_mentions.append(mention.strip())
+
+     return list(set(parsed)), list(set(time_mentions))
+
+ def detect_tense(parsed_dates):
+     now = datetime.now()
+     tenses = set()
+     for d in parsed_dates:
+         dt = dateparser.parse(d)
+         if not dt:
+             continue
+         if dt < now:
+             tenses.add("past")
+         elif dt > now:
+             tenses.add("future")
+         else:
+             tenses.add("present")
+     return list(tenses) if tenses else ["unknown"]
+
+ def generate_summary(text):
+     input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+     output_ids = summarizer_model.generate(input_ids, max_length=60, num_beams=4, early_stopping=True)
+     return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+
+ def extract_people(text):
+     ner_results = ner_pipeline(text)
+     return list(set(ent['word'] for ent in ner_results if ent['entity_group'] == 'PER'))
+
+ def estimate_mood(text):
+     text_lower = text.lower()
+     mood_map = {
+         "happy": ["happy", "excited", "joy", "grateful"],
+         "sad": ["sad", "upset", "crying", "lonely"],
+         "angry": ["angry", "annoyed", "frustrated", "irritated"],
+         "nervous": ["nervous", "anxious", "scared"],
+         "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
+         "neutral": []
+     }
+
+     for mood, keywords in mood_map.items():
+         for kw in keywords:
+             if kw in text_lower:
+                 return mood
+     return "neutral"
+
+ def generate_tags(label, text):
+     base_tags = [label]
+     keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
+     force_tags = []
+
+     if any(w in text.lower() for w in ["sick", "unwell", "not feeling well", "fever"]):
+         force_tags += ["sick", "leave"]
+     if "work" in text.lower():
+         force_tags.append("work")
+
+     return list(set(base_tags + force_tags + keywords))
+
+ # Detect language using langdetect
+ def detect_language(text):
+     try:
+         return detect(text)
+     except Exception:
+         return "unknown"
+
+ # Detect sentiment using TextBlob
+ def get_sentiment_score(text):
+     try:
+         blob = TextBlob(text)
+         return round(blob.sentiment.polarity, 3)  # Range: -1 to 1
+     except Exception:
+         return 0.0
+
+ # Infer intent based on label
+ def infer_intent(label, text):
+     label_to_intent = {
+         "out_of_office": "taking_leave",
+         "sick_notice": "taking_leave",
+         "reminder": "set_reminder",
+         "event": "log_event",
+         "meeting": "schedule_meeting",
+         "note": "log_note",
+         "journal": "log_memory",
+         "memory": "log_memory",
+         "status_update": "status_update",
+         "task": "create_task",
+         "celebration": "log_event"
+     }
+     return label_to_intent.get(label, "other")
+
+ # Extract entities using NER
+ def extract_entities(text):
+     ner_results = ner_pipeline(text)
+     entities = {"people": [], "places": [], "organizations": [], "dates": [], "misc": []}
+
+     for ent in ner_results:
+         word = ent["word"].replace("##", "")
+         if len(word) <= 2 or not word.isalpha():
+             continue  # skip short fragments and non-words
+         group = ent["entity_group"]
+         if group == "PER":
+             entities["people"].append(word)
+         elif group == "LOC":
+             entities["places"].append(word)
+         elif group == "ORG":
+             entities["organizations"].append(word)
+         elif group == "DATE":
+             entities["dates"].append(word)
+         else:
+             entities["misc"].append(word)
+
+     # ✅ Fallback: add known weekday names if not already captured
+     day_keywords = re.findall(r'\b(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\b', text, re.IGNORECASE)
+     for day in day_keywords:
+         if day not in entities["dates"]:
+             entities["dates"].append(day)
+
+     # ✅ Fallback: add phrases like “product launch”, “project”, etc. to misc
+     lower_text = text.lower()
+     if "product launch" in lower_text:
+         entities["misc"].append("product launch")
+     if "birthday" in lower_text:
+         entities["misc"].append("birthday")
+     if "project" in lower_text:
+         entities["misc"].append("project")
+
+     # ✅ Deduplicate and return
+     return {k: list(set(v)) for k, v in entities.items()}
+
+ # Calculate an urgency score from keywords and parsed dates
+ def get_urgency_score(text, parsed_dates):
+     urgency_keywords = ["urgent", "asap", "immediate", "must", "need to", "important", "don’t forget", "right away"]
+     text_lower = text.lower()
+
+     score = 0.0
+
+     # 1. Keyword-based boost
+     if any(word in text_lower for word in urgency_keywords):
+         score = 0.7
+
+     # 2. Time-based boost: the sooner the parsed date, the higher the score
+     now = datetime.now()
+     for d in parsed_dates:
+         dt = dateparser.parse(d)
+         if dt:
+             hours = (dt - now).total_seconds() / 3600
+             if 0 <= hours <= 24:
+                 score = max(score, 1.0)
+             elif 24 < hours <= 72:
+                 score = max(score, 0.8)
+             elif 72 < hours <= 168:
+                 score = max(score, 0.5)
+
+     return round(score, 2)
+
+ # Meta information about the text and the current time
+ def get_meta_info(text: str):
+     now = datetime.now()
+     return {
+         "word_count": len(text.strip().split()),
+         "day_of_week": now.strftime('%A'),  # e.g., "Thursday"
+         "hour_of_day": now.hour,            # 0 to 23
+         "month": now.strftime('%B'),        # e.g., "July"
+         "year": now.year
+     }
+
+
+ @app.post("/analyze")
+ async def analyze(input: TextInput):
+     start_time = time.time()  # ⏱️ start
+
+     text = input.text
+
+     classification = classifier(text, labels)
+     best_label = classification['labels'][0]
+
+     # Heuristic override: reporting verbs usually signal news, not a task
+     if "reported" in text or "announced" in text or "collapsed" in text:
+         if best_label in ["task", "reminder", "event"]:
+             best_label = "news"
+
+     scores = dict(zip(classification['labels'], classification['scores']))
+
+     parsed_dates, time_mentions = extract_dates_with_accuracy(text)
+     tenses = detect_tense(parsed_dates)
+     summary = generate_summary(text).removeprefix("summary:").strip()
+     people = extract_people(text)
+     mood = estimate_mood(text)
+     tags = generate_tags(best_label, text)
+     language_detected = detect_language(text)
+     sentiment_score = get_sentiment_score(text)
+     entities = extract_entities(text)
+     intent = infer_intent(best_label, text)
+     urgency_score = get_urgency_score(text, parsed_dates)
+
+     # Action is required if an action trigger appears or urgency is high
+     ACTION_TRIGGERS = ["plan", "organize", "schedule", "remember", "book", "call", "follow up", "need to"]
+     action_required = any(word in text.lower() for word in ACTION_TRIGGERS) or urgency_score >= 0.6
+
+     meta = get_meta_info(text)
+
+     end_time = time.time()  # ⏱️ end
+     processing_time_ms = round((end_time - start_time) * 1000)
+
+     return {
+         "uuid": str(uuid.uuid4()),  # Unique identifier for the request
+         "raw_text": text,
+         "word_count": meta["word_count"],
+         "day_of_week": meta["day_of_week"],
+         "hour_of_day": meta["hour_of_day"],
+         "month": meta["month"],
+         "year": meta["year"],
+         "type": best_label,
+         "intent": intent,
+         "confidence_scores": scores,
+         "urgency_score": urgency_score,
+         "time_mentions": time_mentions,
+         "parsed_dates": parsed_dates,
+         "tense": tenses,
+         "summary": summary,
+         "people": people,
+         "mood": mood,
+         "language": language_detected,
+         "sentiment_score": sentiment_score,
+         "tags": tags,
+         "action_required": action_required,
+         "entities": entities,
+         "processing_time_ms": processing_time_ms
+     }
+
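A minimal client sketch for the /analyze endpoint (host, port, and the requests dependency are assumptions for a local run, not part of this commit):

# Hypothetical client call against a locally running instance.
import requests

resp = requests.post(
    "http://localhost:7860/analyze",
    json={"text": "Remind me to call Priya about the product launch tomorrow at 11:00 AM"},
)
data = resp.json()
print(data["type"], data["intent"], data["urgency_score"])
print(data["parsed_dates"], data["entities"]["misc"])  # "product launch" is force-added to misc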
old-main.py ADDED
@@ -0,0 +1,67 @@
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+ import dateparser
+ from datetime import datetime
+ import re
+
+ app = FastAPI()
+
+ # Load classification model
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+
+ # Load summarization model
+ summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+ summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+ # Labels
+ labels = ["task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "other"]
+
+ class TextInput(BaseModel):
+     text: str
+
+ def extract_dates(text):
+     time_expressions = re.findall(
+         r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
+         text, flags=re.IGNORECASE)
+     parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+     return list(set(parsed)), list(set(time_expressions))
+
+ def detect_tense(parsed_dates):
+     now = datetime.now()
+     tenses = set()
+     for d in parsed_dates:
+         dt = dateparser.parse(d)
+         if not dt:
+             continue
+         if dt < now:
+             tenses.add("past")
+         elif dt > now:
+             tenses.add("future")
+         else:
+             tenses.add("present")
+     return list(tenses) if tenses else ["unknown"]
+
+ def generate_summary(text):
+     input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+     output_ids = summarizer_model.generate(input_ids, max_length=50, num_beams=4, early_stopping=True)
+     return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+ @app.post("/analyze")
+ async def analyze(input: TextInput):
+     text = input.text
+     classification = classifier(text, labels)
+     best_label = classification['labels'][0]
+     scores = dict(zip(classification['labels'], classification['scores']))
+     parsed_dates, time_mentions = extract_dates(text)
+     tenses = detect_tense(parsed_dates)
+     summary = generate_summary(text)
+
+     return {
+         "type": best_label,
+         "confidence_scores": scores,
+         "time_mentions": time_mentions,
+         "parsed_dates": parsed_dates,
+         "tense": tenses,
+         "summary": summary
+     }
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi==0.110.0
+ uvicorn==0.29.0
+ transformers==4.40.0
+ torch>=2.0.0
+ dateparser==1.2.0
+ # spacy
+ langdetect
+ textblob
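The transformers models pinned above are fetched on first use, which makes the first /analyze request slow. A one-off warm-up script (an assumption, not part of this commit) could pre-cache the weights at build time:

# Hypothetical warm-up script that pre-downloads the models main.py loads.
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification

pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
AutoTokenizer.from_pretrained("google/flan-t5-small")
AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
AutoTokenizer.from_pretrained("dslim/bert-base-NER")
AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")

Note that pydantic ships as a fastapi dependency, and uuid and time are standard library, so nothing else needs pinning here.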
style.css DELETED
@@ -1,28 +0,0 @@
- body {
-   padding: 2rem;
-   font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
- }
-
- h1 {
-   font-size: 16px;
-   margin-top: 0;
- }
-
- p {
-   color: rgb(107, 114, 128);
-   font-size: 15px;
-   margin-bottom: 10px;
-   margin-top: 5px;
- }
-
- .card {
-   max-width: 620px;
-   margin: 0 auto;
-   padding: 16px;
-   border: 1px solid lightgray;
-   border-radius: 16px;
- }
-
- .card p:last-child {
-   margin-bottom: 0;
- }
with-english-name-spacy.py ADDED
@@ -0,0 +1,121 @@
+ import re
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForTokenClassification
+ import dateparser
+ from datetime import datetime
+ import spacy
+
+ app = FastAPI()
+
+ # Load classification and summarization models
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+ summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
+ summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
+
+ # Load spaCy English model for name/entity detection
+ try:
+     nlp = spacy.load("en_core_web_sm")
+ except OSError:
+     # Model not installed yet: download it, then retry
+     import subprocess, sys
+     subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
+     nlp = spacy.load("en_core_web_sm")
+
+ # Labels for classification
+ labels = [
+     "task", "event", "reminder", "meeting", "relationship", "note", "journal", "memory", "status_update",
+     "sick_notice", "out_of_office", "travel_plan", "celebration", "emotion", "other"
+ ]
+
+ class TextInput(BaseModel):
+     text: str
+
+ def extract_dates(text):
+     time_expressions = re.findall(
+         r'\b(kal|aaj|parso|raat|subah|shaam|dopahar|[0-9]{1,2} baje|next week|tomorrow|today|yesterday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|[\d]{1,2}/[\d]{1,2}/[\d]{2,4})\b',
+         text, flags=re.IGNORECASE)
+     parsed = [str(dateparser.parse(t)) for t in time_expressions if dateparser.parse(t)]
+     return list(set(parsed)), list(set(time_expressions))
+
+ def detect_tense(parsed_dates):
+     now = datetime.now()
+     tenses = set()
+     for d in parsed_dates:
+         dt = dateparser.parse(d)
+         if not dt:
+             continue
+         if dt < now:
+             tenses.add("past")
+         elif dt > now:
+             tenses.add("future")
+         else:
+             tenses.add("present")
+     return list(tenses) if tenses else ["unknown"]
+
+ def generate_summary(text):
+     input_ids = summarizer_tokenizer("summarize: " + text, return_tensors="pt").input_ids
+     output_ids = summarizer_model.generate(input_ids, max_length=60, num_beams=4, early_stopping=True)
+     return summarizer_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+
+ def extract_people(text):
+     doc = nlp(text)
+     return list(set(ent.text for ent in doc.ents if ent.label_ == "PERSON"))
+
+ def estimate_mood(text):
+     text_lower = text.lower()
+     mood_map = {
+         "happy": ["happy", "excited", "joy", "grateful"],
+         "sad": ["sad", "upset", "crying", "lonely"],
+         "angry": ["angry", "annoyed", "frustrated", "irritated"],
+         "nervous": ["nervous", "anxious", "scared"],
+         "unwell": ["sick", "unwell", "not feeling well", "fever", "cold", "headache"],
+         "neutral": []
+     }
+
+     for mood, keywords in mood_map.items():
+         for kw in keywords:
+             if kw in text_lower:
+                 return mood
+     return "neutral"
+
+ def generate_tags(label, text):
+     base_tags = [label]
+     keywords = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
+     force_tags = []
+
+     if any(w in text.lower() for w in ["sick", "unwell", "not feeling well", "fever"]):
+         force_tags += ["sick", "leave"]
+     if "work" in text.lower():
+         force_tags.append("work")
+
+     return list(set(base_tags + force_tags + keywords))
+
+
+ @app.post("/analyze")
+ async def analyze(input: TextInput):
+     text = input.text
+
+     classification = classifier(text, labels)
+     best_label = classification['labels'][0]
+     scores = dict(zip(classification['labels'], classification['scores']))
+
+     parsed_dates, time_mentions = extract_dates(text)
+     tenses = detect_tense(parsed_dates)
+     summary = generate_summary(text)
+     people = extract_people(text)
+     mood = estimate_mood(text)
+     tags = generate_tags(best_label, text)
+
+     return {
+         "type": best_label,
+         "confidence_scores": scores,
+         "time_mentions": time_mentions,
+         "parsed_dates": parsed_dates,
+         "tense": tenses,
+         "summary": summary,
+         "people": people,
+         "mood": mood,
+         "tags": tags
+     }
+
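The spaCy variant's extract_people can be checked in isolation with a sketch like this (assumes en_core_web_sm is installed; the sample sentence and its output are illustrative and model-dependent):

# Hypothetical check of spaCy PERSON extraction, mirroring extract_people above.
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Meet Rahul and Anjali for dinner on Friday")
print([ent.text for ent in doc.ents if ent.label_ == "PERSON"])
# typically ['Rahul', 'Anjali'], depending on the model version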