Mitesh Koshiya committed on
Commit
65c85da
·
1 Parent(s): f941393

Update API to save data in PostgreSQL

Browse files
Files changed (2) hide show
  1. main.py +138 -40
  2. requirements.txt +3 -1
main.py CHANGED
@@ -20,6 +20,15 @@ from fastapi.responses import ORJSONResponse
20
  from fastapi.requests import Request
21
  from fastapi import status
22
  import asyncio
 
 
 
 
 
 
 
 
 
23
 
24
  app = FastAPI(default_response_class=ORJSONResponse)
25
  app.add_middleware(
@@ -30,6 +39,50 @@ app.add_middleware(
30
  allow_headers=["*"],
31
  )
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Load classification and summarization models
34
  classifier = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
35
  summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
@@ -366,74 +419,111 @@ def get_meta_info(text: str):
366
  # Function to extract amounts in various currencies from text
367
  def extract_amounts(text: str):
368
  currency_patterns = [
369
- # Symbol or standard currency
370
- (r"(β‚Ή|rs\.?|inr)\s?(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+)", "INR"),
371
- (r"(\$)\s?(\d+(?:,\d{3})*(?:\.\d+)?)", "USD"),
372
- (r"(\d+(?:,\d{3})*(?:\.\d+)?)\s?(\$)", "USD"),
373
- (r"(€|eur)\s?(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+)", "EUR"),
374
- (r"(\d+(?:,\d{3})*(?:\.\d+)?)\s?(€)", "EUR"),
375
-
376
- # Word-based currency formats
 
377
  (r"(\d+(?:\.\d+)?)\s?(rupees?|rupaye|rupiye)", "INR"),
378
- (r"(rupaye|rupiye)\s?(\d+(?:,\d{3})*(?:\.\d+)?)", "INR"),
379
-
380
  (r"(\d+(?:\.\d+)?)\s?(dollars?)", "USD"),
381
  (r"(\d+(?:\.\d+)?)\s?(euros?)", "EUR"),
382
  (r"(\d+(?:\.\d+)?)\s?(cents?)", "USD"),
383
-
384
- # Indian number system
385
  (r"(\d+(?:\.\d+)?)\s?(lacs?|lakhs?)", "INR"),
386
  (r"(\d+(?:\.\d+)?)\s?(crores?|cr)", "INR"),
387
  ]
388
 
389
  results = []
390
  seen = set()
391
-
392
  text_lower = text.lower()
393
 
394
- # First, match currency explicitly
395
  for pattern, currency_code in currency_patterns:
396
  for match in re.finditer(pattern, text_lower):
397
  groups = match.groups()
398
- number = None
399
-
 
400
  try:
 
401
  if any(word in groups for word in ['lakh', 'lacs', 'lakhs']):
402
- number = float(groups[0]) * 100000
403
  elif any(word in groups for word in ['crore', 'crores', 'cr']):
404
- number = float(groups[0]) * 10000000
405
  elif 'cents' in groups:
406
- number = float(groups[0]) / 100
407
- elif any(word in groups for word in ['rupees', 'dollars', 'euros', 'rupaye', 'rupiye']):
408
- number = float(groups[0])
409
- elif groups[1]:
410
- number = float(groups[1].replace(",", ""))
411
- except (ValueError, IndexError):
412
  continue
413
 
414
- if number:
415
- key = (number, currency_code)
416
- if key not in seen:
417
- seen.add(key)
418
- results.append({
419
- "value": round(number, 2),
420
- "currency": currency_code
421
- })
422
-
423
- # Second pass: If no currency found but possible standalone expense (like "paid 3000 for rent")
424
- if not results:
425
- match = re.search(r"\b(?:paid|spent|buy|purchase|cost|price)\b.*?(\d{2,8})", text_lower)
426
- if match:
427
- number = float(match.group(1).replace(",", ""))
428
- key = (number, "INR")
429
  if key not in seen:
 
430
  results.append({
431
  "value": round(number, 2),
432
- "currency": "INR"
433
  })
434
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  return results
436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  @app.get("/health")
438
  def health_check():
439
  return {"message": "βœ… Hello from yourpartner/demospace β€” API is running!"}
@@ -543,5 +633,13 @@ async def analyze(input: TextInput):
543
  "stores": detected_stores,
544
  "processing_time_ms": processing_time_ms
545
  }
 
 
 
 
 
 
 
 
546
  return ORJSONResponse(content=result)
547
 
 
20
  from fastapi.requests import Request
21
  from fastapi import status
22
  import asyncio
23
+ import psycopg2
24
+ from psycopg2.extras import Json
25
+ import os
26
+ from dotenv import load_dotenv
27
+
28
+ # Load environment variables
29
+ load_dotenv()
30
+
31
+ DATABASE_URL = os.getenv("DATABASE_URL")
32
 
33
  app = FastAPI(default_response_class=ORJSONResponse)
34
  app.add_middleware(
 
39
  allow_headers=["*"],
40
  )
41
 
42
+ CREATE_TABLE_QUERY = """
43
+ CREATE TABLE IF NOT EXISTS user_entries (
44
+ uuid UUID PRIMARY KEY,
45
+ raw_text TEXT,
46
+ word_count INT,
47
+ day_of_week TEXT,
48
+ hour_of_day INT,
49
+ month TEXT,
50
+ year INT,
51
+ type TEXT,
52
+ intent TEXT,
53
+ confidence_scores JSONB,
54
+ urgency_score INT,
55
+ time_mentions TEXT[],
56
+ parsed_dates TEXT[],
57
+ tense TEXT[],
58
+ summary TEXT,
59
+ people TEXT[],
60
+ mood TEXT,
61
+ language JSONB,
62
+ sentiment_score FLOAT,
63
+ tags TEXT[],
64
+ action_required BOOLEAN,
65
+ entities JSONB,
66
+ amounts JSONB,
67
+ stores TEXT[],
68
+ processing_time_ms INT,
69
+ created_at TIMESTAMPTZ DEFAULT now()
70
+ );
71
+ """
72
+
73
@app.on_event("startup")
def run_migrations():
    """Create the ``user_entries`` table at application startup if missing.

    Executes ``CREATE_TABLE_QUERY`` against the database named by
    ``DATABASE_URL`` and commits. Any failure is printed and swallowed so
    that the application still starts when the database is unavailable.
    """
    conn = None
    try:
        conn = psycopg2.connect(DATABASE_URL)
        # The cursor context manager closes the cursor even if execute() raises.
        with conn.cursor() as cur:
            cur.execute(CREATE_TABLE_QUERY)
        conn.commit()
        print("✅ Table checked/created at startup.")
    except Exception as e:
        print("❌ Migration failed:", e)
    finally:
        # Always release the connection; previously it leaked on any error.
        if conn is not None:
            conn.close()
85
+
86
  # Load classification and summarization models
87
  classifier = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
88
  summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
 
# Function to extract amounts in various currencies from text
def extract_amounts(text: str):
    """Extract monetary amounts mentioned in *text*.

    Matching is case-insensitive (the text is lower-cased first). Recognised
    forms are currency symbols/codes next to a number (``₹5000``, ``Rs. 1,200``,
    ``INR 300``, ``$250.75``, ``100€``), currency words after a number
    (``500 rupees``, ``20 dollars``, ``50 cents``) and Indian-system units
    (``5 lac``, ``2 lakhs``, ``1.5 crore``, ``3 cr``).

    If nothing matches, a spending keyword (paid/spent/buy/purchase/cost/price)
    followed by a bare number of at least two digits is reported as INR.

    Args:
        text: Free-form input text.

    Returns:
        A list of ``{"value": float, "currency": str}`` dicts, in pattern
        order, de-duplicated on ``(amount, currency)``. ``value`` is rounded
        to two decimals.
    """
    # One number, optionally comma-grouped, with an optional decimal part.
    num = r"(?P<num>\d[\d,]*(?:\.\d+)?)"

    # Word-like prefixes/suffixes carry \b so they cannot match inside other
    # words ("dollars 50" is not "rs 50"; "5 credits" is not "5 cr").
    currency_patterns = [
        # ₹5000, Rs. 1200, INR 300
        (rf"(?:₹|\brs\.?|\binr)\s?{num}", "INR"),
        # $250.75 or 250.75$
        (rf"\$\s?{num}", "USD"),
        (rf"{num}\s?\$", "USD"),
        # €100 or 100€
        (rf"(?:€|\beur)\s?{num}", "EUR"),
        (rf"{num}\s?€", "EUR"),
        # Word-based currency
        (rf"{num}\s?(?P<unit>rupees?|rupaye|rupiye)\b", "INR"),
        (rf"{num}\s?(?P<unit>dollars?)\b", "USD"),
        (rf"{num}\s?(?P<unit>euros?)\b", "EUR"),
        (rf"{num}\s?(?P<unit>cents?)\b", "USD"),
        # Indian system
        (rf"{num}\s?(?P<unit>lacs?|lakhs?)\b", "INR"),
        (rf"{num}\s?(?P<unit>crores?|cr)\b", "INR"),
    ]

    # Every spelling the regexes accept must be listed here, singulars included.
    unit_multipliers = {
        "lac": 100_000,
        "lacs": 100_000,
        "lakh": 100_000,
        "lakhs": 100_000,
        "crore": 10_000_000,
        "crores": 10_000_000,
        "cr": 10_000_000,
    }

    results = []
    seen = set()
    text_lower = text.lower()

    for pattern, currency_code in currency_patterns:
        for match in re.finditer(pattern, text_lower):
            # The regex guarantees digits/commas/at most one dot, so float()
            # cannot fail once the grouping commas are removed.
            number = float(match.group("num").replace(",", ""))
            unit = match.groupdict().get("unit")
            if unit in ("cent", "cents"):
                number /= 100
            else:
                number *= unit_multipliers.get(unit, 1)

            key = (number, currency_code)
            if key not in seen:
                seen.add(key)
                results.append({
                    "value": round(number, 2),
                    "currency": currency_code
                })

    # Fallback: detect simple numeric amounts like "paid 500 for rent".
    # Comma-grouped numbers ("3,000", "1,50,000") are tried before bare digits
    # so that "paid 3,000" is not read as "000".
    if not results:
        match = re.search(
            r"\b(?:paid|spent|buy|purchase|cost|price)\b.*?"
            r"(\d{1,3}(?:,\d{2,3})+|\d{2,8})",
            text_lower,
        )
        if match:
            number = float(match.group(1).replace(",", ""))
            results.append({
                "value": round(number, 2),
                "currency": "INR"
            })

    return results
485
 
486
+
487
def insert_text_entry(data):
    """Persist one analysis result as a row in ``user_entries``.

    Best-effort: every failure (missing key, connection error, SQL error) is
    printed and swallowed, never raised to the caller.

    Args:
        data: Mapping that supplies a value for each column named in the
            INSERT statement below. The ``confidence_scores``, ``language``,
            ``entities`` and ``amounts`` values are wrapped in ``Json`` before
            being sent, matching the JSONB columns.

    A row whose ``uuid`` already exists is left untouched
    (``ON CONFLICT (uuid) DO NOTHING``).
    """
    insert_query = """
        INSERT INTO user_entries (
            uuid, raw_text, word_count, day_of_week, hour_of_day, month, year,
            type, intent, confidence_scores, urgency_score,
            time_mentions, parsed_dates, tense, summary,
            people, mood, language, sentiment_score, tags,
            action_required, entities, amounts, stores, processing_time_ms
        ) VALUES (
            %(uuid)s, %(raw_text)s, %(word_count)s, %(day_of_week)s, %(hour_of_day)s, %(month)s, %(year)s,
            %(type)s, %(intent)s, %(confidence_scores)s, %(urgency_score)s,
            %(time_mentions)s, %(parsed_dates)s, %(tense)s, %(summary)s,
            %(people)s, %(mood)s, %(language)s, %(sentiment_score)s, %(tags)s,
            %(action_required)s, %(entities)s, %(amounts)s, %(stores)s, %(processing_time_ms)s
        )
        ON CONFLICT (uuid) DO NOTHING;
    """

    conn = None
    try:
        # Build the parameters before connecting so a malformed `data` mapping
        # fails without ever opening a connection.
        params = {
            **data,
            "confidence_scores": Json(data["confidence_scores"]),
            "language": Json(data["language"]),
            "entities": Json(data["entities"]),
            "amounts": Json(data["amounts"]),
        }

        conn = psycopg2.connect(DATABASE_URL)
        # The cursor context manager closes the cursor even if execute() raises.
        with conn.cursor() as cur:
            cur.execute(insert_query, params)
        conn.commit()
        print("✅ Data inserted successfully")

    except Exception as e:
        print("❌ Failed to insert data:", e)
    finally:
        # Always release the connection; previously it leaked on any error.
        if conn is not None:
            conn.close()
524
+
525
+
526
+
@app.get("/health")
def health_check():
    """Liveness endpoint: return a static message confirming the API is up."""
    return {"message": "✅ Hello from yourpartner/demospace — API is running!"}
 
633
  "stores": detected_stores,
634
  "processing_time_ms": processing_time_ms
635
  }
636
+
637
+ # Insert into database
638
+ await asyncio.to_thread(insert_text_entry, result)
639
+
640
+ # Log the result
641
+ print("βœ… Analysis complete:", result)
642
+
643
+ # Return the result as JSON response
644
  return ORJSONResponse(content=result)
645
 
requirements.txt CHANGED
@@ -9,4 +9,6 @@ textblob
9
  sentencepiece
10
  protobuf
11
  scikit-learn
12
- orjson
 
 
 
9
  sentencepiece
10
  protobuf
11
  scikit-learn
12
+ orjson
13
+ psycopg2-binary
14
+ python-dotenv