Spaces:

yourpartner
/

demospace

Running

App Files Community

Mitesh Koshiya committed on 15 days ago

Commit

65c85da

1 Parent(s): f941393

Update API to save data in PostgreSQL

Browse files

Files changed (2) hide show

main.py +138 -40
requirements.txt +3 -1

main.py CHANGED Viewed

@@ -20,6 +20,15 @@ from fastapi.responses import ORJSONResponse
 from fastapi.requests import Request
 from fastapi import status
 import asyncio
 app = FastAPI(default_response_class=ORJSONResponse)
 app.add_middleware(
@@ -30,6 +39,50 @@ app.add_middleware(
     allow_headers=["*"],
 )
 # Load classification and summarization models
 classifier = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
 summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
@@ -366,74 +419,111 @@ def get_meta_info(text: str):
 # Function to extract amounts in various currencies from text
 def extract_amounts(text: str):
     currency_patterns = [
-        # Symbol or standard currency
-        (r"(₹|rs\.?|inr)\s?(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+)", "INR"),
-        (r"(\$)\s?(\d+(?:,\d{3})*(?:\.\d+)?)", "USD"),
-        (r"(\d+(?:,\d{3})*(?:\.\d+)?)\s?(\$)", "USD"),
-        (r"(€|eur)\s?(\d{1,3}(?:,\d{3})*(?:\.\d+)?|\d+)", "EUR"),
-        (r"(\d+(?:,\d{3})*(?:\.\d+)?)\s?(€)", "EUR"),
-        # Word-based currency formats
         (r"(\d+(?:\.\d+)?)\s?(rupees?|rupaye|rupiye)", "INR"),
-        (r"(rupaye|rupiye)\s?(\d+(?:,\d{3})*(?:\.\d+)?)", "INR"),
         (r"(\d+(?:\.\d+)?)\s?(dollars?)", "USD"),
         (r"(\d+(?:\.\d+)?)\s?(euros?)", "EUR"),
         (r"(\d+(?:\.\d+)?)\s?(cents?)", "USD"),
-        # Indian number system
         (r"(\d+(?:\.\d+)?)\s?(lacs?|lakhs?)", "INR"),
         (r"(\d+(?:\.\d+)?)\s?(crores?|cr)", "INR"),
     ]
     results = []
     seen = set()
     text_lower = text.lower()
-    # First, match currency explicitly
     for pattern, currency_code in currency_patterns:
         for match in re.finditer(pattern, text_lower):
             groups = match.groups()
-            number = None
             try:
                 if any(word in groups for word in ['lakh', 'lacs', 'lakhs']):
-                    number = float(groups[0]) * 100000
                 elif any(word in groups for word in ['crore', 'crores', 'cr']):
-                    number = float(groups[0]) * 10000000
                 elif 'cents' in groups:
-                    number = float(groups[0]) / 100
-                elif any(word in groups for word in ['rupees', 'dollars', 'euros', 'rupaye', 'rupiye']):
-                    number = float(groups[0])
-                elif groups[1]:
-                    number = float(groups[1].replace(",", ""))
-            except (ValueError, IndexError):
                 continue
-            if number:
-                key = (number, currency_code)
-                if key not in seen:
-                    seen.add(key)
-                    results.append({
-                        "value": round(number, 2),
-                        "currency": currency_code
-                    })
-    # Second pass: If no currency found but possible standalone expense (like "paid 3000 for rent")
-    if not results:
-        match = re.search(r"\b(?:paid|spent|buy|purchase|cost|price)\b.*?(\d{2,8})", text_lower)
-        if match:
-            number = float(match.group(1).replace(",", ""))
-            key = (number, "INR")
             if key not in seen:
                 results.append({
                     "value": round(number, 2),
-                    "currency": "INR"
                 })
     return results
 @app.get("/health")
 def health_check():
     return {"message": "✅ Hello from yourpartner/demospace — API is running!"}
@@ -543,5 +633,13 @@ async def analyze(input: TextInput):
         "stores": detected_stores,
         "processing_time_ms": processing_time_ms
     }
     return ORJSONResponse(content=result)

 from fastapi.requests import Request
 from fastapi import status
 import asyncio
+import psycopg2
+from psycopg2.extras import Json
+import os
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+DATABASE_URL = os.getenv("DATABASE_URL")
 app = FastAPI(default_response_class=ORJSONResponse)
 app.add_middleware(
     allow_headers=["*"],
 )
+CREATE_TABLE_QUERY = """
+CREATE TABLE IF NOT EXISTS user_entries (
+    uuid UUID PRIMARY KEY,
+    raw_text TEXT,
+    word_count INT,
+    day_of_week TEXT,
+    hour_of_day INT,
+    month TEXT,
+    year INT,
+    type TEXT,
+    intent TEXT,
+    confidence_scores JSONB,
+    urgency_score INT,
+    time_mentions TEXT[],
+    parsed_dates TEXT[],
+    tense TEXT[],
+    summary TEXT,
+    people TEXT[],
+    mood TEXT,
+    language JSONB,
+    sentiment_score FLOAT,
+    tags TEXT[],
+    action_required BOOLEAN,
+    entities JSONB,
+    amounts JSONB,
+    stores TEXT[],
+    processing_time_ms INT,
+    created_at TIMESTAMPTZ DEFAULT now()
+);
+"""
+@app.on_event("startup")
+def run_migrations():
+    try:
+        conn = psycopg2.connect(DATABASE_URL)
+        cur = conn.cursor()
+        cur.execute(CREATE_TABLE_QUERY)
+        conn.commit()
+        cur.close()
+        conn.close()
+        print("✅ Table checked/created at startup.")
+    except Exception as e:
+        print("❌ Migration failed:", e)
 # Load classification and summarization models
 classifier = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
 summarizer_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
 # Function to extract amounts in various currencies from text
 def extract_amounts(text: str):
     currency_patterns = [
+        # ₹5000, Rs. 1200, INR 300
+        (r"(?:₹|rs\.?|inr)\s?(\d[\d,]*(?:\.\d+)?)", "INR"),
+        # $250.75 or 250.75$
+        (r"(?:\$)\s?(\d[\d,]*(?:\.\d+)?)", "USD"),
+        (r"(\d[\d,]*(?:\.\d+)?)\s?\$", "USD"),
+        # €100 or 100€
+        (r"(?:€|eur)\s?(\d[\d,]*(?:\.\d+)?)", "EUR"),
+        (r"(\d[\d,]*(?:\.\d+)?)\s?€", "EUR"),
+        # Word-based currency
         (r"(\d+(?:\.\d+)?)\s?(rupees?|rupaye|rupiye)", "INR"),
         (r"(\d+(?:\.\d+)?)\s?(dollars?)", "USD"),
         (r"(\d+(?:\.\d+)?)\s?(euros?)", "EUR"),
         (r"(\d+(?:\.\d+)?)\s?(cents?)", "USD"),
+        # Indian system
         (r"(\d+(?:\.\d+)?)\s?(lacs?|lakhs?)", "INR"),
         (r"(\d+(?:\.\d+)?)\s?(crores?|cr)", "INR"),
     ]
     results = []
     seen = set()
     text_lower = text.lower()
     for pattern, currency_code in currency_patterns:
         for match in re.finditer(pattern, text_lower):
             groups = match.groups()
+            raw_number = next((g for g in groups if re.match(r"\d", g)), None)
+            if not raw_number:
+                continue
             try:
+                number = float(raw_number.replace(",", ""))
                 if any(word in groups for word in ['lakh', 'lacs', 'lakhs']):
+                    number *= 100_000
                 elif any(word in groups for word in ['crore', 'crores', 'cr']):
+                    number *= 10_000_000
                 elif 'cents' in groups:
+                    number /= 100
+            except Exception:
                 continue
+            key = (number, currency_code)
             if key not in seen:
+                seen.add(key)
                 results.append({
                     "value": round(number, 2),
+                    "currency": currency_code
                 })
+    # Fallback: detect simple numeric amounts like "paid 500 for rent"
+    if not results:
+        match = re.search(r"\b(?:paid|spent|buy|purchase|cost|price)\b.*?(\d{2,8})", text_lower)
+        if match:
+            try:
+                number = float(match.group(1).replace(",", ""))
+                key = (number, "INR")
+                if key not in seen:
+                    results.append({
+                        "value": round(number, 2),
+                        "currency": "INR"
+                    })
+            except:
+                pass
     return results
+def insert_text_entry(data):
+    try:
+        conn = psycopg2.connect(DATABASE_URL)
+        cur = conn.cursor()
+        insert_query = """
+            INSERT INTO user_entries (
+                uuid, raw_text, word_count, day_of_week, hour_of_day, month, year,
+                type, intent, confidence_scores, urgency_score,
+                time_mentions, parsed_dates, tense, summary,
+                people, mood, language, sentiment_score, tags,
+                action_required, entities, amounts, stores, processing_time_ms
+            ) VALUES (
+                %(uuid)s, %(raw_text)s, %(word_count)s, %(day_of_week)s, %(hour_of_day)s, %(month)s, %(year)s,
+                %(type)s, %(intent)s, %(confidence_scores)s, %(urgency_score)s,
+                %(time_mentions)s, %(parsed_dates)s, %(tense)s, %(summary)s,
+                %(people)s, %(mood)s, %(language)s, %(sentiment_score)s, %(tags)s,
+                %(action_required)s, %(entities)s, %(amounts)s, %(stores)s, %(processing_time_ms)s
+            )
+            ON CONFLICT (uuid) DO NOTHING;
+        """
+        cur.execute(insert_query, {
+            **data,
+            "confidence_scores": Json(data["confidence_scores"]),
+            "language": Json(data["language"]),
+            "entities": Json(data["entities"]),
+            "amounts": Json(data["amounts"])
+        })
+        conn.commit()
+        cur.close()
+        conn.close()
+        print("✅ Data inserted successfully")
+    except Exception as e:
+        print("❌ Failed to insert data:", e)
 @app.get("/health")
 def health_check():
     return {"message": "✅ Hello from yourpartner/demospace — API is running!"}
         "stores": detected_stores,
         "processing_time_ms": processing_time_ms
     }
+    # Insert into database
+    await asyncio.to_thread(insert_text_entry, result)
+    # Log the result
+    print("✅ Analysis complete:", result)
+    # Return the result as JSON response
     return ORJSONResponse(content=result)

requirements.txt CHANGED Viewed

@@ -9,4 +9,6 @@ textblob
 sentencepiece
 protobuf
 scikit-learn
-orjson

 sentencepiece
 protobuf
 scikit-learn
+orjson
+psycopg2-binary
+python-dotenv