Spaces:

kartiksrma
/

SearchCourses

Running

App Files Files Community

kartiksrma committed on Dec 29, 2024

Commit

7a37ee2

verified ·

1 Parent(s): e20f47b

Upload 2 files

Browse files

Files changed (2) hide show

app.py +116 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import streamlit as st
+import pymongo
+import requests
+import time
+from dotenv import load_dotenv
+import os
+load_dotenv()
+# file_path = '/content/free_courses.json'
+uri = os.getenv("URI")
+hf_token = os.getenv("HF_TOKEN")
+client = pymongo.MongoClient(uri)
+embedding_url = "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2"
+try:
+    client.admin.command("ping")  # Sends a ping to the MongoDB server
+    print("Successfully connected to MongoDB!")
+except pymongo.errors.ConnectionError as e:
+    print(f"Failed to connect to MongoDB: {e}")
+try:
+    db = client.av_courses
+    collection = db.courses
+    # Just trying to fetch the first document (you can adjust the query)
+    sample_doc = collection.find_one()
+except Exception as e:
+    print(f"Error accessing the database or collection: {e}")
+def generate_embedding(text: str) -> list[float]:
+  response = requests.post(
+    embedding_url,
+    headers={"Authorization": f"Bearer {hf_token}"},
+    json={"inputs": text})
+  if response.status_code != 200:
+    raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")
+  return response.json()
+# for doc in collection.find({'title':{"$exists": True}}).limit(75):
+#   doc['course_embedding_hf'] = generate_embedding(doc['merged_summary'])
+#   collection.replace_one({'_id': doc['_id']}, doc)
+#   print(doc['_id'], end=" ")
+# db.avcourses.create_index(
+#     [("course_embedding_hf", pymongo.GEOSPHERE)],
+#     name="CourseSemanticSearch"
+# )
+def getSearchResults(query: str) -> list[object]:
+    query_embedding =  generate_embedding(query)
+    time.sleep(1)
+    results = collection.aggregate([
+    {"$vectorSearch": {
+        "queryVector": query_embedding,
+        "path": "course_embedding_hf",
+        "numCandidates": 100,
+        "limit": 10,
+        "index": "CourseSemanticSearch"
+    }},
+    {
+        "$addFields": {"score": {"$meta": "vectorSearchScore"}}
+    },
+    {
+        "$match": {"score": {"$gte": 0.55}}
+    },
+    {"$sort": {"score": -1}},
+    {
+        "$project": {"vector": 0}
+    },
+    ])
+    return results
+# Streamlit UI
+st.title("Semantic Search Interface")
+st.subheader("Find the best courses for your query")
+query = st.text_input("Enter your query here:")
+if st.button("Search"):
+    if query:
+        # Generate query embedding
+        with st.spinner("Fetching results..."):
+            results = getSearchResults(query)
+            try:
+                if results:
+                    for course in results:
+                        st.markdown(
+                            f"""
+                            ### {course['title']}
+                            - **Description:** {course['description']}
+                            - **Duration:** {course['duration']}
+                            - **Ratings:** {course['ratings']}
+                            - **Difficulty:** {course['difficulty']}
+                            """
+                        )
+                        st.markdown(
+            f"[![Go to Course](https://img.shields.io/badge/Go%20to%20Course-blue)]({course['course_url']})",
+            unsafe_allow_html=True,
+        )
+                        st.markdown("---")
+                else:
+                    st.markdown("No matches found!")
+            except Exception as e:
+                st.markdown(f"rate limit for searching has been completed try after few minutes\n",e)
+    else:
+        st.error("Please enter a query.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ pymongo
2	+ python-dotenv