kartiksrma committed on
Commit
7a37ee2
·
verified ·
1 Parent(s): e20f47b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +116 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pymongo
3
+ import requests
4
+ import time
5
+ from dotenv import load_dotenv
6
+ import os
7
+
8
# Load variables from a .env file (if present) before any of them are read.
load_dotenv()

# file_path = '/content/free_courses.json'

# Hugging Face Inference API endpoint used to turn text into an embedding.
embedding_url = "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2"

# Secrets come from the environment; each is None when the variable is unset.
hf_token = os.environ.get("HF_TOKEN")
uri = os.environ.get("URI")

# Module-level MongoDB client shared by the rest of the script.
client = pymongo.MongoClient(uri)
16
+
17
# Startup health check: ping the server so connectivity problems show up in
# the logs right away rather than on the first query.
try:
    client.admin.command("ping")  # Sends a ping to the MongoDB server
except pymongo.errors.ConnectionFailure as e:
    # pymongo has no ``ConnectionError``; referencing it would raise
    # AttributeError here and hide the real connection problem.
    print(f"Failed to connect to MongoDB: {e}")
else:
    print("Successfully connected to MongoDB!")
22
+
23
# Bind the database and collection handles used by the search function, and
# read one document as a quick check that the collection is accessible.
try:
    db = client["av_courses"]
    collection = db["courses"]
    # Fetch a single document (adjust the query if a specific one is wanted).
    sample_doc = collection.find_one()
except Exception as err:
    print(f"Error accessing the database or collection: {err}")
30
+
31
def generate_embedding(text: str) -> list[float]:
    """Request an embedding for *text* from the Hugging Face inference API.

    Posts ``text`` to the module-level ``embedding_url`` using ``hf_token``
    as a bearer token and returns the decoded JSON body of the response.

    Args:
        text: The text to embed.

    Returns:
        The decoded JSON response. Presumably a flat list of floats for a
        single input string; verify against the endpoint's output.

    Raises:
        ValueError: If the API answers with a non-200 status code.
        requests.RequestException: If the request cannot be completed,
            including when the timeout below expires.
    """
    response = requests.post(
        embedding_url,
        headers={"Authorization": f"Bearer {hf_token}"},
        json={"inputs": text},
        # Without a timeout a stalled connection would block the app forever.
        timeout=30,
    )

    if response.status_code != 200:
        raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")

    return response.json()
42
+
43
+ # for doc in collection.find({'title':{"$exists": True}}).limit(75):
44
+ # doc['course_embedding_hf'] = generate_embedding(doc['merged_summary'])
45
+ # collection.replace_one({'_id': doc['_id']}, doc)
46
+ # print(doc['_id'], end=" ")
47
+
48
+
49
+ # db.avcourses.create_index(
50
+ # [("course_embedding_hf", pymongo.GEOSPHERE)],
51
+ # name="CourseSemanticSearch"
52
+ # )
53
+
54
+
55
def getSearchResults(query: str) -> list[object]:
    """Run a vector search over the courses collection for *query*.

    The query is embedded with ``generate_embedding`` and matched against
    the ``course_embedding_hf`` field through the ``CourseSemanticSearch``
    index. Only hits scoring at least 0.55 are kept, best first.

    Args:
        query: Free-text search query.

    Returns:
        A list of matching course documents, each with an added ``score``
        field and without the stored embedding. Empty when nothing matches.

    Raises:
        ValueError: Propagated from ``generate_embedding`` on a non-200 reply.
    """
    query_embedding = generate_embedding(query)
    # NOTE(review): this pause looks like crude throttling of the embedding
    # API; it is kept as-is. Confirm whether it is still needed.
    time.sleep(1)
    pipeline = [
        {"$vectorSearch": {
            "queryVector": query_embedding,
            "path": "course_embedding_hf",
            "numCandidates": 100,
            "limit": 10,
            "index": "CourseSemanticSearch",
        }},
        {"$addFields": {"score": {"$meta": "vectorSearchScore"}}},
        {"$match": {"score": {"$gte": 0.55}}},
        {"$sort": {"score": -1}},
        # The embedding is stored under ``course_embedding_hf`` (the search
        # path above), so that is the field to drop from the results;
        # ``vector`` stays excluded in case some documents carry it.
        {"$project": {"vector": 0, "course_embedding_hf": 0}},
    ]
    # Return a real list: it matches the annotation, and an empty result is
    # falsy, whereas a cursor object is always truthy.
    return list(collection.aggregate(pipeline))
79
+
80
+
81
# Streamlit UI: one text box and a button; each search renders the matching
# courses as markdown cards, or a message when nothing matched or it failed.
st.title("Semantic Search Interface")
st.subheader("Find the best courses for your query")


query = st.text_input("Enter your query here:")
if st.button("Search"):
    if query and query.strip():
        try:
            with st.spinner("Fetching results..."):
                # Materialize the results so emptiness can be tested below;
                # a cursor object would always be truthy.
                results = list(getSearchResults(query))
        except (ValueError, requests.RequestException, pymongo.errors.PyMongoError) as e:
            # The search call must be inside the try: this is where the
            # embedding API and the database can fail. The error is included
            # in the message text rather than passed as a second positional
            # argument, which st.markdown treats as ``unsafe_allow_html``.
            st.error(
                "Search failed. The embedding service may be rate-limited; "
                f"try again in a few minutes.\n\nDetails: {e}"
            )
        else:
            if results:
                for course in results:
                    # .get() keeps one incomplete document from aborting the
                    # whole result list with a KeyError.
                    st.markdown(
                        f"### {course.get('title', 'Untitled course')}\n"
                        f"- **Description:** {course.get('description', 'N/A')}\n"
                        f"- **Duration:** {course.get('duration', 'N/A')}\n"
                        f"- **Ratings:** {course.get('ratings', 'N/A')}\n"
                        f"- **Difficulty:** {course.get('difficulty', 'N/A')}\n"
                    )
                    course_url = course.get("course_url")
                    if course_url:
                        # A markdown image link needs no raw HTML, so
                        # database text is not rendered with
                        # unsafe_allow_html.
                        st.markdown(
                            f"[![Go to Course](https://img.shields.io/badge/Go%20to%20Course-blue)]({course_url})"
                        )

                    st.markdown("---")
            else:
                st.markdown("No matches found!")
    else:
        st.error("Please enter a query.")
116
+
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pymongo
2
+ python-dotenv