Ankitajadhav commited on
Commit
b1f94d7
·
verified ·
1 Parent(s): d592f4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -17
app.py CHANGED
@@ -27,26 +27,24 @@ class VectorStore:
27
  self.chroma_client = chromadb.Client()
28
  self.collection = self.chroma_client.create_collection(name=collection_name)
29
 
30
-
31
-
32
- def populate_vectors(self, texts, ids):
33
- embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
34
- for text, embedding, doc_id in zip(texts, embeddings, ids):
35
- self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])
36
 
37
  # Method to populate the vector store with embeddings from a dataset
38
- # def populate_vectors(self, dataset):
39
- # # Select the text columns to concatenate
40
- # # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
41
- # recipe = dataset['train']['recipe_new'][:1000]
42
- # allergy = dataset['train']['allergy_type'][:1000]
43
- # ingredients = dataset['train']['ingredients_alternatives'][:1000]
44
 
45
- # # Concatenate the text from both columns
46
- # texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
47
- # for i, item in enumerate(texts):
48
- # embeddings = self.embedding_model.encode(item).tolist()
49
- # self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
50
 
51
  def search_context(self, query, n_results=1):
52
  query_embedding = self.embedding_model.encode([query]).tolist()
 
27
  self.chroma_client = chromadb.Client()
28
  self.collection = self.chroma_client.create_collection(name=collection_name)
29
 
30
+ # def populate_vectors(self, texts):
31
+ # embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
32
+ # for text, embedding in zip(texts, embeddings, ids):
33
+ # self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])
 
 
34
 
35
  # Method to populate the vector store with embeddings from a dataset
36
+ def populate_vectors(self, dataset):
37
+ # Select the text columns to concatenate
38
+ # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
39
+ recipe = dataset['train']['recipe_new'][:1000]
40
+ allergy = dataset['train']['allergy_type'][:1000]
41
+ ingredients = dataset['train']['ingredients_alternatives'][:1000]
42
 
43
+ # Concatenate the text from both columns
44
+ texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
45
+ for i, item in enumerate(texts):
46
+ embeddings = self.embedding_model.encode(item).tolist()
47
+ self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
48
 
49
  def search_context(self, query, n_results=1):
50
  query_embedding = self.embedding_model.encode([query]).tolist()