Ankitajadhav committed on
Commit
d592f4d
·
verified ·
1 Parent(s): 2fe908e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -3,6 +3,7 @@ import copy
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import chromadb
 
6
  from sentence_transformers import SentenceTransformer
7
 
8
  # Initialize the Llama model
@@ -26,24 +27,26 @@ class VectorStore:
26
  self.chroma_client = chromadb.Client()
27
  self.collection = self.chroma_client.create_collection(name=collection_name)
28
 
29
- # def populate_vectors(self, texts, ids):
30
- # embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
31
- # for text, embedding, doc_id in zip(texts, embeddings, ids):
32
- # self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])
 
 
33
 
34
  # Method to populate the vector store with embeddings from a dataset
35
- def populate_vectors(self, dataset):
36
- # Select the text columns to concatenate
37
- # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
38
- recipe = dataset['train']['recipe_new'][:1000]
39
- allergy = dataset['train']['allergy_type'][:1000]
40
- ingredients = dataset['train']['ingredients_alternatives'][:1000]
41
 
42
- # Concatenate the text from both columns
43
- texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
44
- for i, item in enumerate(texts):
45
- embeddings = self.embedding_model.encode(item).tolist()
46
- self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
47
 
48
  def search_context(self, query, n_results=1):
49
  query_embedding = self.embedding_model.encode([query]).tolist()
@@ -51,7 +54,9 @@ class VectorStore:
51
  return results['documents']
52
 
53
  # Example initialization (assuming you've already populated the vector store)
 
54
  vector_store = VectorStore("embedding_vector")
 
55
 
56
  def generate_text(
57
  message,
 
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import chromadb
6
+ from datasets import load_dataset
7
  from sentence_transformers import SentenceTransformer
8
 
9
  # Initialize the Llama model
 
27
  self.chroma_client = chromadb.Client()
28
  self.collection = self.chroma_client.create_collection(name=collection_name)
29
 
30
+
31
+
32
def populate_vectors(self, texts, ids):
    """Embed *texts* and store them in the Chroma collection.

    Args:
        texts: Sequence of document strings to embed.
        ids: Sequence of unique string ids, parallel to *texts*.
    """
    # Encode all texts in one batched call; .tolist() converts the
    # numpy array returned by SentenceTransformer.encode to plain lists.
    embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
    # Single batched add: chromadb's Collection.add accepts parallel
    # lists, avoiding one round-trip per document. Unlike the previous
    # per-item zip() loop, mismatched texts/ids lengths now raise
    # instead of being silently truncated.
    self.collection.add(
        embeddings=embeddings,
        documents=list(texts),
        ids=list(ids),
    )
36
 
37
  # Method to populate the vector store with embeddings from a dataset
38
+ # def populate_vectors(self, dataset):
39
+ # # Select the text columns to concatenate
40
+ # # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
41
+ # recipe = dataset['train']['recipe_new'][:1000]
42
+ # allergy = dataset['train']['allergy_type'][:1000]
43
+ # ingredients = dataset['train']['ingredients_alternatives'][:1000]
44
 
45
+ # # Concatenate the text from both columns
46
+ # texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
47
+ # for i, item in enumerate(texts):
48
+ # embeddings = self.embedding_model.encode(item).tolist()
49
+ # self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
50
 
51
  def search_context(self, query, n_results=1):
52
  query_embedding = self.embedding_model.encode([query]).tolist()
 
54
  return results['documents']
55
 
56
  # Example initialization (assuming you've already populated the vector store)
57
# Load the recipe dataset and populate the vector store at startup.
dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
vector_store = VectorStore("embedding_vector")

# BUG FIX: populate_vectors now takes (texts, ids); passing the raw
# dataset would raise TypeError (missing `ids`). Build the concatenated
# texts exactly as the previous dataset-based implementation did:
# recipe + ingredient alternatives + allergy type, limited to 1000 rows.
_train = dataset['train']
_recipes = _train['recipe_new'][:1000]
_allergies = _train['allergy_type'][:1000]
_ingredients = _train['ingredients_alternatives'][:1000]
_texts = [f"{rep} {ingr} {alle}" for rep, ingr, alle in zip(_recipes, _ingredients, _allergies)]
vector_store.populate_vectors(_texts, [str(i) for i in range(len(_texts))])
60
 
61
  def generate_text(
62
  message,