Ankitajadhav committed on
Commit
d592f4d
·
verified ·
1 Parent(s): 2fe908e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -3,6 +3,7 @@ import copy
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import chromadb
 
6
  from sentence_transformers import SentenceTransformer
7
 
8
  # Initialize the Llama model
@@ -26,24 +27,26 @@ class VectorStore:
26
  self.chroma_client = chromadb.Client()
27
  self.collection = self.chroma_client.create_collection(name=collection_name)
28
 
29
- # def populate_vectors(self, texts, ids):
30
- # embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
31
- # for text, embedding, doc_id in zip(texts, embeddings, ids):
32
- # self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])
 
 
33
 
34
  # Method to populate the vector store with embeddings from a dataset
35
- def populate_vectors(self, dataset):
36
- # Select the text columns to concatenate
37
- # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
38
- recipe = dataset['train']['recipe_new'][:1000]
39
- allergy = dataset['train']['allergy_type'][:1000]
40
- ingredients = dataset['train']['ingredients_alternatives'][:1000]
41
 
42
- # Concatenate the text from both columns
43
- texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
44
- for i, item in enumerate(texts):
45
- embeddings = self.embedding_model.encode(item).tolist()
46
- self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
47
 
48
  def search_context(self, query, n_results=1):
49
  query_embedding = self.embedding_model.encode([query]).tolist()
@@ -51,7 +54,9 @@ class VectorStore:
51
  return results['documents']
52
 
53
  # Example initialization (assuming you've already populated the vector store)
 
54
  vector_store = VectorStore("embedding_vector")
 
55
 
56
  def generate_text(
57
  message,
 
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import chromadb
6
+ from datasets import load_dataset
7
  from sentence_transformers import SentenceTransformer
8
 
9
  # Initialize the Llama model
 
27
  self.chroma_client = chromadb.Client()
28
  self.collection = self.chroma_client.create_collection(name=collection_name)
29
 
30
+
31
+
32
def populate_vectors(self, texts, ids):
    """Embed *texts* and store them in the Chroma collection.

    Args:
        texts: Sequence of document strings to embed.
        ids: Sequence of unique string ids, parallel to *texts*.
    """
    # Encode all texts in one batched call; .tolist() converts the
    # numpy array returned by SentenceTransformer.encode to plain lists.
    embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
    # Single batched add: chromadb's Collection.add accepts parallel
    # lists, avoiding one round-trip per document. Unlike the previous
    # per-item zip() loop, mismatched texts/ids lengths now raise
    # instead of being silently truncated.
    self.collection.add(
        embeddings=embeddings,
        documents=list(texts),
        ids=list(ids),
    )
36
 
37
  # Method to populate the vector store with embeddings from a dataset
38
+ # def populate_vectors(self, dataset):
39
+ # # Select the text columns to concatenate
40
+ # # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
41
+ # recipe = dataset['train']['recipe_new'][:1000]
42
+ # allergy = dataset['train']['allergy_type'][:1000]
43
+ # ingredients = dataset['train']['ingredients_alternatives'][:1000]
44
 
45
+ # # Concatenate the text from both columns
46
+ # texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
47
+ # for i, item in enumerate(texts):
48
+ # embeddings = self.embedding_model.encode(item).tolist()
49
+ # self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
50
 
51
  def search_context(self, query, n_results=1):
52
  query_embedding = self.embedding_model.encode([query]).tolist()
 
54
  return results['documents']
55
 
56
  # Example initialization (assuming you've already populated the vector store)
57
# Load the recipe dataset and populate the vector store at startup.
dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
vector_store = VectorStore("embedding_vector")

# BUG FIX: populate_vectors now takes (texts, ids); passing the raw
# dataset would raise TypeError (missing `ids`). Build the concatenated
# texts exactly as the previous dataset-based implementation did:
# recipe + ingredient alternatives + allergy type, limited to 1000 rows.
_train = dataset['train']
_recipes = _train['recipe_new'][:1000]
_allergies = _train['allergy_type'][:1000]
_ingredients = _train['ingredients_alternatives'][:1000]
_texts = [f"{rep} {ingr} {alle}" for rep, ingr, alle in zip(_recipes, _ingredients, _allergies)]
vector_store.populate_vectors(_texts, [str(i) for i in range(len(_texts))])
60
 
61
  def generate_text(
62
  message,