Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import copy
|
|
3 |
from llama_cpp import Llama
|
4 |
from huggingface_hub import hf_hub_download
|
5 |
import chromadb
|
|
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
|
8 |
# Initialize the Llama model
|
@@ -26,24 +27,26 @@ class VectorStore:
|
|
26 |
self.chroma_client = chromadb.Client()
|
27 |
self.collection = self.chroma_client.create_collection(name=collection_name)
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
33 |
|
34 |
# Method to populate the vector store with embeddings from a dataset
|
35 |
-
def populate_vectors(self, dataset):
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
|
48 |
def search_context(self, query, n_results=1):
|
49 |
query_embedding = self.embedding_model.encode([query]).tolist()
|
@@ -51,7 +54,9 @@ class VectorStore:
|
|
51 |
return results['documents']
|
52 |
|
53 |
# Example initialization (assuming you've already populated the vector store)
|
|
|
54 |
vector_store = VectorStore("embedding_vector")
|
|
|
55 |
|
56 |
def generate_text(
|
57 |
message,
|
|
|
3 |
from llama_cpp import Llama
|
4 |
from huggingface_hub import hf_hub_download
|
5 |
import chromadb
|
6 |
+
from datasets import load_dataset
|
7 |
from sentence_transformers import SentenceTransformer
|
8 |
|
9 |
# Initialize the Llama model
|
|
|
27 |
self.chroma_client = chromadb.Client()
|
28 |
self.collection = self.chroma_client.create_collection(name=collection_name)
|
29 |
|
30 |
+
|
31 |
+
|
32 |
+
def populate_vectors(self, texts, ids):
    """Embed each text and store it in the Chroma collection under its id.

    Args:
        texts: sequence of document strings to embed.
        ids: sequence of unique string ids, parallel to ``texts``.
    """
    # Encode all documents in one batched call, then convert to plain lists
    # so Chroma receives JSON-serializable embeddings.
    encoded = self.embedding_model.encode(texts, batch_size=32)
    vectors = encoded.tolist()
    # Insert one document per call, pairing each text with its vector and id.
    for doc_id, document, vector in zip(ids, texts, vectors):
        self.collection.add(embeddings=[vector], documents=[document], ids=[doc_id])
|
36 |
|
37 |
# Method to populate the vector store with embeddings from a dataset
|
38 |
+
# def populate_vectors(self, dataset):
|
39 |
+
# # Select the text columns to concatenate
|
40 |
+
# # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
|
41 |
+
# recipe = dataset['train']['recipe_new'][:1000]
|
42 |
+
# allergy = dataset['train']['allergy_type'][:1000]
|
43 |
+
# ingredients = dataset['train']['ingredients_alternatives'][:1000]
|
44 |
|
45 |
+
# # Concatenate the text from both columns
|
46 |
+
# texts = [f"{rep} {ingr} {alle}" for rep, ingr,alle in zip(recipe, ingredients,allergy)]
|
47 |
+
# for i, item in enumerate(texts):
|
48 |
+
# embeddings = self.embedding_model.encode(item).tolist()
|
49 |
+
# self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
|
50 |
|
51 |
def search_context(self, query, n_results=1):
|
52 |
query_embedding = self.embedding_model.encode([query]).tolist()
|
|
|
54 |
return results['documents']
|
55 |
|
56 |
# Example initialization (assuming you've already populated the vector store)
|
57 |
+
# Load the recipe dataset and populate the vector store at import time.
dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
vector_store = VectorStore("embedding_vector")

# BUG FIX: populate_vectors now takes (texts, ids), but this call site was
# still passing the raw dataset object and omitting ids, which raised
# TypeError at startup (the Space's "Runtime error"). Build the documents
# the same way the previous ingestion code did: recipe text, ingredient
# alternatives and allergy type concatenated per example, limited to the
# first 1000 rows.
_train = dataset['train']
_recipes = _train['recipe_new'][:1000]
_allergies = _train['allergy_type'][:1000]
_ingredients = _train['ingredients_alternatives'][:1000]
_texts = [
    f"{recipe} {ingredient} {allergy}"
    for recipe, ingredient, allergy in zip(_recipes, _ingredients, _allergies)
]
vector_store.populate_vectors(_texts, [str(i) for i in range(len(_texts))])
|
60 |
|
61 |
def generate_text(
|
62 |
message,
|