Spaces:

Ankitajadhav
/

Whats_Cooking

Runtime error

App Files Files Community

Ankitajadhav committed on Jul 7, 2024

Commit

ac63cbd

verified ·

1 Parent(s): a9d0935

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -27

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# import packages
 import shutil
 import os
 __import__('pysqlite3')
@@ -7,7 +6,6 @@ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 from sentence_transformers import SentenceTransformer
 import chromadb
 from datasets import load_dataset
-# from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 from transformers import GPT2Tokenizer, GPT2Model
@@ -19,7 +17,6 @@ tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 # Load the model with from_tf=True
 model = GPT2Model.from_pretrained(model_name, from_tf=True)
 # Function to clear the cache
 def clear_cache(model_name):
     cache_dir = os.path.expanduser(f'~/.cache/torch/sentence_transformers/{model_name.replace("/", "_")}')
@@ -29,12 +26,10 @@ def clear_cache(model_name):
     else:
         print(f"No cache directory found for: {cache_dir}")
 # Embedding vector
 class VectorStore:
     def __init__(self, collection_name):
-       # Initialize the embedding model
-         # Initialize the embedding model with try-except block for better error handling
         try:
             self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
         except Exception as e:
@@ -46,11 +41,11 @@ class VectorStore:
     # Method to populate the vector store with embeddings from a dataset
     def populate_vectors(self, dataset, batch_size=100):
         # Use dataset streaming
-        dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train[:1500]')
-        # Process in batches
         texts = []
-        for i, example in enumerate(dataset):
             title = example['title_cleaned']
             recipe = example['recipe_new']
             meal_type = example['meal_type']
@@ -66,6 +61,8 @@ class VectorStore:
                 self._process_batch(texts, i)
                 texts = []
         # Process the remaining texts
         if texts:
             self._process_batch(texts, i)
@@ -79,24 +76,13 @@ class VectorStore:
         query_embeddings = self.embedding_model.encode(query).tolist()
         return self.collection.query(query_embeddings=query_embeddings, n_results=n_results)
-# create a vector embedding
 vector_store = VectorStore("embedding_vector")
 vector_store.populate_vectors(dataset=None)
-# Load the model and tokenizer
-# text generation model
-# model_name = "meta-llama/Meta-Llama-3-8B"
-# tokenizer = AutoTokenizer.from_pretrained(model_name)
-# model = AutoModelForCausalLM.from_pretrained(model_name)
-# load model orca-mini general purpose model
-# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
-# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
 # Define the chatbot response function
 def chatbot_response(user_input):
     global conversation_history
     results = vector_store.search_context(user_input, n_results=1)
@@ -108,13 +94,11 @@ def chatbot_response(user_input):
     conversation_history.append(response)
     return response
 # Gradio interface
 def chat(user_input):
     """Return the chatbot's reply to ``user_input``.
     Thin wrapper: forwards the text to ``chatbot_response`` and returns its
     result unchanged. This is the callable passed as ``fn`` to ``gr.Interface``.
     """
     response = chatbot_response(user_input)
     return response
 css = ".gradio-container {background: url(https://upload.wikimedia.org/wikipedia/commons/f/f5/Spring_Kitchen_Line-Up_%28Unsplash%29.jpg)}"
-iface = gr.Interface(fn=chat, inputs="text", outputs="text",css=css)
 iface.launch()

 import shutil
 import os
 __import__('pysqlite3')
 from sentence_transformers import SentenceTransformer
 import chromadb
 from datasets import load_dataset
 import gradio as gr
 from transformers import GPT2Tokenizer, GPT2Model
 # Load the model with from_tf=True
 model = GPT2Model.from_pretrained(model_name, from_tf=True)
 # Function to clear the cache
 def clear_cache(model_name):
     cache_dir = os.path.expanduser(f'~/.cache/torch/sentence_transformers/{model_name.replace("/", "_")}')
     else:
         print(f"No cache directory found for: {cache_dir}")
 # Embedding vector
 class VectorStore:
     def __init__(self, collection_name):
+        # Initialize the embedding model
         try:
             self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
         except Exception as e:
     # Method to populate the vector store with embeddings from a dataset
     def populate_vectors(self, dataset, batch_size=100):
         # Use dataset streaming
+        dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train[:1500]', streaming=True)
         texts = []
+        i = 0  # Initialize index
+        for example in dataset:
             title = example['title_cleaned']
             recipe = example['recipe_new']
             meal_type = example['meal_type']
                 self._process_batch(texts, i)
                 texts = []
+            i += 1  # Increment index
         # Process the remaining texts
         if texts:
             self._process_batch(texts, i)
         query_embeddings = self.embedding_model.encode(query).tolist()
         return self.collection.query(query_embeddings=query_embeddings, n_results=n_results)
+# Create a vector embedding
 vector_store = VectorStore("embedding_vector")
 vector_store.populate_vectors(dataset=None)
 # Define the chatbot response function
+conversation_history = []
 def chatbot_response(user_input):
     global conversation_history
     results = vector_store.search_context(user_input, n_results=1)
     conversation_history.append(response)
     return response
 # Gradio interface
 def chat(user_input):
     """Return the chatbot's reply to ``user_input``.
     Thin wrapper: forwards the text to ``chatbot_response`` and returns its
     result unchanged. This is the callable passed as ``fn`` to ``gr.Interface``.
     """
     response = chatbot_response(user_input)
     return response
 css = ".gradio-container {background: url(https://upload.wikimedia.org/wikipedia/commons/f/f5/Spring_Kitchen_Line-Up_%28Unsplash%29.jpg)}"
+iface = gr.Interface(fn=chat, inputs="text", outputs="text", css=css)
 iface.launch()