Ankitajadhav committed
Commit 43a8cd8 · verified · 1 Parent(s): b406e04

Update app.py

Files changed (1)
  1. app.py +79 -116
app.py CHANGED
@@ -1,133 +1,96 @@
-import shutil
 import os
-__import__('pysqlite3')
-import sys
-sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-from sentence_transformers import SentenceTransformer
-import chromadb
-from datasets import load_dataset
 import gradio as gr
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
-
-# Set environment variables to address warnings
-os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
-
-
-torch.random.manual_seed(0)
-model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
+import copy
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+import chromadb
+from sentence_transformers import SentenceTransformer
 
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    low_cpu_mem_usage=True,
-    torch_dtype="auto",
-    trust_remote_code=True,
+# Initialize the Llama model
+llm = Llama(
+    model_path=hf_hub_download(
+        repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
+        filename="Phi-3-mini-4k-instruct-q4.gguf",
+    ),
+    n_ctx=2048,
+    n_gpu_layers=50,  # Adjust based on your VRAM
 )
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-# Function to clear the cache
-def clear_cache(model_name):
-    cache_dir = os.path.expanduser(f'~/.cache/torch/sentence_transformers/{model_name.replace("/", "_")}')
-    if os.path.exists(cache_dir):
-        shutil.rmtree(cache_dir)
-        print(f"Cleared cache directory: {cache_dir}")
-    else:
-        print(f"No cache directory found for: {cache_dir}")
 
-# Embedding vector
+# Initialize ChromaDB Vector Store
 class VectorStore:
     def __init__(self, collection_name):
-        try:
-            self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
-        except Exception as e:
-            print(f"Error loading model: {e}")
-            raise
+        self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
         self.chroma_client = chromadb.Client()
         self.collection = self.chroma_client.create_collection(name=collection_name)
 
-    def populate_vectors(self, dataset, batch_size=20):
-        dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train')
-        dataset = dataset.select(range(1500))
-
-        texts = []
-        i = 0
-        for example in dataset:
-            title = example['title_cleaned']
-            recipe = example['recipe_new']
-            meal_type = example['meal_type']
-            allergy = example['allergy_type']
-            ingredients_alternative = example['ingredients_alternatives']
-            text = f"{title} {recipe} {meal_type} {allergy} {ingredients_alternative}"
-            texts.append(text)
-            if (i + 1) % batch_size == 0:
-                self._process_batch(texts, i)
-                texts = []
-            i += 1
-        if texts:
-            self._process_batch(texts, i)
-
-    def _process_batch(self, texts, batch_start_idx):
-        embeddings = self.embedding_model.encode(texts, batch_size=len(texts)).tolist()
-        for j, embedding in enumerate(embeddings):
-            self.collection.add(embeddings=[embedding], documents=[texts[j]], ids=[str(batch_start_idx + j)])
+    def populate_vectors(self, texts, ids):
+        embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
+        for text, embedding, doc_id in zip(texts, embeddings, ids):
+            self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])
 
     def search_context(self, query, n_results=1):
-        query_embeddings = self.embedding_model.encode(query).tolist()
-        return self.collection.query(query_embeddings=query_embeddings, n_results=n_results)
+        query_embedding = self.embedding_model.encode([query]).tolist()
+        results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
+        return results['documents']
 
+# Example initialization (assuming you've already populated the vector store)
 vector_store = VectorStore("embedding_vector")
-vector_store.populate_vectors(dataset=None)
 
-def fine_tune_model():
-    dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train')
-    dataset = dataset.select(range(1500))
-
-    def tokenize_function(examples):
-        return tokenizer(
-            [" ".join([title, recipe]) for title, recipe in zip(examples['title_cleaned'], examples['recipe_new'])],
-            padding="max_length",
-            truncation=True
-        )
-
-    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=8)
-
-    training_args = TrainingArguments(
-        output_dir="./results",
-        evaluation_strategy="epoch",
-        learning_rate=2e-5,
-        per_device_train_batch_size=4,
-        per_device_eval_batch_size=4,
-        num_train_epochs=3,
-        weight_decay=0.01,
+# Populate with your data if not already done
+# vector_store.populate_vectors(your_texts, your_ids)
+
+def generate_text(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    # Retrieve context from vector store
+    context_results = vector_store.search_context(message, n_results=1)
+    context = context_results[0] if context_results else ""
+
+    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n {context}\n"
+    for interaction in history:
+        input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
+    input_prompt += f"{message} [/INST] "
+
+    temp = ""
+    output = llm(
+        input_prompt,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=40,
+        repeat_penalty=1.1,
+        max_tokens=max_tokens,
+        stop=["</s>", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
+        stream=True,
     )
-
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=tokenized_datasets,
-    )
-
-    trainer.train()
-
-fine_tune_model()
-
-conversation_history = []
-
-def chatbot_response(user_input):
-    global conversation_history
-    results = vector_store.search_context(user_input, n_results=1)
-    context = results['documents'][0] if results['documents'] else ""
-    conversation_history.append(f"User: {user_input}\nContext: {context[:150]}\nBot:")
-    inputs = tokenizer("\n".join(conversation_history), return_tensors="pt")
-    outputs = model.generate(**inputs, max_length=150, do_sample=True, temperature=0.7)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    conversation_history.append(response)
-    return response
-
-def chat(user_input):
-    response = chatbot_response(user_input)
-    return response
-
-css = ".gradio-container {background: url(https://upload.wikimedia.org/wikipedia/commons/f/f5/Spring_Kitchen_Line-Up_%28Unsplash%29.jpg)}"
-iface = gr.Interface(fn=chat, inputs="text", outputs="text", css=css)
-iface.launch()
+    for out in output:
+        temp += out["choices"][0]["text"]
+        yield temp
+
+# Define the Gradio interface
+demo = gr.ChatInterface(
+    generate_text,
+    title="llama-cpp-python on GPU with ChromaDB",
+    description="Running LLM with context retrieval from ChromaDB",
+    examples=[
+        ["I have leftover rice, what can I make out of it?"],
+        ["Can I make lunch for two people with this?"],
+    ],
+    cache_examples=False,
+    retry_btn=None,
+    undo_btn="Delete Previous",
+    clear_btn="Clear",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+)
 
+if __name__ == "__main__":
+    demo.launch()
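
Note that the new populate_vectors(texts, ids) no longer loads the recipe dataset itself, so the Chroma collection starts empty until the caller supplies texts and ids. A minimal seeding sketch, reusing the same dataset, columns, and 1500-row cap as the removed code (the seed_vector_store helper is hypothetical, not part of this commit):

from datasets import load_dataset

def seed_vector_store(store, limit=1500):
    # Hypothetical helper: rebuild the concatenated recipe texts the removed
    # populate_vectors assembled, then feed them to the new API.
    dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full', split='train')
    dataset = dataset.select(range(limit))  # the old code also capped at 1500 rows
    texts = [
        f"{ex['title_cleaned']} {ex['recipe_new']} {ex['meal_type']} "
        f"{ex['allergy_type']} {ex['ingredients_alternatives']}"
        for ex in dataset
    ]
    ids = [str(i) for i in range(len(texts))]
    store.populate_vectors(texts, ids)

seed_vector_store(vector_store)

One shape worth knowing: ChromaDB's query returns one list of documents per query embedding, so search_context returns a list of lists and generate_text's context_results[0] is itself a list, which gets spliced into the prompt via its list repr. Indexing one level deeper yields the raw string:

docs = vector_store.search_context("leftover rice ideas", n_results=1)
# docs == [['<matched recipe text>']]; docs[0][0] is the plain document string.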