m96tkmok committed commit 14ce008 (verified) · Parent(s): 0741cf3

Update app.py


Update with llama_cpp: replace the transformers-based one-shot generator with a GGUF quantization of Llama-3.2-3B-Instruct served via llama-cpp-python, and turn the page into a chat UI that keeps history.
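Swapping transformers for llama_cpp also changes the Space's runtime dependencies. requirements.txt is not part of this commit, but a minimal sketch of what the new app.py would need (package names only, pins omitted):

    streamlit
    llama-cpp-python
    huggingface-hub  # used by Llama.from_pretrained to download the GGUF file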

Files changed (1): app.py (+68 −30)
app.py CHANGED
@@ -1,31 +1,69 @@
  import streamlit as st
- from transformers import AutoModelForCausalLM, AutoTokenizer
- from langchain_core.prompts import ChatPromptTemplate
-
- # Load the model and tokenizer
- tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
- model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
-
- st.title("Unsloth Llama-3.2-3B-Instruct Text Generation")
- st.write("Enter a prompt and generate text using the Unsloth Llama 3.2 3B model.")
-
- prompt = """
- You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
- If you don't know the answer, just say that you don't know.
- Answer in bullet points. Make sure your answer is relevant to the question and it is answered from the context only.
- Question: {question}
- Context: {context}
- Answer:
- """
-
- prompt = ChatPromptTemplate.from_template(prompt)
-
- with st.form("llm-form"):
-     user_input = st.text_area("Enter your question or statement:")
-     submit = st.form_submit_button("Submit")
-
- if submit:
-     inputs = tokenizer(user_input, return_tensors="pt")
-     outputs = model.generate(inputs["input_ids"], max_length=200)
-     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     st.write(generated_text)
+ from llama_cpp import Llama
+
+ if 'llm' not in st.session_state:
+     st.session_state.llm = Llama.from_pretrained(
+         repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
+         filename="Llama-3.2-3B-Instruct-Q8_0.gguf",
+         verbose=True,
+         n_ctx=32768,
+         n_threads=2,
+         chat_format="chatml"
+     )
+
+ # Build the chat message list from history and get a response from the model
+ def respond(message, history):
+     messages = []
+
+     for user_message, assistant_message in history:
+         if user_message:
+             messages.append({"role": "user", "content": user_message})
+         if assistant_message:
+             messages.append({"role": "assistant", "content": assistant_message})
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+     # Stream the response from the model
+     response_stream = st.session_state.llm.create_chat_completion(
+         messages=messages,
+         stream=True,
+         max_tokens=512,   # default value for simplicity
+         temperature=0.7,  # default value for simplicity
+         top_p=0.95        # default value for simplicity
+     )
+
+     # Collect the response chunks
+     for chunk in response_stream:
+         if "content" in chunk['choices'][0]["delta"]:
+             response += chunk['choices'][0]["delta"]["content"]
+
+     return response  # Return the full response
+
+ # Streamlit UI
+ st.title("Simple Chatbot")
+ st.write("### Interact with the chatbot!")
+
+ # User input field
+ user_message = st.text_area("Your Message:", "")
+
+ if "chat_history" not in st.session_state:
+     st.session_state['chat_history'] = []
+
+ # Button to send the message
+ if st.button("Send"):
+     if user_message:  # Only respond if the user entered a message
+         # Get the response from the model
+         response = respond(user_message, st.session_state['chat_history'])
+
+         # Add user message and model response to history
+         st.session_state['chat_history'].append((user_message, response))
+
+         # Note: reassigning the local variable does not clear the text_area widget
+         user_message = ""
+
+ st.write("## Chat History")
+ for user_msg, assistant_msg in reversed(st.session_state['chat_history']):
+     st.write(f"**🧑 User**: {user_msg}")
+     st.write(f"**🧠 Assistant**: {assistant_msg}")
+     st.write("---")
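One design note on the new generation path: respond() requests stream=True but accumulates the deltas and only returns the finished string, so the UI still blocks until generation completes. If token-by-token display is wanted, Streamlit (1.31+) can render a generator directly with st.write_stream; a minimal sketch under that assumption (token_stream is an illustrative name, not part of this commit):

    def token_stream(llm, messages):
        # Yield content deltas as llama_cpp produces them
        for chunk in llm.create_chat_completion(messages=messages, stream=True, max_tokens=512):
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                yield delta["content"]

    # Inside the Send handler, st.write_stream would both render tokens
    # incrementally and return the concatenated text:
    # response = st.write_stream(token_stream(st.session_state.llm, messages))

Separately, chat_format="chatml" forces the ChatML template; Llama 3.2 GGUF files ship their own chat template in metadata, so omitting chat_format (letting llama_cpp read it from the GGUF) or passing chat_format="llama-3" may match the model's training format more closely.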