from llama_cpp import Llama
import streamlit as st

model_path = "vicuna-13b-v1.5.ggmlv3.q2_K.bin"
llama = Llama(model_path)
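# Note (an assumption about the installed llama-cpp-python version): Llama()
# defaults to a fairly small context window (n_ctx=512 in older releases), so
# longer chats may need e.g. Llama(model_path, n_ctx=2048).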

def generate_response(messages: list) -> str:
    # max_tokens=-1 lets generation run until the context window is exhausted
    response = llama.create_chat_completion(messages, max_tokens=-1, stream=False)
    print(f"response: {response}")  # debug: log the raw completion object
    return response['choices'][0]['message']['content']
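
# A possible alternative (not part of the original app): llama-cpp-python can
# also stream tokens as they are generated via stream=True, which yields
# OpenAI-style chunks whose 'delta' dicts carry incremental content. A minimal
# sketch, reusing the `llama` instance above:
def generate_response_streaming(messages: list):
    for chunk in llama.create_chat_completion(messages, max_tokens=-1, stream=True):
        delta = chunk['choices'][0]['delta']
        if 'content' in delta:
            yield delta['content']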

def main():
    st.title("Chat with Vicuna!")

    # Session state for retaining messages
    if 'messages' not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Input for the user message
    user_message = st.chat_input("Your Message")

    # React to user input
    if user_message:
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(user_message)
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": user_message})
with st.chat_message("assistant"): | |
message_placeholder = st.empty() | |
full_response = "" | |
for char in generate_response([{"role": m["role"], "content": m["content"]} for m in st.session_state.messages]): | |
full_response += char | |
message_placeholder.markdown(full_response + "β") | |
message_placeholder.markdown(full_response) | |
st.session_state.messages.append({"role": "assistant", "content": full_response}) | |

if __name__ == "__main__":
    main()
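
# To run locally (a sketch; assumes the quantized GGML weights, e.g. from the
# TheBloke/vicuna-13B-v1.5-GGML repository on Hugging Face, sit next to this
# script under the filename given above):
#   pip install llama-cpp-python streamlit
#   streamlit run app.py
# Note: newer llama-cpp-python releases expect GGUF model files, so a GGML
# .bin like this one may require an older release (roughly pre-0.1.79;
# check against your installed version).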