# NvidiaLlama31 / app.py
# Ley_Fill7
# Changed how it preserves history
# 5809885
# raw
# history blame
# 1.7 kB
import streamlit as st
import os
from openai import OpenAI
# The NVIDIA NIM key is read from the environment so it never lives in the repo.
api_key = os.getenv("NVIDIANIM_API_KEY")
if not api_key:
    # Fail early with a readable message instead of handing None to the
    # client and surfacing a generic client/authentication error later.
    st.error("The NVIDIANIM_API_KEY environment variable is not set.")
    st.stop()

# OpenAI-compatible client pointed at the NVIDIA endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

model_name = "meta/llama-3.1-405b-instruct"

# The conversation is kept in session state so it survives Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []
def get_llama_response(question):
    """Send *question* to the model and return the complete reply text.

    The question is appended to ``st.session_state.messages`` as a user
    turn, the whole history is sent to the chat-completions endpoint with
    streaming enabled, and the streamed fragments are joined into a single
    string. That string is appended to the history as the assistant turn
    and returned.

    Args:
        question: The user's prompt text.

    Returns:
        The assistant's full reply; an empty string if the stream carried
        no text.

    Raises:
        Exception: Any error raised by the client is propagated unchanged.
            The user turn added by this call is removed from the history
            first, so a failed request does not leave an unanswered turn
            behind.
    """
    history = st.session_state.messages
    history.append({"role": "user", "content": question})

    parts = []
    try:
        stream = client.chat.completions.create(
            model=model_name,
            messages=history,
            temperature=0.2,
            top_p=0.7,
            max_tokens=1024,
            stream=True,
        )
        for chunk in stream:
            # Guard against chunks with an empty ``choices`` list; indexing
            # [0] on such a chunk would raise IndexError mid-stream.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                parts.append(piece)
    except Exception:
        # Roll back the user turn appended above, then let the error surface.
        history.pop()
        raise

    response_text = "".join(parts)
    history.append({"role": "assistant", "content": response_text})
    return response_text
st.title("Ask Llama 3.1 405B on Nvidia NIM")

# Replay the stored conversation so earlier turns stay visible after a rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if "user_input" not in st.session_state:
    st.session_state.user_input = ""

# The keyed widget is what actually writes the typed text into
# st.session_state.user_input; without it the value stayed "" forever and
# the Submit button could never send anything.
user_input_displayed = st.text_input("Your question", key="user_input")

if st.button("Submit"):
    if user_input_displayed.strip():  # ignore empty / whitespace-only input
        # get_llama_response() records the user turn in the history itself;
        # appending it here as well stored every question twice.
        with st.chat_message("user"):
            st.markdown(user_input_displayed)
        response = get_llama_response(user_input_displayed)
        with st.chat_message("assistant"):
            st.markdown(response)