# Streamlit chat app: Llama 3.1 405B via the NVIDIA NIM OpenAI-compatible API.
import streamlit as st
import os
from openai import OpenAI

# NVIDIA NIM exposes an OpenAI-compatible endpoint; auth is read from the
# NVIDIANIM_API_KEY environment variable.  os.getenv returns None when the
# variable is unset, which defers the failure to the first API call.
api_key = os.getenv("NVIDIANIM_API_KEY")

client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

model_name = "meta/llama-3.1-405b-instruct"

# Persist the running chat transcript across Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []
def get_llama_response(question):
    """Send *question* (plus the prior transcript) to the NIM chat endpoint.

    Appends the user turn to ``st.session_state.messages``, streams the
    model's reply, records the complete reply as an assistant turn, and
    returns the reply text.
    """
    st.session_state.messages.append({"role": "user", "content": question})

    stream = client.chat.completions.create(
        model=model_name,
        messages=st.session_state.messages,
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )

    # Accumulate the streamed deltas; chunks with a None delta (e.g. the
    # final stop chunk) are skipped.
    parts = []
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            parts.append(delta)
    answer = "".join(parts)

    st.session_state.messages.append({"role": "assistant", "content": answer})
    return answer
st.title("Ask Llama 3.1 405B on Nvidia NIM")

# Replay the stored conversation so it survives Streamlit reruns.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Bug fix: the original read st.session_state.user_input, but no input widget
# ever wrote to that key, so the Submit button could never see any text.
# st.chat_input renders a chat box and returns the submitted text (or None
# when nothing was submitted this rerun).
user_input = st.chat_input("Ask a question")
if user_input:
    # Render the user's turn immediately.  History book-keeping is owned
    # entirely by get_llama_response(); the original also appended the user
    # message here, which duplicated every user turn in the transcript sent
    # to the model.
    with st.chat_message("user"):
        st.markdown(user_input)
    response = get_llama_response(user_input)
    with st.chat_message("assistant"):
        st.markdown(response)