import os

# These environment flags are exported before the project imports below,
# preserving the original statement order.
# NOTE(review): presumably the downstream libraries read them at import time;
# confirm before moving these lines.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["NO_CUDA_EXT"] = "1"

from reader_llm import get_reader_llm
from retrieval import get_retriever
from answer_rag import answer_with_rag2
import streamlit as st

# Page configuration.
st.set_page_config(page_title="RAG", layout="wide")
st.title("Туристический путеводитель")
st.header("Города: Ярославль, Екатеринбург, Нижний Новгород, Владимир")


@st.cache_resource
def load_models():
    """Create the reader LLM and the retriever objects.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        A 3-tuple ``(reader_llm, embedding_model, knowledge_vector_database)``
        where the last two items are the pair returned by ``get_retriever()``.
    """
    # Lightweight model for the app hosted on Hugging Face.
    reader = get_reader_llm(name="Vikhrmodels/Vikhr-Llama-3.2-1B-Instruct")
    embedder, vector_db = get_retriever()
    return reader, embedder, vector_db


# The embedding model is not used directly by this script, hence the `_`.
READER_LLM, _, KNOWLEDGE_VECTOR_DATABASE = load_models()

# Create the chat history list when the session does not hold one yet.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Re-render every stored message.
for past_message in st.session_state["messages"]:
    with st.chat_message(past_message["role"]):
        st.markdown(past_message["content"])

prompt = st.chat_input("Задайте Ваш вопрос")
if prompt:
    # Store and echo the user's question.
    st.session_state["messages"].append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        # Show a spinner while the RAG call runs.
        with st.spinner("Ищу информацию..."):
            reply, _sources = answer_with_rag2(
                question=prompt,
                llm=READER_LLM,
                knowledge_index=KNOWLEDGE_VECTOR_DATABASE,
            )
        st.markdown(reply)

    # Only the answer text is stored; the second returned value is not used.
    st.session_state["messages"].append({"role": "assistant", "content": reply})