# llm.py
from langchain_anthropic import ChatAnthropic
from langchain_community.chat_models import ChatOllama
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.runnables import ConfigurableField
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI


class StreamCallback(BaseCallbackHandler):
    """Prints each generated token to stdout as it streams in."""

    def on_llm_new_token(self, token: str, **kwargs):
        print(token, end="", flush=True)


def get_llm(streaming=True):
    """Return a chat model runnable with configurable alternatives.

    The default backend is gpt-4; callers can switch to another provider
    at runtime through the "llm" configurable field (keys: "gpt4",
    "claude", "gpt3", "gemini", "llama3", "ollama").
    """
    return ChatOpenAI(
        model="gpt-4",
        temperature=0,
        streaming=streaming,
        callbacks=[StreamCallback()],
    ).configurable_alternatives(
        # The field id ("llm") is the key used in
        # .with_config(configurable={"llm": "<alternative>"}).
        ConfigurableField(id="llm"),
        default_key="gpt4",
        claude=ChatAnthropic(
            model="claude-3-opus-20240229",
            temperature=0,
            streaming=streaming,
            callbacks=[StreamCallback()],
        ),
        gpt3=ChatOpenAI(
            model="gpt-3.5-turbo",
            temperature=0,
            streaming=streaming,
            callbacks=[StreamCallback()],
        ),
        # ChatGoogleGenerativeAI is the chat-model class, matching the
        # other alternatives (GoogleGenerativeAI is the completion variant).
        gemini=ChatGoogleGenerativeAI(
            model="gemini-1.5-flash",
            temperature=0,
            streaming=streaming,
            callbacks=[StreamCallback()],
        ),
        llama3=ChatGroq(
            model_name="llama3-70b-8192",
            temperature=0,
            streaming=streaming,
            callbacks=[StreamCallback()],
        ),
        ollama=ChatOllama(
            model="EEVE-Korean-10.8B:long",
            streaming=streaming,
            callbacks=[StreamCallback()],
        ),
    )
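

# A minimal usage sketch of the configurable-alternatives pattern above.
# Assumptions (not part of the original module): the relevant provider API
# keys (e.g. OPENAI_API_KEY, ANTHROPIC_API_KEY) are set in the environment,
# and a local Ollama server is running if the "ollama" key is selected.
if __name__ == "__main__":
    llm = get_llm()

    # Default backend (default_key="gpt4").
    llm.invoke("Hello!")

    # Swap the backend at runtime by binding a config with the "llm" field;
    # the underlying runnable is otherwise unchanged.
    claude = llm.with_config(configurable={"llm": "claude"})
    claude.invoke("Hello!")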