"""
Load LLMs from huggingface, Groq, etc.
"""
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)
from langchain.llms import HuggingFacePipeline, HuggingFaceTextGenInference
from langchain_groq import ChatGroq

# from langchain.chat_models import ChatOpenAI  # OpenAI chat model

def get_llm_hf_online(inference_api_url=""):
    """Get LLM via the Hugging Face inference API."""
    if not inference_api_url:  # fall back to the default API URL
        inference_api_url = (
            "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
        )

    llm = HuggingFaceTextGenInference(
        verbose=True,  # provide detailed logs of operation
        max_new_tokens=1024,  # maximum number of tokens that can be generated
        top_p=0.95,  # nucleus-sampling threshold controlling randomness
        temperature=0.1,  # low temperature for mostly deterministic output
        inference_server_url=inference_api_url,
        timeout=10,  # timeout (in seconds) for the connection to the server
    )

    return llm

def get_llm_hf_local(model_path):
    """Get local LLM from Hugging Face."""
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=2048,  # better setting?
        model_kwargs={"temperature": 0.1},  # better setting?
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    return llm

def get_groq_chat(model_name="llama-3.1-70b-versatile"):
    """Get chat LLM from Groq (reads the GROQ_API_KEY environment variable)."""
    llm = ChatGroq(temperature=0, model_name=model_name)

    return llm
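

# Quick usage sketch (an assumption, not part of the original module): shows how the
# factory functions above might be called through LangChain's Runnable interface.
# The prompt and the commented-out model path are placeholders; the hosted backends
# require HUGGINGFACEHUB_API_TOKEN and GROQ_API_KEY to be set in the environment.
if __name__ == "__main__":
    question = "What is retrieval-augmented generation?"

    # Hosted inference via the default zephyr-7b-beta endpoint
    online_llm = get_llm_hf_online()
    print(online_llm.invoke(question))

    # Groq-hosted chat model; ChatGroq returns a message object, so read .content
    groq_llm = get_groq_chat()
    print(groq_llm.invoke(question).content)

    # Local model (hypothetical path shown; downloads/loads weights onto available devices)
    # local_llm = get_llm_hf_local("/path/to/local/model")
    # print(local_llm.invoke(question))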