from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI from dotenv import load_dotenv import os load_dotenv() llm = HuggingFaceInferenceAPI( model_name="Qwen/Qwen2.5-Coder-32B-Instruct", temperature=0.7, max_tokens=20, token=os.getenv("HUGGINGFACE_TOKEN") ) if __name__ == "__main__": # Example usage response = llm.complete("Hello, how are you?")