"""Gradio front-end for a private LLama model served via the
Hugging Face Inference API."""

import os

import requests
import gradio as gr

# Hugging Face API token, read from the environment (never hard-coded).
hf_token = os.getenv("hf_token")
api_url = (
    "https://api-inference.huggingface.co/models/"
    "antony-pk/llama-3-8b-Instruct-bnb-4bit-e10-emp-gold-jul16"
)


def query_huggingface_api(prompt):
    """POST *prompt* to the Inference API and return the decoded JSON.

    On success the API returns a list of generations, e.g.
    ``[{"generated_text": "..."}]``; on failure it returns a dict such as
    ``{"error": "..."}``. Network and HTTP errors are converted into an
    ``{"error": ...}`` dict so the UI can display them instead of crashing.
    """
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": prompt}
    try:
        # Timeout prevents the UI from hanging forever on a stalled request.
        response = requests.post(
            api_url, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        return {"error": str(exc)}


def generate_response(prompt):
    """Return the generated text for *prompt*, or an error message.

    Bug fix: the Inference API returns a *list* of generations on success;
    the original code called ``.get`` on that list, which raised
    ``AttributeError``. Handle both the list (success) and dict (error)
    response shapes.
    """
    response = query_huggingface_api(prompt)
    if isinstance(response, list) and response:
        return response[0].get("generated_text", "No response received")
    if isinstance(response, dict):
        # Error payloads look like {"error": "...", ...}; fall back to the
        # original key lookup for any dict-shaped success response.
        if "error" in response:
            return response["error"]
        return response.get("generated_text", "No response received")
    return "No response received"


# Create the Gradio interface.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="LLama Model Interaction",
    description="Enter a prompt to receive a response from the private LLama model.",
)

# Launch the Gradio interface at module level so platforms that execute the
# file directly (e.g. Hugging Face Spaces) still start the app.
interface.launch()