import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import gradio as gr

# Check if a GPU is available and use it, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained model and tokenizer
model_path = "Blexus/Quble_Test_Model_v1_Pretrain"
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path).to(device)

# Set the model to evaluation mode (disables dropout)
model.eval()

# Generate text based on an input prompt
def generate_text(prompt):
    # Tokenize and encode the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Generate a continuation without tracking gradients
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids,
            max_length=50,                        # Maximum total length (prompt + continuation)
            num_return_sequences=1,               # Generate a single sequence
            pad_token_id=tokenizer.eos_token_id,  # Use the EOS token for padding
            do_sample=True,                       # Enable sampling
            top_k=50,                             # Top-k sampling
            top_p=0.95,                           # Nucleus (top-p) sampling
        )

    # Decode the generated token IDs back into text
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return generated_text

# Create the Gradio interface
interface = gr.Interface(
    fn=generate_text,                      # Function called on each interaction
    inputs="text",                         # Input type: single-line text
    outputs="text",                        # Output type: the generated text
    title="Quble Text Generation",         # Title of the UI
    description="Enter a prompt to generate text using Quble.",
)

# Launch the Gradio app
interface.launch()
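
# A hedged variant (assumption: a recent transformers release; not verified
# against this Space's pinned requirements). `max_new_tokens` caps only the
# continuation, so a prompt longer than 50 tokens is not silently left with no
# generation budget the way `max_length=50` would leave it, and `share=True`
# asks Gradio to create a temporary public URL for the demo.
#
# generated_ids = model.generate(
#     input_ids,
#     max_new_tokens=50,                    # Cap the continuation only
#     pad_token_id=tokenizer.eos_token_id,
#     do_sample=True,
#     top_k=50,
#     top_p=0.95,
# )
# interface.launch(share=True)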