Spaces:
Sleeping
Sleeping
File size: 4,322 Bytes
007b4ac c3d1590 f1c0198 c3d1590 f1c0198 c3d1590 f1c0198 c3d1590 7e7020a c3d1590 7e7020a 5fd2c08 7e7020a 17c4ee5 8f28876 69135cb 8f28876 69135cb e579029 69135cb e579029 69135cb ceb622b 69135cb c3d1590 68a4c6f c3d1590 8f28876 c3d1590 68a4c6f d836ac3 c3d1590 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import random
from datasets import load_dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer
# Load the training split of the Hindi poetry corpus; its "poem" column is
# used elsewhere in this file to seed generation.
dataset = load_dataset("rahul7star/hindi-poetry")["train"]

# Load the fine-tuned GPT-2 language model and its tokenizer.
model_name = "rahul7star/hindi_poetry_language_model"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# GPT-2 tokenizers have no padding token unless one was saved with the
# checkpoint. Fall back to the end-of-sequence token so that any code relying
# on tokenizer.pad_token_id (or asking the tokenizer to pad) gets a valid id
# instead of None.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# 2️⃣ Function to Generate Hindi Poetry
def generate_poetry_base(prompt, max_length=100, temperature=0.7, top_k=50, top_p=0.95):
    """Generate a continuation of ``prompt`` by sampling from the model.

    Args:
        prompt: Text the generated poem starts from.
        max_length: Upper bound passed to ``model.generate`` as ``max_length``.
        temperature: Sampling temperature.
        top_k: Number of highest-probability tokens kept at each step.
        top_p: Nucleus-sampling probability mass kept at each step.

    Returns:
        The decoded first generated sequence with special tokens removed.
    """
    encoding = tokenizer(prompt, return_tensors="pt")
    input_ids = encoding.input_ids.to(model.device)
    attention_mask = encoding.attention_mask.to(model.device)

    # GPT-2 tokenizers may have no pad token; use EOS rather than passing None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            # Without do_sample=True decoding is greedy and temperature,
            # top_k and top_p are silently ignored.
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=pad_token_id,
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)
def generate_poetry(prompt, max_length=100, temperature=0.7, top_k=50, top_p=0.95):
    """Generate a continuation of ``prompt`` using beam search with sampling.

    Args:
        prompt: Text the generated poem starts from.
        max_length: Upper bound passed to ``model.generate`` as ``max_length``.
        temperature: Sampling temperature.
        top_k: Number of highest-probability tokens kept at each step.
        top_p: Nucleus-sampling probability mass kept at each step.

    Returns:
        The decoded first generated sequence with special tokens removed.
    """
    encoding = tokenizer(prompt, return_tensors="pt")
    input_ids = encoding.input_ids.to(model.device)
    attention_mask = encoding.attention_mask.to(model.device)

    # GPT-2 tokenizers may have no pad token; use EOS rather than passing None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            # top_k was accepted by this function but never forwarded.
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=1.5,  # discourage repeated tokens
            num_beams=5,  # sample within a 5-beam search
            no_repeat_ngram_size=2,  # never repeat the same bigram
            early_stopping=True,
            pad_token_id=pad_token_id,
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Poetry generation seeded by the user's prompt (or a random dataset poem when
# the prompt is blank), with the output laid out one verse per line.
def _format_poem(text, num_lines):
    """Split ``text`` on the danda ("।") and return one verse per line.

    Surrounding whitespace is stripped from every verse and empty verses are
    dropped. When ``num_lines`` is a positive number, only the first
    ``num_lines`` verses are kept; otherwise all verses are returned.
    """
    verses = [piece.strip() for piece in text.split("।")]
    verses = [verse for verse in verses if verse]
    if num_lines is not None and num_lines >= 1:
        verses = verses[:int(num_lines)]
    return "\n".join(verses)


def generate_random_poem(prompt, max_length=180, temperature=1.0, top_p=0.9, num_lines=4, top_k=50):
    """Generate a short Hindi poem.

    Args:
        prompt: Text to start the poem from. When empty or blank, a randomly
            chosen poem from the dataset is used as the seed instead.
        max_length: Upper bound passed to ``model.generate`` as ``max_length``.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass kept at each step.
        num_lines: Maximum number of verses in the returned poem; a value
            below 1 (or ``None``) disables the limit.
        top_k: Number of highest-probability tokens kept at each step.

    Returns:
        The generated text, split on "।" and joined with newlines.
    """
    seed_text = prompt.strip() if prompt else ""
    if not seed_text:
        # Index a single row instead of materialising the whole "poem" column.
        seed_text = dataset[random.randrange(len(dataset))]["poem"]
    input_text = f"{seed_text} "

    max_length = int(max_length)
    # max_length bounds the seed AND the continuation together, so cap the seed
    # at half of it; otherwise a long seed leaves no room for new tokens.
    seed_budget = max(1, max_length // 2)

    # A single sequence needs no padding; requesting it would fail on
    # tokenizers that have no pad token.
    encoding = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=seed_budget,
    )
    input_ids = encoding.input_ids.to(model.device)
    attention_mask = encoding.attention_mask.to(model.device)

    # Use the end-of-sequence token as the padding token during generation.
    pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=1.5,  # discourage repeated tokens
            num_beams=5,  # sample within a 5-beam search
            no_repeat_ngram_size=2,  # never repeat the same bigram
            early_stopping=True,
            pad_token_id=pad_token_id,
        )

    generated_poem = tokenizer.decode(output[0], skip_special_tokens=True).strip()
    return _format_poem(generated_poem, num_lines)


# 3️⃣ Gradio Interface
def _generate_from_ui(prompt, max_length, temperature, top_k, top_p):
    """Bind the UI controls to ``generate_random_poem`` by keyword.

    Gradio passes input values positionally in the order the components are
    listed, which does not match ``generate_random_poem``'s parameter order
    (the Top-k value would land in ``top_p``), so map them explicitly.
    """
    return generate_random_poem(
        prompt,
        max_length=int(max_length),
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
    )


interface = gr.Interface(
    fn=_generate_from_ui,
    inputs=[
        gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
        gr.Slider(50, 500, step=10, value=180, label="Max Length"),
        gr.Slider(0.1, 1.5, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(1, 100, step=1, value=50, label="Top-k Sampling"),
        gr.Slider(0.1, 1.0, step=0.05, value=0.95, label="Top-p Sampling"),
    ],
    outputs=gr.Textbox(label="Generated Hindi Poem"),
    title="Hindi Poetry Generator ✨",
    description="Generate beautiful Hindi poetry. Just enter a prompt and adjust parameters. Example: 'मैया मोरी'",
)

# 4️⃣ Run the Gradio App
interface.launch(share=True)
|