import random

import gradio as gr
import torch
from datasets import load_dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# 1️⃣ Load the poetry dataset and the fine-tuned model/tokenizer
dataset = load_dataset("rahul7star/hindi-poetry")["train"]

model_name = "rahul7star/hindi_poetry_language_model"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# GPT-2 tokenizers ship without a pad token; reuse the EOS token so that
# padding in the tokenizer calls and pad_token_id in generate() are valid.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
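# Optional startup check (illustrative only, left commented out): confirm the
# checkpoint actually loaded by printing its parameter count.
#
#     n_params = sum(p.numel() for p in model.parameters())
#     print(f"Loaded {model_name} with {n_params:,} parameters")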

# 2️⃣ Functions to generate Hindi poetry
def generate_poetry_base(prompt, max_length=100, temperature=0.7, top_k=50, top_p=0.95):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            do_sample=True,  # sampling must be enabled for temperature/top_k/top_p to take effect
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=tokenizer.pad_token_id,
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)

def generate_poetry(prompt, max_length=100, temperature=0.7, top_k=50, top_p=0.95):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=1.5,   # discourage verbatim repetition
            num_beams=5,              # beam search combined with sampling for steadier output
            no_repeat_ngram_size=2,   # block repeated bigrams
            early_stopping=True,
            pad_token_id=tokenizer.pad_token_id,
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)
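# Quick local sanity check for the two helpers above (the prompt is only an
# illustrative example and is not part of the app's flow; uncomment to try it):
#
#     print(generate_poetry_base("मैया मोरी"))  # plain sampled continuation
#     print(generate_poetry("मैया मोरी"))       # sampling plus repetition controls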

# Poetry generation used by the Gradio app: seeds generation with the user's prompt
# (falling back to a random dataset line when the prompt is empty) and returns an
# explicit multi-line poem of at most num_lines lines.
def generate_random_poem(prompt, max_length=180, temperature=1.0, top_k=50, top_p=0.95, num_lines=4):
    # Use the prompt if provided; otherwise pick a random line from the dataset
    seed_text = prompt.strip() if prompt and prompt.strip() else random.choice(dataset["poem"])

    # Tokenize the seed text
    encoding = tokenizer(seed_text, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    input_ids = encoding.input_ids.to(model.device)
    attention_mask = encoding.attention_mask.to(model.device)

    # Generate the poem with sampling plus repetition controls
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=1.5,   # discourage verbatim repetition
        num_beams=5,              # beam search combined with sampling for steadier output
        no_repeat_ngram_size=2,   # block repeated bigrams
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode and split into lines; Hindi verse lines typically end with the danda "।"
    generated_poem = tokenizer.decode(output[0], skip_special_tokens=True).strip()
    poem_lines = [line.strip() for line in generated_poem.split("।") if line.strip()]

    # Keep at most num_lines lines and restore the danda at the end of each line
    final_poem = "\n".join(f"{line}।" for line in poem_lines[:num_lines])

    return final_poem
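# Example of calling the app's generator outside Gradio (values roughly mirror the
# slider defaults below; purely illustrative):
#
#     poem = generate_random_poem("मैया मोरी", max_length=180, temperature=0.7,
#                                 top_k=50, top_p=0.95, num_lines=4)
#     print(poem)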

# 3️⃣ Gradio Interface
interface = gr.Interface(
    fn=generate_random_poem,  # pass the function itself; Gradio calls it on each submit
    inputs=[
        gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
        gr.Slider(50, 500, step=10, value=180, label="Max Length"),
        gr.Slider(0.1, 1.5, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(1, 100, step=1, value=50, label="Top-k Sampling"),
        gr.Slider(0.1, 1.0, step=0.05, value=0.95, label="Top-p Sampling"),
    ],
    outputs=gr.Textbox(label="Generated Hindi Poem"),
    title="Hindi Poetry Generator ✨",
    description="Generate beautiful Hindi poetry. Just enter a prompt and adjust parameters. Example: 'मैया मोरी'",
)

# 4️⃣ Run the Gradio App
interface.launch(share=True)