Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -24,9 +24,65 @@ def generate_poetry(prompt, max_length=100, temperature=0.7, top_k=50, top_p=0.9
|
|
24 |
)
|
25 |
return tokenizer.decode(output[0], skip_special_tokens=True)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# 3️⃣ Gradio Interface
|
28 |
interface = gr.Interface(
|
29 |
-
fn=generate_poetry,
|
|
|
30 |
inputs=[
|
31 |
gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
|
32 |
gr.Slider(50, 500, step=10, value=100, label="Max Length"),
|
|
|
24 |
)
|
25 |
return tokenizer.decode(output[0], skip_special_tokens=True)
|
26 |
|
27 |
+
def generate_random_poem(num_lines=4, max_length=150, temperature=1.0, top_p=0.9):
    """Generate a short Hindi poem seeded with a random line from the dataset.

    A random entry of ``dataset["poem"]`` is prefixed with a fixed opening
    phrase and used as the prompt. The decoded model output is split into
    lines on the danda ("।"), double danda ("॥") or line breaks; if fewer than
    ``num_lines`` lines come back, the remainder is filled with random dataset
    entries.

    Args:
        num_lines: Number of lines the returned poem should contain.
        max_length: Total token budget passed to ``model.generate`` (prompt
            plus generated tokens).
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The poem as a single string with its lines joined by newlines.
    """
    import re  # local import: only this function needs it

    # Seed the prompt with a random dataset line behind a fixed opening phrase.
    random_line = random.choice(dataset["poem"])
    input_text = f"मैया मोरी {random_line} "  # Unique start to force variety

    # generate()'s max_length counts the prompt tokens too. Truncating the
    # prompt to that same value would leave no room for new tokens, so the
    # prompt gets at most half of the budget.
    prompt_budget = max(1, max_length // 2)

    # A single sequence needs no padding; requesting it fails on tokenizers
    # that have no pad token defined.
    encoding = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=prompt_budget,
    )
    input_ids = encoding.input_ids.to(model.device)
    attention_mask = encoding.attention_mask.to(model.device)

    # Sampling combined with beams, plus repetition controls, for varied output.
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        repetition_penalty=1.5,  # discourage repeated tokens
        num_beams=5,
        no_repeat_ngram_size=2,  # never repeat the same bigram
        early_stopping=True,
        pad_token_id=tokenizer.eos_token_id,  # reuse EOS as the pad token
    )

    generated_poem = tokenizer.decode(output[0], skip_special_tokens=True).strip()

    # Break the text into lines on Hindi sentence punctuation or line breaks,
    # discarding empty fragments.
    fragments = re.split(r"[।॥\n]+", generated_poem)
    poem_lines = [fragment.strip() for fragment in fragments if fragment.strip()]

    # Not enough generated lines: top up with random dataset entries.
    missing = num_lines - len(poem_lines)
    poem_lines.extend(
        random.choice(dataset["poem"]).strip() for _ in range(missing)
    )

    return "\n".join(poem_lines[:num_lines])
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
# 3️⃣ Gradio Interface
|
83 |
interface = gr.Interface(
|
84 |
+
#fn=generate_poetry,
|
85 |
+
fn=generate_random_poem(num_lines=4),
|
86 |
inputs=[
|
87 |
gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
|
88 |
gr.Slider(50, 500, step=10, value=100, label="Max Length"),
|