Spaces:

rahul7star
/

hindi

Sleeping

App Files Files Community

rahul7star committed on Feb 27

Commit

69135cb

verified ·

1 Parent(s): 891d702

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -1

app.py CHANGED Viewed

@@ -24,9 +24,65 @@ def generate_poetry(prompt, max_length=100, temperature=0.7, top_k=50, top_p=0.9
         )
     return tokenizer.decode(output[0], skip_special_tokens=True)
 # 3️⃣ Gradio Interface
 interface = gr.Interface(
-    fn=generate_poetry,
     inputs=[
         gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
         gr.Slider(50, 500, step=10, value=100, label="Max Length"),

         )
     return tokenizer.decode(output[0], skip_special_tokens=True)
+def generate_random_poem(num_lines=4, max_length=150, temperature=1.0, top_p=0.9):
+    """Generate a short poem seeded with a random line from the dataset.
+
+    A line is drawn at random from ``dataset["poem"]``, prefixed with a fixed
+    opening phrase, and handed to ``model.generate``. The decoded text is split
+    on the danda ("।") into lines; if that yields fewer than ``num_lines``
+    lines, further random dataset lines are appended to fill the gap.
+
+    Args:
+        num_lines: Number of lines in the returned poem.
+        max_length: Used both as the truncation limit when tokenizing the
+            prompt and as ``max_length`` for generation.
+        temperature: Sampling temperature forwarded to ``model.generate``.
+        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
+
+    Returns:
+        The poem lines joined with newline characters.
+    """
+    # Seed the prompt: a fixed opening phrase followed by a random dataset line.
+    random_line = random.choice(dataset["poem"])
+    input_text = f"मैया मोरी {random_line} "
+
+    # Tokenize the prompt and move the tensors to the model's device.
+    encoding = tokenizer(
+        input_text,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=max_length,
+    )
+    device = model.device
+    input_ids = encoding.input_ids.to(device)
+    attention_mask = encoding.attention_mask.to(device)
+
+    # Sampling is enabled together with 5 beams; the repetition penalty and the
+    # bigram ban discourage repeated phrases. The EOS token doubles as padding.
+    generation_kwargs = {
+        "attention_mask": attention_mask,
+        "max_length": max_length,
+        "temperature": temperature,
+        "top_p": top_p,
+        "do_sample": True,
+        "repetition_penalty": 1.5,
+        "num_beams": 5,
+        "no_repeat_ngram_size": 2,
+        "early_stopping": True,
+        "pad_token_id": tokenizer.eos_token_id,
+    }
+    output = model.generate(input_ids, **generation_kwargs)
+
+    # Decode the first returned sequence and break it into lines on the danda.
+    decoded_text = tokenizer.decode(output[0], skip_special_tokens=True).strip()
+    stripped_parts = (part.strip() for part in decoded_text.split("।"))
+    poem_lines = [part for part in stripped_parts if part]
+
+    # Top up with random dataset lines until there are at least num_lines.
+    while len(poem_lines) < num_lines:
+        filler_line = random.choice(dataset["poem"])
+        poem_lines.append(filler_line.strip())
+
+    # Keep only the first num_lines lines and join them with newlines.
+    return "\n".join(poem_lines[:num_lines])
 # 3️⃣ Gradio Interface
 interface = gr.Interface(
+    #fn=generate_poetry,
+    fn=generate_random_poem(num_lines=4),
     inputs=[
         gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
         gr.Slider(50, 500, step=10, value=100, label="Max Length"),