rahul7star committed on
Commit
69135cb
·
verified ·
1 Parent(s): 891d702

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -1
app.py CHANGED
@@ -24,9 +24,65 @@ def generate_poetry(prompt, max_length=100, temperature=0.7, top_k=50, top_p=0.9
24
  )
25
  return tokenizer.decode(output[0], skip_special_tokens=True)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # 3️⃣ Gradio Interface
28
  interface = gr.Interface(
29
- fn=generate_poetry,
 
30
  inputs=[
31
  gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
32
  gr.Slider(50, 500, step=10, value=100, label="Max Length"),
 
24
  )
25
  return tokenizer.decode(output[0], skip_special_tokens=True)
26
 
27
def generate_random_poem(num_lines=4, max_length=150, temperature=1.0, top_p=0.9):
    """Generate a short Hindi poem seeded with a random line from the dataset.

    A random entry of ``dataset["poem"]`` is prefixed with a fixed opening
    phrase and used as the prompt for ``model.generate``. The decoded text
    (which includes the prompt) is split into lines; if fewer than
    ``num_lines`` lines come back, the remainder is filled with further
    random dataset entries.

    Args:
        num_lines: Number of lines the returned poem should contain.
        max_length: Upper bound, in tokens, on prompt plus generated text
            (passed to ``model.generate`` as ``max_length``).
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The poem as a single string with lines joined by ``"\\n"``.

    Relies on the module-level names ``dataset``, ``tokenizer`` and ``model``.
    """
    # Imported locally so the function does not depend on module-level
    # imports that the surrounding file may not provide.
    import random
    import re

    # Seed the prompt with a random dataset line behind a fixed opening phrase.
    seed_line = random.choice(dataset["poem"])
    input_text = f"मैया मोरी {seed_line} "  # Unique start to force variety

    # Truncate the prompt to at most half of max_length. Truncating it to the
    # full max_length (the same cap given to generate) could leave no room
    # for any newly generated tokens when the seed line is long.
    # No padding is requested: a single sequence never needs it.
    prompt_budget = max(1, max_length // 2)
    encoding = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=prompt_budget,
    )
    input_ids = encoding.input_ids.to(model.device)
    attention_mask = encoding.attention_mask.to(model.device)

    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        repetition_penalty=1.5,  # Discourage repeated tokens
        num_beams=5,  # Beam search combined with sampling
        no_repeat_ngram_size=2,  # Never repeat the same bigram
        early_stopping=True,
        pad_token_id=tokenizer.eos_token_id,  # Use EOS as the padding id
    )

    generated_poem = tokenizer.decode(output[0], skip_special_tokens=True).strip()

    # Break into lines on the danda ("।"), double danda ("॥") or newlines, so
    # newline-separated output is counted as separate lines too.
    poem_lines = [
        piece.strip()
        for piece in re.split(r"[।॥\n]+", generated_poem)
        if piece.strip()
    ]

    # Top up with random dataset lines when the model produced too few.
    while len(poem_lines) < num_lines:
        poem_lines.append(random.choice(dataset["poem"]).strip())

    # Keep only the requested number of lines.
    return "\n".join(poem_lines[:num_lines])
79
+
80
+
81
+
82
  # 3️⃣ Gradio Interface
83
  interface = gr.Interface(
84
+ #fn=generate_poetry,
85
+ fn=generate_random_poem(num_lines=4),
86
  inputs=[
87
  gr.Textbox(label="Enter Prompt", placeholder="Start your Hindi poem..."),
88
  gr.Slider(50, 500, step=10, value=100, label="Max Length"),