tasal9 committed on
Commit
b57fe52
·
1 Parent(s): 8e58a9b

Refactor _clean_generated_text function to improve text processing and handling of response markers

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +38 -9
__pycache__/app.cpython-313.pyc CHANGED
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
 
app.py CHANGED
@@ -72,15 +72,44 @@ def generate_prompt(instruction: str, input_text: str = "") -> str:
72
 
73
 
74
  def _clean_generated_text(generated: str, prompt: str) -> str:
75
- generated = generated.strip()
76
- # remove prompt if accidentally included
77
- if generated.startswith(prompt):
78
- generated = generated[len(prompt):].strip()
79
- # remove common markers
80
- for cut in ["### Instruction:", "### Response:", "ځواب:"]:
81
- if generated.startswith(cut):
82
- generated = generated[len(cut):].strip()
83
- return generated
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  def _detect_device() -> int:
 
72
 
73
 
74
  def _clean_generated_text(generated: str, prompt: str) -> str:
75
+ import re
76
+
77
+ g = generated.strip()
78
+
79
+ # If the model included a response marker, prefer the content after the last one
80
+ resp_token = "### Response:"
81
+ if resp_token in g:
82
+ g = g.split(resp_token)[-1].strip()
83
+
84
+ # Remove the original prompt (if present anywhere)
85
+ try:
86
+ if prompt and prompt.strip() and prompt in g:
87
+ g = g.replace(prompt, "").strip()
88
+ except Exception:
89
+ pass
90
+
91
+ # Remove common markers that may appear anywhere
92
+ for cut in ["### Instruction:", "### Response:", "Instruction:", "Response:", "ځواب:"]:
93
+ g = g.replace(cut, "").strip()
94
+
95
+ # If the prompt contained an instruction section, remove that instruction text
96
+ try:
97
+ if "### Instruction:" in prompt:
98
+ ins_part = prompt.split("### Instruction:", 1)[1]
99
+ # stop at response if present
100
+ if "### Response:" in ins_part:
101
+ ins_text = ins_part.split("### Response:", 1)[0].strip()
102
+ else:
103
+ ins_text = ins_part.strip()
104
+ if ins_text and ins_text in g:
105
+ g = g.replace(ins_text, "").strip()
106
+ except Exception:
107
+ pass
108
+
109
+ # Trim surrounding non-word characters and excessive whitespace
110
+ g = re.sub(r"^[\W_]+|[\W_]+$", "", g).strip()
111
+
112
+ return g
113
 
114
 
115
  def _detect_device() -> int: