Spaces:
Build error
Build error
Update llm_utils.py
Browse files- llm_toolkit/llm_utils.py +4 -38
llm_toolkit/llm_utils.py
CHANGED
|
@@ -12,6 +12,10 @@ from tqdm import tqdm
|
|
| 12 |
|
| 13 |
def get_template(model_name):
|
| 14 |
model_name = model_name.lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
if "llama" in model_name:
|
| 16 |
return "llama3"
|
| 17 |
if "internlm" in model_name:
|
|
@@ -118,44 +122,6 @@ def test_model(model, tokenizer, prompt, device="cuda"):
|
|
| 118 |
)
|
| 119 |
|
| 120 |
|
| 121 |
-
def extract_answer(text, debug=False):
|
| 122 |
-
if text:
|
| 123 |
-
# Remove the begin and end tokens
|
| 124 |
-
text = re.sub(
|
| 125 |
-
r".*?(assistant|\[/INST\]).+?\b",
|
| 126 |
-
"",
|
| 127 |
-
text,
|
| 128 |
-
flags=re.DOTALL | re.MULTILINE,
|
| 129 |
-
)
|
| 130 |
-
if debug:
|
| 131 |
-
print("--------\nstep 1:", text)
|
| 132 |
-
|
| 133 |
-
text = re.sub(r"<.+?>.*", "", text, flags=re.DOTALL | re.MULTILINE)
|
| 134 |
-
if debug:
|
| 135 |
-
print("--------\nstep 2:", text)
|
| 136 |
-
|
| 137 |
-
text = re.sub(
|
| 138 |
-
r".*?end_header_id\|>\n\n", "", text, flags=re.DOTALL | re.MULTILINE
|
| 139 |
-
)
|
| 140 |
-
if debug:
|
| 141 |
-
print("--------\nstep 3:", text)
|
| 142 |
-
|
| 143 |
-
text = text.split("。")[0].strip()
|
| 144 |
-
if debug:
|
| 145 |
-
print("--------\nstep 4:", text)
|
| 146 |
-
|
| 147 |
-
text = re.sub(
|
| 148 |
-
r"^Response:.+?\b",
|
| 149 |
-
"",
|
| 150 |
-
text,
|
| 151 |
-
flags=re.DOTALL | re.MULTILINE,
|
| 152 |
-
)
|
| 153 |
-
if debug:
|
| 154 |
-
print("--------\nstep 5:", text)
|
| 155 |
-
|
| 156 |
-
return text
|
| 157 |
-
|
| 158 |
-
|
| 159 |
def eval_model(
|
| 160 |
model,
|
| 161 |
tokenizer,
|
|
|
|
| 12 |
|
| 13 |
def get_template(model_name):
|
| 14 |
model_name = model_name.lower()
|
| 15 |
+
if "mistral" in model_name:
|
| 16 |
+
return "mistral"
|
| 17 |
+
if "qwen" in model_name:
|
| 18 |
+
return "qwen"
|
| 19 |
if "llama" in model_name:
|
| 20 |
return "llama3"
|
| 21 |
if "internlm" in model_name:
|
|
|
|
| 122 |
)
|
| 123 |
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
def eval_model(
|
| 126 |
model,
|
| 127 |
tokenizer,
|