Spaces:
Build error
Build error
add env vars: MAX_NEW_TOKENS & REPETITION_PENALTY
Browse files
llm_toolkit/eval_logical_reasoning.py
CHANGED
|
@@ -24,6 +24,8 @@ data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
 results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
 use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
 using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
+max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
+repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
 
 dtype = (
     torch.bfloat16 if os.getenv("USE_BF16_FOR_INFERENCE") == "true" else torch.float16
@@ -66,7 +68,13 @@ if len(sys.argv) > 1:
 print_row_details(datasets["test"].to_pandas(), indices=[0, -1])
 
 print("Evaluating model: " + model_name)
-predictions = eval_model(model, tokenizer, datasets["test"])
+predictions = eval_model(
+    model,
+    tokenizer,
+    datasets["test"],
+    max_new_tokens=max_new_tokens,
+    repetition_penalty=repetition_penalty,
+)
 
 gpu_stats = torch.cuda.get_device_properties(0)
 start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
|
llm_toolkit/llm_utils.py
CHANGED
|
@@ -133,7 +133,14 @@ def extract_answer(text, debug=False):
     return text
 
 
-def eval_model(model, tokenizer, eval_dataset, device="cuda", repetition_penalty=1.0):
+def eval_model(
+    model,
+    tokenizer,
+    eval_dataset,
+    device="cuda",
+    max_new_tokens=4096,
+    repetition_penalty=1.0,
+):
     total = len(eval_dataset)
     predictions = []
     for i in tqdm(range(total)):
@@ -144,7 +151,7 @@ def eval_model(model, tokenizer, eval_dataset, device="cuda", repetition_penalty=1.0):
 
         outputs = model.generate(
             **inputs,
-            max_new_tokens=4096,
+            max_new_tokens=max_new_tokens,
             repetition_penalty=repetition_penalty,
             use_cache=False,
         )