ready for generic prompt
Files changed:

- .env.example +1 -0
- .gitignore +1 -0
- llm_toolkit/eval_rpp.py +3 -2
- llm_toolkit/translation_utils.py +3 -2
- requirements.txt +1 -1
- scripts/eval-4gpu.sh +6 -1
- scripts/eval-mac.sh +10 -5
.env.example CHANGED

```diff
@@ -2,6 +2,7 @@ MODEL_NAME=Qwen/Qwen2-7B-Instruct
 
 BATCH_SIZE=2
 MAX_NEW_TOKENS=300
+USING_CHAT_TEMPLATE=true
 
 HF_TOKEN=
 
```
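The new flag is read with the toolkit's usual string-comparison convention (see the `llm_toolkit/eval_rpp.py` change below), so only the exact lowercase value `true` enables it. A minimal sketch of that parsing behaviour:

```python
import os

# Matches the pattern in llm_toolkit/eval_rpp.py: only the literal lowercase
# string "true" turns the flag on; "True", "1", or an unset variable all
# evaluate to False.
using_chat_template = os.getenv("USING_CHAT_TEMPLATE") == "true"
print(using_chat_template)
```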
.gitignore CHANGED

```diff
@@ -151,3 +151,4 @@ dmypy.json
 /llama.cpp
 /llama-factory/config/models
 /codedrive
+temp.csv
```
llm_toolkit/eval_rpp.py CHANGED

```diff
@@ -27,6 +27,7 @@ data_path = os.getenv("DATA_PATH")
 results_path = os.getenv("RESULTS_PATH")
 batch_size = int(os.getenv("BATCH_SIZE", 1))
 use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
+using_chat_template = os.getenv("USING_CHAT_TEMPLATE") == "true"
 max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
 start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
 end_repetition_penalty = float(os.getenv("END_REPETITION_PENALTY", 1.3))
@@ -63,7 +64,7 @@ if is_cuda:
     print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
     print(f"{start_gpu_memory} GB of memory reserved.")
 
-datasets = load_translation_dataset(data_path, tokenizer)
+datasets = load_translation_dataset(data_path, tokenizer, using_chat_template=using_chat_template)
 
 if len(sys.argv) > 1:
     num = int(sys.argv[1])
@@ -82,7 +83,7 @@ def on_repetition_penalty_step_completed(model_name, predictions):
         predictions,
     )
 
-    metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
+    metrics = calc_metrics(datasets["test"]["english"], predictions, datasets["test"]["chinese"], debug=True)
     print(f"{model_name} metrics: {metrics}")
 
 
```
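The diff only touches the configuration and the step-completed callback; the sweep loop itself lives elsewhere in eval_rpp.py. The sketch below shows the control flow those START/END settings imply, but it is an assumption, not the repo's code: the 0.02 step size, `sweep_repetition_penalty`, and `generate_fn` are hypothetical stand-ins.

```python
import numpy as np

def sweep_repetition_penalty(generate_fn, on_step_completed, model_name,
                             start_rpp=1.0, end_rpp=1.1, step=0.02):
    # Hypothetical sketch: walk the penalty range inclusively and hand each
    # round of predictions to the callback that saves results and computes
    # metrics (compare the calc_metrics call in the diff above).
    for rpp in np.arange(start_rpp, end_rpp + step / 2, step):
        predictions = generate_fn(repetition_penalty=float(rpp))
        on_step_completed(model_name, predictions)

# Usage with stubbed-out generation, just to show the control flow:
sweep_repetition_penalty(
    generate_fn=lambda repetition_penalty: [f"pred @ rpp={repetition_penalty:.2f}"],
    on_step_completed=lambda name, preds: print(name, preds),
    model_name="demo-model",
)
```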
llm_toolkit/translation_utils.py CHANGED

```diff
@@ -118,7 +118,7 @@ def get_few_shot_prompt(dataset, num_shots=5):
     return translation_prompt
 
 
-def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False):
+def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False, using_chat_template=True):
     train_data_file = data_path.replace(".tsv", "-train.tsv")
     test_data_file = data_path.replace(".tsv", "-test.tsv")
 
@@ -187,7 +187,8 @@ def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False):
         else:
             prompt = tokenizer.apply_chat_template(
                 messages, tokenize=False, add_generation_prompt=True
-            )
+            ) if using_chat_template else prompt
+
             prompts.append(prompt)
             texts.append(prompt + output + tokenizer.eos_token)
 
```
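One subtlety in this change: the conditional expression falls back to the existing `prompt` binding, so the generic prompt must already be assigned earlier in the loop for the `else prompt` arm to be defined. A minimal self-contained sketch of the two paths; the model name and message text are illustrative only, not the repo's actual prompt:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct")
messages = [{"role": "user", "content": "Translate the following Chinese text into English."}]

# Generic-prompt path: the raw instruction string is used verbatim.
prompt = "Translate the following Chinese text into English.\n"

using_chat_template = True  # flip to False to keep the generic prompt above
prompt = (
    tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    if using_chat_template
    else prompt
)
print(prompt)  # with the flag on, the prompt is wrapped in the model's chat markup (e.g. <|im_start|> tags for Qwen2)
```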
requirements.txt CHANGED

```diff
@@ -13,7 +13,7 @@ packaging
 langchain_openai==0.1.13
 wandb==0.17.6
 transformers==4.43.3
-bitsandbytes
+bitsandbytes #==0.43.3
 sentencepiece==0.1.98
 einops==0.8.0
 accelerate==0.32.0
```
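Note that in pip's requirements format, whitespace followed by `#` starts a comment, so `bitsandbytes #==0.43.3` still installs an unpinned bitsandbytes; the commented `==0.43.3` only records a candidate pin that can be re-enabled by deleting the `#`.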
scripts/eval-4gpu.sh CHANGED

```diff
@@ -31,7 +31,12 @@ export LOAD_IN_4BIT=true
 export MAX_NEW_TOKENS=2048
 export START_REPETITION_PENALTY=1.0
 export END_REPETITION_PENALTY=1.1
-
+
+export USING_CHAT_TEMPLATE=false
+export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048_generic_prompt.csv
+
+# export USING_CHAT_TEMPLATE=true
+# export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
 
 ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-70B-Chinese-Chat checkpoint-210
 
```
scripts/eval-mac.sh CHANGED

```diff
@@ -43,14 +43,19 @@ export RESULTS_PATH=results/mac-results_fine_tuned.csv
 export MAX_NEW_TOKENS=2048
 export START_REPETITION_PENALTY=1.0
 export END_REPETITION_PENALTY=1.1
-export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
 
-
+export USING_CHAT_TEMPLATE=false
+export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048_generic_prompt.csv
 
-#
+# export USING_CHAT_TEMPLATE=true
+# export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
 
-
+./scripts/eval-rpp.sh internlm internlm2_5-7b-chat checkpoint-140
 
-
+./scripts/eval-rpp.sh Qwen Qwen2-7B-Instruct checkpoint-105
+
+./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
+
+./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
 
 ./scripts/eval-rpp.sh microsoft Phi-3.5-mini-instruct checkpoint-210
```
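Both scripts now default to the generic-prompt configuration (`USING_CHAT_TEMPLATE=false`) and write to `mac-results_rpp_with_mnt_2048_generic_prompt.csv`; swapping the comments on the two export pairs restores the chat-template run and its original `mac-results_rpp_with_mnt_2048.csv` output, keeping the two result sets in separate files.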
|