Spaces:
Build error
Build error
set max_new_tokens to 2048
Browse files- llm_toolkit/llm_utils.py +1 -1
- llm_toolkit/translation_utils.py +27 -96
- notebooks/00_Data Analysis.ipynb +1 -0
llm_toolkit/llm_utils.py
CHANGED
@@ -162,7 +162,7 @@ def eval_model(
|
|
162 |
tokenizer,
|
163 |
eval_dataset,
|
164 |
device="cuda",
|
165 |
-
max_new_tokens=
|
166 |
repetition_penalty=1.0,
|
167 |
batch_size=1,
|
168 |
):
|
|
|
162 |
tokenizer,
|
163 |
eval_dataset,
|
164 |
device="cuda",
|
165 |
+
max_new_tokens=2048,
|
166 |
repetition_penalty=1.0,
|
167 |
batch_size=1,
|
168 |
):
|
llm_toolkit/translation_utils.py
CHANGED
@@ -8,6 +8,7 @@ from datasets import load_dataset
|
|
8 |
from langchain_openai import ChatOpenAI
|
9 |
from langchain_core.prompts import ChatPromptTemplate
|
10 |
from tqdm import tqdm
|
|
|
11 |
|
12 |
print(f"loading {__file__}")
|
13 |
|
@@ -45,14 +46,7 @@ def calc_metrics(references, predictions, debug=False):
|
|
45 |
), f"lengths are difference: {len(references)} != {len(predictions)}"
|
46 |
|
47 |
predictions = [extract_answer(text) for text in predictions]
|
48 |
-
|
49 |
-
correct = [1 if ref == pred else 0 for ref, pred in zip(references, predictions)]
|
50 |
-
accuracy = sum(correct) / len(references)
|
51 |
-
|
52 |
-
results = {"accuracy": accuracy}
|
53 |
-
if debug:
|
54 |
-
correct_ids = [i for i, c in enumerate(correct) if c == 1]
|
55 |
-
results["correct_ids"] = correct_ids
|
56 |
|
57 |
results["meteor"] = meteor.compute(predictions=predictions, references=references)[
|
58 |
"meteor"
|
@@ -64,6 +58,15 @@ def calc_metrics(references, predictions, debug=False):
|
|
64 |
results["rouge_scores"] = rouge.compute(
|
65 |
predictions=predictions, references=references
|
66 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
return results
|
68 |
|
69 |
|
@@ -159,114 +162,42 @@ def load_translation_dataset(data_path, tokenizer=None):
|
|
159 |
return datasets
|
160 |
|
161 |
|
162 |
-
def eval_model(model, tokenizer, eval_dataset, device="cuda"):
|
163 |
-
total = len(eval_dataset)
|
164 |
-
predictions = []
|
165 |
-
for i in tqdm(range(total)):
|
166 |
-
inputs = tokenizer(
|
167 |
-
eval_dataset["prompt"][i : i + 1],
|
168 |
-
return_tensors="pt",
|
169 |
-
).to(device)
|
170 |
-
|
171 |
-
outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
|
172 |
-
decoded_output = tokenizer.batch_decode(outputs)
|
173 |
-
debug = i == 0
|
174 |
-
decoded_output = [
|
175 |
-
extract_answer(output, debug=debug) for output in decoded_output
|
176 |
-
]
|
177 |
-
predictions.extend(decoded_output)
|
178 |
-
|
179 |
-
return predictions
|
180 |
-
|
181 |
-
|
182 |
-
def save_model(
|
183 |
-
model,
|
184 |
-
tokenizer,
|
185 |
-
include_gguf=True,
|
186 |
-
include_merged=True,
|
187 |
-
publish=True,
|
188 |
-
):
|
189 |
-
try:
|
190 |
-
token = os.getenv("HF_TOKEN") or None
|
191 |
-
model_name = os.getenv("MODEL_NAME")
|
192 |
-
|
193 |
-
save_method = "lora"
|
194 |
-
quantization_method = "q5_k_m"
|
195 |
-
|
196 |
-
model_names = get_model_names(
|
197 |
-
model_name, save_method=save_method, quantization_method=quantization_method
|
198 |
-
)
|
199 |
-
|
200 |
-
model.save_pretrained(model_names["local"])
|
201 |
-
tokenizer.save_pretrained(model_names["local"])
|
202 |
-
|
203 |
-
if publish:
|
204 |
-
model.push_to_hub(
|
205 |
-
model_names["hub"],
|
206 |
-
token=token,
|
207 |
-
)
|
208 |
-
tokenizer.push_to_hub(
|
209 |
-
model_names["hub"],
|
210 |
-
token=token,
|
211 |
-
)
|
212 |
-
|
213 |
-
if include_merged:
|
214 |
-
model.save_pretrained_merged(
|
215 |
-
model_names["local"] + "-merged", tokenizer, save_method=save_method
|
216 |
-
)
|
217 |
-
if publish:
|
218 |
-
model.push_to_hub_merged(
|
219 |
-
model_names["hub"] + "-merged",
|
220 |
-
tokenizer,
|
221 |
-
save_method="lora",
|
222 |
-
token="",
|
223 |
-
)
|
224 |
-
|
225 |
-
if include_gguf:
|
226 |
-
model.save_pretrained_gguf(
|
227 |
-
model_names["local-gguf"],
|
228 |
-
tokenizer,
|
229 |
-
quantization_method=quantization_method,
|
230 |
-
)
|
231 |
-
|
232 |
-
if publish:
|
233 |
-
model.push_to_hub_gguf(
|
234 |
-
model_names["hub-gguf"],
|
235 |
-
tokenizer,
|
236 |
-
quantization_method=quantization_method,
|
237 |
-
token=token,
|
238 |
-
)
|
239 |
-
except Exception as e:
|
240 |
-
print(e)
|
241 |
-
|
242 |
-
|
243 |
def get_metrics(df):
|
244 |
metrics_df = pd.DataFrame(df.columns.T)[2:]
|
245 |
metrics_df.rename(columns={0: "model"}, inplace=True)
|
246 |
-
metrics_df["
|
|
|
247 |
metrics_df.reset_index(inplace=True)
|
248 |
metrics_df = metrics_df.drop(columns=["index"])
|
249 |
|
250 |
-
accuracy = []
|
251 |
meteor = []
|
252 |
bleu_1 = []
|
253 |
rouge_l = []
|
254 |
-
|
|
|
|
|
|
|
255 |
for col in df.columns[2:]:
|
256 |
metrics = calc_metrics(df["english"], df[col], debug=True)
|
257 |
print(f"{col}: {metrics}")
|
258 |
|
259 |
-
accuracy.append(metrics["accuracy"])
|
260 |
meteor.append(metrics["meteor"])
|
261 |
bleu_1.append(metrics["bleu_scores"]["bleu"])
|
262 |
rouge_l.append(metrics["rouge_scores"]["rougeL"])
|
263 |
-
all_metrics.append(metrics)
|
264 |
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
metrics_df["meteor"] = meteor
|
267 |
metrics_df["bleu_1"] = bleu_1
|
268 |
metrics_df["rouge_l"] = rouge_l
|
269 |
-
metrics_df["
|
|
|
|
|
270 |
|
271 |
return metrics_df
|
272 |
|
|
|
8 |
from langchain_openai import ChatOpenAI
|
9 |
from langchain_core.prompts import ChatPromptTemplate
|
10 |
from tqdm import tqdm
|
11 |
+
from eval_modules.calc_repetitions import *
|
12 |
|
13 |
print(f"loading {__file__}")
|
14 |
|
|
|
46 |
), f"lengths are difference: {len(references)} != {len(predictions)}"
|
47 |
|
48 |
predictions = [extract_answer(text) for text in predictions]
|
49 |
+
results = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
results["meteor"] = meteor.compute(predictions=predictions, references=references)[
|
52 |
"meteor"
|
|
|
58 |
results["rouge_scores"] = rouge.compute(
|
59 |
predictions=predictions, references=references
|
60 |
)
|
61 |
+
|
62 |
+
correct = [1 if ref == pred else 0 for ref, pred in zip(references, predictions)]
|
63 |
+
accuracy = sum(correct) / len(references)
|
64 |
+
|
65 |
+
results["accuracy"] = accuracy
|
66 |
+
if debug:
|
67 |
+
correct_ids = [i for i, c in enumerate(correct) if c == 1]
|
68 |
+
results["correct_ids"] = correct_ids
|
69 |
+
|
70 |
return results
|
71 |
|
72 |
|
|
|
162 |
return datasets
|
163 |
|
164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
def get_metrics(df):
|
166 |
metrics_df = pd.DataFrame(df.columns.T)[2:]
|
167 |
metrics_df.rename(columns={0: "model"}, inplace=True)
|
168 |
+
metrics_df["rpp"] = metrics_df["model"].apply(lambda x: x.split("rpp-")[-1])
|
169 |
+
metrics_df["model"] = metrics_df["model"].apply(lambda x: x.split("/")[1])
|
170 |
metrics_df.reset_index(inplace=True)
|
171 |
metrics_df = metrics_df.drop(columns=["index"])
|
172 |
|
|
|
173 |
meteor = []
|
174 |
bleu_1 = []
|
175 |
rouge_l = []
|
176 |
+
ews_score = []
|
177 |
+
repetition_score = []
|
178 |
+
total_repetitions = []
|
179 |
+
|
180 |
for col in df.columns[2:]:
|
181 |
metrics = calc_metrics(df["english"], df[col], debug=True)
|
182 |
print(f"{col}: {metrics}")
|
183 |
|
|
|
184 |
meteor.append(metrics["meteor"])
|
185 |
bleu_1.append(metrics["bleu_scores"]["bleu"])
|
186 |
rouge_l.append(metrics["rouge_scores"]["rougeL"])
|
|
|
187 |
|
188 |
+
df[["ews_score", "repetition_score", "total_repetitions"]] = df[col].apply(
|
189 |
+
detect_scores
|
190 |
+
)
|
191 |
+
ews_score.append(df["ews_score"].mean())
|
192 |
+
repetition_score.append(df["repetition_score"].mean())
|
193 |
+
total_repetitions.append(df["total_repetitions"].mean())
|
194 |
+
|
195 |
metrics_df["meteor"] = meteor
|
196 |
metrics_df["bleu_1"] = bleu_1
|
197 |
metrics_df["rouge_l"] = rouge_l
|
198 |
+
metrics_df["ews_score"] = ews_score
|
199 |
+
metrics_df["repetition_score"] = ews_score
|
200 |
+
metrics_df["total_repetitions"] = ews_score
|
201 |
|
202 |
return metrics_df
|
203 |
|
notebooks/00_Data Analysis.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"cells":[{"cell_type":"code","execution_count":72,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[{"name":"stdout","output_type":"stream","text":["The autoreload extension is already loaded. To reload it, use:\n"," %reload_ext autoreload\n"]}],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":73,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/papers/rapget-translation\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","# check if workding_dir is in local variables\n","if 'workding_dir' not in locals():\n"," workding_dir = str(Path.cwd().parent)\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":74,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/papers/rapget-translation/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":74,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":75,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["Qwen/Qwen2-7B-Instruct None False datasets/mac/mac.tsv results/mac-results.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"DATA_PATH\")\n","results_path = os.getenv(\"RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)"]},{"cell_type":"code","execution_count":76,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","Name: torch\n","Version: 2.4.0\n","Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration\n","Home-page: https://pytorch.org/\n","Author: PyTorch Team\n","Author-email: [email protected]\n","License: BSD-3\n","Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages\n","Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions\n","Required-by: accelerate, peft, torchaudio, torchvision\n","---\n","Name: transformers\n","Version: 4.43.3\n","Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow\n","Home-page: https://github.com/huggingface/transformers\n","Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)\n","Author-email: [email protected]\n","License: Apache 2.0 License\n","Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages\n","Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm\n","Required-by: peft\n","CPU times: user 7.77 ms, sys: 12.1 ms, total: 19.9 ms\n","Wall time: 1.86 s\n"]}],"source":["%%time\n","os.environ[\"TOKENIZERS_PARALLELISM\"] = \"true\"\n","\n","!python --version\n","!pip show torch transformers"]},{"cell_type":"code","execution_count":77,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.translation_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":78,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 1133 entries, 0 to 1132\n","Data columns (total 20 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 chinese 1133 non-null object\n"," 1 english 1133 non-null object\n"," 2 Qwen/Qwen2-7B-Instruct/rpp-1.00 1133 non-null object\n"," 3 Qwen/Qwen2-7B-Instruct/rpp-1.02 1133 non-null object\n"," 4 Qwen/Qwen2-7B-Instruct/rpp-1.04 1133 non-null object\n"," 5 Qwen/Qwen2-7B-Instruct/rpp-1.06 1133 non-null object\n"," 6 Qwen/Qwen2-7B-Instruct/rpp-1.08 1133 non-null object\n"," 7 Qwen/Qwen2-7B-Instruct/rpp-1.10 1133 non-null object\n"," 8 Qwen/Qwen2-7B-Instruct/rpp-1.12 1133 non-null object\n"," 9 Qwen/Qwen2-7B-Instruct/rpp-1.14 1133 non-null object\n"," 10 Qwen/Qwen2-7B-Instruct/rpp-1.16 1133 non-null object\n"," 11 Qwen/Qwen2-7B-Instruct/rpp-1.18 1133 non-null object\n"," 12 Qwen/Qwen2-7B-Instruct/rpp-1.20 1133 non-null object\n"," 13 Qwen/Qwen2-7B-Instruct/rpp-1.22 1133 non-null object\n"," 14 Qwen/Qwen2-7B-Instruct/rpp-1.24 1133 non-null object\n"," 15 Qwen/Qwen2-7B-Instruct/rpp-1.26 1133 non-null object\n"," 16 Qwen/Qwen2-7B-Instruct/rpp-1.28 1133 non-null object\n"," 17 Qwen/Qwen2-7B-Instruct/rpp-1.30 1133 non-null object\n"," 18 internlm/internlm2_5-7b-chat-1m/rpp-1.00 1133 non-null object\n"," 19 internlm/internlm2_5-7b-chat-1m/rpp-1.02 1133 non-null object\n","dtypes: object(20)\n","memory usage: 177.2+ KB\n"]}],"source":["import pandas as pd\n","\n","df = pd.read_csv(results_path)\n","df.info()"]},{"cell_type":"code","execution_count":79,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Qwen/Qwen2-7B-Instruct/rpp-1.00: {'meteor': 0.37372392521659187, 'bleu_scores': {'bleu': 0.11236357019695803, 'precisions': [0.42194734753274243, 0.15166821289901386, 0.07014658562745799, 0.035509352410760864], 'brevity_penalty': 1.0, 'length_ratio': 1.0015236833388539, 'translation_length': 30236, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42297796931661225, 'rouge2': 0.17403983204578213, 'rougeL': 0.3658856686382874, 'rougeLsum': 0.3659204687398736}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.02: {'meteor': 0.37190869873532223, 'bleu_scores': {'bleu': 0.1100390286367209, 'precisions': [0.4184028352820377, 0.1497005988023952, 0.06846911369740376, 0.03418803418803419], 'brevity_penalty': 1.0, 'length_ratio': 1.0000331235508446, 'translation_length': 30191, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4195845948970141, 'rouge2': 0.17186391930180184, 'rougeL': 0.361164889670589, 'rougeLsum': 0.36115367651131036}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.04: {'meteor': 0.37406550703189817, 'bleu_scores': {'bleu': 0.10956931974949688, 'precisions': [0.4130972529618539, 0.14825453685242135, 0.06855717197273174, 0.03432753888380604], 'brevity_penalty': 1.0, 'length_ratio': 1.0176879761510433, 'translation_length': 30724, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41943813931860635, 'rouge2': 0.1731766218403924, 'rougeL': 0.36189598520170224, 'rougeLsum': 0.3622987164716138}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.06: {'meteor': 0.3743075569159023, 'bleu_scores': {'bleu': 0.10936803300009316, 'precisions': [0.4205087175384205, 0.15056818181818182, 0.06837113699469907, 0.03391232423490488], 'brevity_penalty': 0.99358667697838, 'length_ratio': 0.9936071546869825, 'translation_length': 29997, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4188563626995033, 'rouge2': 0.17237953917851812, 'rougeL': 0.3614238400989537, 'rougeLsum': 0.36157826104604907}, 'accuracy': 0.00088261253309797, 'correct_ids': [364]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.08: {'meteor': 0.37389232127803795, 'bleu_scores': {'bleu': 0.11252598445639024, 'precisions': [0.41829608938547486, 0.14934862987663705, 0.0709559087966626, 0.03674127394743748], 'brevity_penalty': 0.9960837725634155, 'length_ratio': 0.9960914210003312, 'translation_length': 30072, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42031703148419475, 'rouge2': 0.17243974225774378, 'rougeL': 0.36154500391739963, 'rougeLsum': 0.36189135440678893}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 533, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.10: {'meteor': 0.3702585216722875, 'bleu_scores': {'bleu': 0.10710301916085879, 'precisions': [0.41596456758466815, 0.1461794019933555, 0.06721175665454021, 0.032895230942546004], 'brevity_penalty': 0.9946528632038626, 'length_ratio': 0.9946671083140113, 'translation_length': 30029, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41782139222094783, 'rouge2': 0.17158596223024214, 'rougeL': 0.3589904425478737, 'rougeLsum': 0.359006959234046}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 533]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.12: {'meteor': 0.3705755989089089, 'bleu_scores': {'bleu': 0.10258222363948062, 'precisions': [0.40718875371016666, 0.14041861410282463, 0.06307892790476526, 0.03070320237702212], 'brevity_penalty': 1.0, 'length_ratio': 1.015534945346141, 'translation_length': 30659, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4166572027441734, 'rouge2': 0.16822111899564623, 'rougeL': 0.357008902161872, 'rougeLsum': 0.35745653956369094}, 'accuracy': 0.00441306266548985, 'correct_ids': [240, 364, 533, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.14: {'meteor': 0.3641110975372605, 'bleu_scores': {'bleu': 0.0990318198813296, 'precisions': [0.4006066338345129, 0.13617583310755893, 0.060538827258320126, 0.02912372079374977], 'brevity_penalty': 1.0, 'length_ratio': 1.0156011924478303, 'translation_length': 30661, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41237689483692086, 'rouge2': 0.1647530979631625, 'rougeL': 0.3524579320803469, 'rougeLsum': 0.3525094464318763}, 'accuracy': 0.00353045013239188, 'correct_ids': [240, 364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.16: {'meteor': 0.36313903134249, 'bleu_scores': {'bleu': 0.0952870490417418, 'precisions': [0.3922494060232454, 0.13080998234098556, 0.05827562326869806, 0.027570548167369445], 'brevity_penalty': 1.0, 'length_ratio': 1.031666114607486, 'translation_length': 31146, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40833803439030314, 'rouge2': 0.16062251994620244, 'rougeL': 0.3497483615160329, 'rougeLsum': 0.34978679608530117}, 'accuracy': 0.00353045013239188, 'correct_ids': [240, 364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.18: {'meteor': 0.3621942846022667, 'bleu_scores': {'bleu': 0.09476128635888595, 'precisions': [0.39506776127182697, 0.13002739911375708, 0.05743932465705241, 0.027328009377976407], 'brevity_penalty': 1.0, 'length_ratio': 1.0167605167273932, 'translation_length': 30696, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4085466936844559, 'rouge2': 0.16039337465336806, 'rougeL': 0.34699495728564167, 'rougeLsum': 0.3476358801921955}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 533]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.20: {'meteor': 0.3538173238431576, 'bleu_scores': {'bleu': 0.08968963366420402, 'precisions': [0.3857477091050887, 0.12459093822745522, 0.053809456994527854, 0.025021917007597896], 'brevity_penalty': 1.0, 'length_ratio': 1.019344153693276, 'translation_length': 30774, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4011401317225115, 'rouge2': 0.1560858750463962, 'rougeL': 0.34218460807223944, 'rougeLsum': 0.3425934037679277}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.22: {'meteor': 0.35181344675948434, 'bleu_scores': {'bleu': 0.08468174500037418, 'precisions': [0.3809662304087898, 0.11948208774989937, 0.04979253112033195, 0.022688496024975496], 'brevity_penalty': 1.0, 'length_ratio': 1.0250082808877112, 'translation_length': 30945, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39827961553333474, 'rouge2': 0.15193285343874843, 'rougeL': 0.3372750185470212, 'rougeLsum': 0.33759743837835204}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 658]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.24: {'meteor': 0.3478194405208726, 'bleu_scores': {'bleu': 0.0805927787556035, 'precisions': [0.37261687333781923, 0.11477590105067163, 0.04729986525239263, 0.020855057351407715], 'brevity_penalty': 1.0, 'length_ratio': 1.033752898310699, 'translation_length': 31209, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39685525200407556, 'rouge2': 0.15120218239789796, 'rougeL': 0.3358927244287901, 'rougeLsum': 0.3360244508591187}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.26: {'meteor': 0.34377313642615803, 'bleu_scores': {'bleu': 0.07378753043886055, 'precisions': [0.3538180711895345, 0.10627973658505845, 0.04270683570775062, 0.018458823928607784], 'brevity_penalty': 1.0, 'length_ratio': 1.0887711162636635, 'translation_length': 32870, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3900349430194469, 'rouge2': 0.14626610466291398, 'rougeL': 0.3286622597539425, 'rougeLsum': 0.32885867843106675}, 'accuracy': 0.00353045013239188, 'correct_ids': [240, 364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.28: {'meteor': 0.34017553840969, 'bleu_scores': {'bleu': 0.07269823806123552, 'precisions': [0.350266309534805, 0.10201607758070691, 0.041866622538474264, 0.01867070109686071], 'brevity_penalty': 1.0, 'length_ratio': 1.0758860549850944, 'translation_length': 32481, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3878054004863958, 'rouge2': 0.14229440977177754, 'rougeL': 0.32657427170563236, 'rougeLsum': 0.3267111227947044}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.30: {'meteor': 0.3353695673104313, 'bleu_scores': {'bleu': 0.05830111152417765, 'precisions': [0.2870419881343417, 0.08188253801782905, 0.03320993325587051, 0.014801393728222997], 'brevity_penalty': 1.0, 'length_ratio': 1.300861212321961, 'translation_length': 39273, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.380210905537834, 'rouge2': 0.14103422879273908, 'rougeL': 0.3209978795799936, 'rougeLsum': 0.321330521060505}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 658, 659]}\n","internlm/internlm2_5-7b-chat-1m/rpp-1.00: {'meteor': 0.37152961221312103, 'bleu_scores': {'bleu': 0.09920863765540926, 'precisions': [0.3728006623887394, 0.13422469709949822, 0.06224842318785458, 0.031100006575054243], 'brevity_penalty': 1.0, 'length_ratio': 1.1201391189135474, 'translation_length': 33817, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4203311940833312, 'rouge2': 0.17021837671248571, 'rougeL': 0.3632707656424509, 'rougeLsum': 0.3630393657869949}, 'accuracy': 0.0, 'correct_ids': []}\n","internlm/internlm2_5-7b-chat-1m/rpp-1.02: {'meteor': 0.352901317633597, 'bleu_scores': {'bleu': 0.08697903417673139, 'precisions': [0.3666595931730682, 0.11979657185910718, 0.05260074213918365, 0.024771882392700235], 'brevity_penalty': 1.0, 'length_ratio': 1.0926465717124876, 'translation_length': 32987, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3998861989122014, 'rouge2': 0.15164946531097323, 'rougeL': 0.34028230510270174, 'rougeLsum': 0.3410649041040759}, 'accuracy': 0.00088261253309797, 'correct_ids': [511]}\n"]},{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>model</th>\n"," <th>rpp</th>\n"," <th>meteor</th>\n"," <th>bleu_1</th>\n"," <th>rouge_l</th>\n"," <th>ews_score</th>\n"," <th>repetition_score</th>\n"," <th>total_repetitions</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.00</td>\n"," <td>0.373724</td>\n"," <td>0.112364</td>\n"," <td>0.365886</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.02</td>\n"," <td>0.371909</td>\n"," <td>0.110039</td>\n"," <td>0.361165</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.04</td>\n"," <td>0.374066</td>\n"," <td>0.109569</td>\n"," <td>0.361896</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.06</td>\n"," <td>0.374308</td>\n"," <td>0.109368</td>\n"," <td>0.361424</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.08</td>\n"," <td>0.373892</td>\n"," <td>0.112526</td>\n"," <td>0.361545</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.10</td>\n"," <td>0.370259</td>\n"," <td>0.107103</td>\n"," <td>0.358990</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.12</td>\n"," <td>0.370576</td>\n"," <td>0.102582</td>\n"," <td>0.357009</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.14</td>\n"," <td>0.364111</td>\n"," <td>0.099032</td>\n"," <td>0.352458</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.16</td>\n"," <td>0.363139</td>\n"," <td>0.095287</td>\n"," <td>0.349748</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>9</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.18</td>\n"," <td>0.362194</td>\n"," <td>0.094761</td>\n"," <td>0.346995</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>10</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.20</td>\n"," <td>0.353817</td>\n"," <td>0.089690</td>\n"," <td>0.342185</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>11</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.22</td>\n"," <td>0.351813</td>\n"," <td>0.084682</td>\n"," <td>0.337275</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>12</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.24</td>\n"," <td>0.347819</td>\n"," <td>0.080593</td>\n"," <td>0.335893</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>13</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.26</td>\n"," <td>0.343773</td>\n"," <td>0.073788</td>\n"," <td>0.328662</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>14</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.28</td>\n"," <td>0.340176</td>\n"," <td>0.072698</td>\n"," <td>0.326574</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>15</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.30</td>\n"," <td>0.335370</td>\n"," <td>0.058301</td>\n"," <td>0.320998</td>\n"," <td>0.004413</td>\n"," <td>0.004413</td>\n"," <td>0.004413</td>\n"," </tr>\n"," <tr>\n"," <th>16</th>\n"," <td>internlm2_5-7b-chat-1m</td>\n"," <td>1.00</td>\n"," <td>0.371530</td>\n"," <td>0.099209</td>\n"," <td>0.363271</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>17</th>\n"," <td>internlm2_5-7b-chat-1m</td>\n"," <td>1.02</td>\n"," <td>0.352901</td>\n"," <td>0.086979</td>\n"," <td>0.340282</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" model rpp meteor bleu_1 rouge_l ews_score \\\n","0 Qwen2-7B-Instruct 1.00 0.373724 0.112364 0.365886 0.000000 \n","1 Qwen2-7B-Instruct 1.02 0.371909 0.110039 0.361165 0.000000 \n","2 Qwen2-7B-Instruct 1.04 0.374066 0.109569 0.361896 0.000000 \n","3 Qwen2-7B-Instruct 1.06 0.374308 0.109368 0.361424 0.000000 \n","4 Qwen2-7B-Instruct 1.08 0.373892 0.112526 0.361545 0.000000 \n","5 Qwen2-7B-Instruct 1.10 0.370259 0.107103 0.358990 0.000000 \n","6 Qwen2-7B-Instruct 1.12 0.370576 0.102582 0.357009 0.000000 \n","7 Qwen2-7B-Instruct 1.14 0.364111 0.099032 0.352458 0.000000 \n","8 Qwen2-7B-Instruct 1.16 0.363139 0.095287 0.349748 0.000000 \n","9 Qwen2-7B-Instruct 1.18 0.362194 0.094761 0.346995 0.000000 \n","10 Qwen2-7B-Instruct 1.20 0.353817 0.089690 0.342185 0.000000 \n","11 Qwen2-7B-Instruct 1.22 0.351813 0.084682 0.337275 0.000000 \n","12 Qwen2-7B-Instruct 1.24 0.347819 0.080593 0.335893 0.000000 \n","13 Qwen2-7B-Instruct 1.26 0.343773 0.073788 0.328662 0.000000 \n","14 Qwen2-7B-Instruct 1.28 0.340176 0.072698 0.326574 0.000000 \n","15 Qwen2-7B-Instruct 1.30 0.335370 0.058301 0.320998 0.004413 \n","16 internlm2_5-7b-chat-1m 1.00 0.371530 0.099209 0.363271 0.000000 \n","17 internlm2_5-7b-chat-1m 1.02 0.352901 0.086979 0.340282 0.000000 \n","\n"," repetition_score total_repetitions \n","0 0.000000 0.000000 \n","1 0.000000 0.000000 \n","2 0.000000 0.000000 \n","3 0.000000 0.000000 \n","4 0.000000 0.000000 \n","5 0.000000 0.000000 \n","6 0.000000 0.000000 \n","7 0.000000 0.000000 \n","8 0.000000 0.000000 \n","9 0.000000 0.000000 \n","10 0.000000 0.000000 \n","11 0.000000 0.000000 \n","12 0.000000 0.000000 \n","13 0.000000 0.000000 \n","14 0.000000 0.000000 \n","15 0.004413 0.004413 \n","16 0.000000 0.000000 \n","17 0.000000 0.000000 "]},"execution_count":79,"metadata":{},"output_type":"execute_result"}],"source":["metrics_df = get_metrics(df)\n","metrics_df"]},{"cell_type":"code","execution_count":82,"metadata":{},"outputs":[],"source":["col = \"Qwen/Qwen2-7B-Instruct/rpp-1.30\"\n","df[[\"ews_score\", \"repetition_score\", \"total_repetitions\"]] = df[col].apply(\n"," detect_scores\n",")"]},{"cell_type":"code","execution_count":88,"metadata":{},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>chinese</th>\n"," <th>english</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.00</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.02</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.04</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.06</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.08</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.10</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.12</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.14</th>\n"," <th>...</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.22</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.24</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.26</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.28</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.30</th>\n"," <th>internlm/internlm2_5-7b-chat-1m/rpp-1.00</th>\n"," <th>internlm/internlm2_5-7b-chat-1m/rpp-1.02</th>\n"," <th>ews_score</th>\n"," <th>repetition_score</th>\n"," <th>total_repetitions</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>905</th>\n"," <td>顾炎武举起酒杯,高声吟道:</td>\n"," <td>Gu Yanwu raised his wine-cup and, in ringing t...</td>\n"," <td>Ku Yanyu lifted his wine cup and recited aloud:</td>\n"," <td>Ku Yanyu lifted his wine cup and recited loudly:</td>\n"," <td>Ku Yanyu raised his cup and recited aloud:</td>\n"," <td>Ku Yanyu raised his wine cup and recited aloud:</td>\n"," <td>Ku Yanyu lifted his wine cup and recited loudly:</td>\n"," <td>Ku Yanyu raised his wine cup and recited aloud:</td>\n"," <td>Ku Yanyu raised his cup and recited loudly:\\n\\...</td>\n"," <td>Ku Yanyu lifted his wine cup and recited loudl...</td>\n"," <td>...</td>\n"," <td>Ku Yanyun raised his cup and recited loudly:\\n...</td>\n"," <td>Ku Yanyuan raised his cup and recited loudly:\\...</td>\n"," <td>Ku Yanyun raised his cup and recited aloud:</td>\n"," <td>Ku Yanyun raised his cup and recited aloud:</td>\n"," <td>Ku Yanyuan raised his cup and recited loudly:\\...</td>\n"," <td>Gu Yanwu raised his wine cup and recited loudly:</td>\n"," <td>Gu Yanwu raised his wine cup and recited loudly:</td>\n"," <td>5</td>\n"," <td>0</td>\n"," <td>5</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1 rows × 23 columns</p>\n","</div>"],"text/plain":[" chinese english \\\n","905 顾炎武举起酒杯,高声吟道: Gu Yanwu raised his wine-cup and, in ringing t... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.00 \\\n","905 Ku Yanyu lifted his wine cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.02 \\\n","905 Ku Yanyu lifted his wine cup and recited loudly: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.04 \\\n","905 Ku Yanyu raised his cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.06 \\\n","905 Ku Yanyu raised his wine cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.08 \\\n","905 Ku Yanyu lifted his wine cup and recited loudly: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.10 \\\n","905 Ku Yanyu raised his wine cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.12 \\\n","905 Ku Yanyu raised his cup and recited loudly:\\n\\... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.14 ... \\\n","905 Ku Yanyu lifted his wine cup and recited loudl... ... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.22 \\\n","905 Ku Yanyun raised his cup and recited loudly:\\n... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.24 \\\n","905 Ku Yanyuan raised his cup and recited loudly:\\... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.26 \\\n","905 Ku Yanyun raised his cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.28 \\\n","905 Ku Yanyun raised his cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.30 \\\n","905 Ku Yanyuan raised his cup and recited loudly:\\... \n","\n"," internlm/internlm2_5-7b-chat-1m/rpp-1.00 \\\n","905 Gu Yanwu raised his wine cup and recited loudly: \n","\n"," internlm/internlm2_5-7b-chat-1m/rpp-1.02 ews_score \\\n","905 Gu Yanwu raised his wine cup and recited loudly: 5 \n","\n"," repetition_score total_repetitions \n","905 0 5 \n","\n","[1 rows x 23 columns]"]},"execution_count":88,"metadata":{},"output_type":"execute_result"}],"source":["rows = df.query(\"ews_score > 0\")\n","rows"]},{"cell_type":"code","execution_count":92,"metadata":{},"outputs":[],"source":["row = rows.iloc[0]"]},{"cell_type":"code","execution_count":93,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["顾炎武举起酒杯,高声吟道:\n"]}],"source":["print(row[\"chinese\"])"]},{"cell_type":"code","execution_count":94,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Gu Yanwu raised his wine-cup and, in ringing tones, recited the following couplet:\n"]}],"source":["print(row[\"english\"])"]},{"cell_type":"code","execution_count":97,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Ku Yanyuan raised his cup and recited loudly:\n","\n","\"Under Heaven's vast expanse,\n","The world is full of myriad paths;\n","Yet every step I take leads me back home.\n","\n","I've sought solace amidst mountains high,\n","And found tranquility by rivers wide; \n","But my heart yearns for that familiar place.\"\n","\n","He then took a deep sip from his wine goblet before continuing with another verse:\n"," \n","\"In this mortal coil we wander on,\n","Seeking truth where'er our feet have gone;\n","Our souls forever roam like birds free,\n","\n","Yearning always after what was lost or missed,\n","In dreams as well as waking hours past.\"\n"," \n","With each word echoing through the hallways filled with ancient wisdom, Ku’s verses seemed to carry echoes beyond their literal meaning—touching upon themes deeply rooted within human experience: longing, memory, identity, and perhaps most importantly—the universal quest for belongingness amid life’s transient nature. \n","\n","His poetic outpourings resonated not just because they captured moments specific to one man but due to how universally relatable these sentiments truly were—to anyone who has ever felt displaced yet longed for connection, searching for roots amongst shifting sands of time. In essence, he articulated something profound about existence itself—a poignant reminder of humanity's common struggle against loneliness and dislocation even when surrounded by countless others seeking similar answers along parallel journeys across distant lands.\n"]}],"source":["print(row[\"Qwen/Qwen2-7B-Instruct/rpp-1.30\"])"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
|