dh-mc committed on
Commit
927a69c
·
1 Parent(s): 101f384

COMET eval results

Browse files
.gitignore CHANGED
@@ -150,3 +150,4 @@ dmypy.json
150
  /models
151
  /llama.cpp
152
  /llama-factory/config/models
 
 
150
  /models
151
  /llama.cpp
152
  /llama-factory/config/models
153
+ /codedrive
llm_toolkit/translation_utils.py CHANGED
@@ -308,9 +308,17 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
308
 
309
  comet.append(metrics["comet"])
310
  meteor.append(metrics["meteor"])
311
- spbleu.append(metrics["sacrebleu"]["score"])
312
- bleu_1.append(metrics["bleu_scores"]["bleu"])
313
- rouge_l.append(metrics["rouge_scores"]["rougeL"])
 
 
 
 
 
 
 
 
314
 
315
  df[["ews_score", "repetition_score", "total_repetitions"]] = df.apply(
316
  lambda x: detect_repetition_scores(x, col), axis=1
@@ -327,10 +335,10 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
327
  )
328
 
329
  new_col = f"count_chinese_characters-{col}"
330
- df[new_col] = df[col].apply(count_chinese_characters)
331
- translation_completeness.append(
332
- 1 - df[new_col].sum() / df["count_chinese_characters-ground_truth"].sum()
333
  )
 
334
 
335
  new_col = f"output_tokens-{col}"
336
  df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
@@ -593,9 +601,12 @@ def process_log_file(log_file, total_entries, variant):
593
  shots = []
594
  eval_time = []
595
 
 
 
596
  with open(log_file, "r") as f:
597
  try:
598
  for line in f:
 
599
  matches = time_pattern.search(line)
600
  if matches:
601
  time_pattern_matches = matches
@@ -614,7 +625,7 @@ def process_log_file(log_file, total_entries, variant):
614
  convert_time_to_seconds(time_str) / total_entries
615
  )
616
  except Exception as e:
617
- print(f"Error processing log file: {log_file}")
618
  print(e)
619
 
620
  df = pd.DataFrame(
 
308
 
309
  comet.append(metrics["comet"])
310
  meteor.append(metrics["meteor"])
311
+ spbleu.append(
312
+ metrics["spbleu"] if "spbleu" in metrics else metrics["sacrebleu"]["score"]
313
+ )
314
+ bleu_1.append(
315
+ metrics["bleu_1"] if "bleu_1" in metrics else metrics["bleu_scores"]["bleu"]
316
+ )
317
+ rouge_l.append(
318
+ metrics["rouge_l"]
319
+ if "rouge_l" in metrics
320
+ else metrics["rouge_scores"]["rougeL"]
321
+ )
322
 
323
  df[["ews_score", "repetition_score", "total_repetitions"]] = df.apply(
324
  lambda x: detect_repetition_scores(x, col), axis=1
 
335
  )
336
 
337
  new_col = f"count_chinese_characters-{col}"
338
+ df[new_col] = df[col].apply(
339
+ lambda x: 1 if count_chinese_characters(x) > 0 else 0
 
340
  )
341
+ translation_completeness.append(1 - df[new_col].sum() / len(df))
342
 
343
  new_col = f"output_tokens-{col}"
344
  df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
 
601
  shots = []
602
  eval_time = []
603
 
604
+ i = 0
605
+
606
  with open(log_file, "r") as f:
607
  try:
608
  for line in f:
609
+ i += 1
610
  matches = time_pattern.search(line)
611
  if matches:
612
  time_pattern_matches = matches
 
625
  convert_time_to_seconds(time_str) / total_entries
626
  )
627
  except Exception as e:
628
+ print(f"Error processing log file: {log_file} at line {i}: {line}")
629
  print(e)
630
 
631
  df = pd.DataFrame(
logs/l40-1gpu-1.txt CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/00b_Data Analysis_Few_Shots.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/00c_Data Analysis_Fine_Tuned.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
results/mac-results_few_shots_metrics.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f1a365cbe33bfd36ebae3cb08e0dc4e3c1fe5d2dfbf9f05ddb14df4e5842cd7
3
- size 12417
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19773c5c30c728bc5645e233d3c1994c88519f293746ea7e211c25e1eed6c820
3
+ size 13305
results/mac-results_fine_tuned_metrics.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9debbd64265afdd7d85f030adb03733490e1c45e47501c28cd3e8d6fba3654cc
3
- size 12038
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7915d8409c510a055ddd8fbd0d904f8701a67f104919099992da3653a917e16f
3
+ size 14988