machine-translation

Build error

App Files Community

dh-mc committed on Sep 17, 2024

Commit

927a69c

1 Parent(s): 101f384

COMET eval results

Browse files

Files changed (7) hide show

.gitignore +1 -0
llm_toolkit/translation_utils.py +18 -7
logs/l40-1gpu-1.txt +0 -0
notebooks/00b_Data Analysis_Few_Shots.ipynb +0 -0
notebooks/00c_Data Analysis_Fine_Tuned.ipynb +0 -0
results/mac-results_few_shots_metrics.csv +2 -2
results/mac-results_fine_tuned_metrics.csv +2 -2

.gitignore CHANGED Viewed

@@ -150,3 +150,4 @@ dmypy.json
 /models
 /llama.cpp
 /llama-factory/config/models

 /models
 /llama.cpp
 /llama-factory/config/models
+/codedrive

llm_toolkit/translation_utils.py CHANGED Viewed

@@ -308,9 +308,17 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
         comet.append(metrics["comet"])
         meteor.append(metrics["meteor"])
-        spbleu.append(metrics["sacrebleu"]["score"])
-        bleu_1.append(metrics["bleu_scores"]["bleu"])
-        rouge_l.append(metrics["rouge_scores"]["rougeL"])
         df[["ews_score", "repetition_score", "total_repetitions"]] = df.apply(
             lambda x: detect_repetition_scores(x, col), axis=1
@@ -327,10 +335,10 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
         )
         new_col = f"count_chinese_characters-{col}"
-        df[new_col] = df[col].apply(count_chinese_characters)
-        translation_completeness.append(
-            1 - df[new_col].sum() / df["count_chinese_characters-ground_truth"].sum()
         )
         new_col = f"output_tokens-{col}"
         df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
@@ -593,9 +601,12 @@ def process_log_file(log_file, total_entries, variant):
     shots = []
     eval_time = []
     with open(log_file, "r") as f:
         try:
             for line in f:
                 matches = time_pattern.search(line)
                 if matches:
                     time_pattern_matches = matches
@@ -614,7 +625,7 @@ def process_log_file(log_file, total_entries, variant):
                             convert_time_to_seconds(time_str) / total_entries
                         )
         except Exception as e:
-            print(f"Error processing log file: {log_file}")
             print(e)
     df = pd.DataFrame(

         comet.append(metrics["comet"])
         meteor.append(metrics["meteor"])
+        spbleu.append(
+            metrics["spbleu"] if "spbleu" in metrics else metrics["sacrebleu"]["score"]
+        )
+        bleu_1.append(
+            metrics["bleu_1"] if "bleu_1" in metrics else metrics["bleu_scores"]["bleu"]
+        )
+        rouge_l.append(
+            metrics["rouge_l"]
+            if "rouge_l" in metrics
+            else metrics["rouge_scores"]["rougeL"]
+        )
         df[["ews_score", "repetition_score", "total_repetitions"]] = df.apply(
             lambda x: detect_repetition_scores(x, col), axis=1
         )
         new_col = f"count_chinese_characters-{col}"
+        df[new_col] = df[col].apply(
+            lambda x: 1 if count_chinese_characters(x) > 0 else 0
         )
+        translation_completeness.append(1 - df[new_col].sum() / len(df))
         new_col = f"output_tokens-{col}"
         df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
     shots = []
     eval_time = []
+    i = 0
     with open(log_file, "r") as f:
         try:
             for line in f:
+                i += 1
                 matches = time_pattern.search(line)
                 if matches:
                     time_pattern_matches = matches
                             convert_time_to_seconds(time_str) / total_entries
                         )
         except Exception as e:
+            print(f"Error processing log file: {log_file} at line {i}: {line}")
             print(e)
     df = pd.DataFrame(

logs/l40-1gpu-1.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

notebooks/00b_Data Analysis_Few_Shots.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

notebooks/00c_Data Analysis_Fine_Tuned.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

results/mac-results_few_shots_metrics.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f1a365cbe33bfd36ebae3cb08e0dc4e3c1fe5d2dfbf9f05ddb14df4e5842cd7
-size 12417

 version https://git-lfs.github.com/spec/v1
+oid sha256:19773c5c30c728bc5645e233d3c1994c88519f293746ea7e211c25e1eed6c820
+size 13305

results/mac-results_fine_tuned_metrics.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9debbd64265afdd7d85f030adb03733490e1c45e47501c28cd3e8d6fba3654cc
-size 12038

 version https://git-lfs.github.com/spec/v1
+oid sha256:7915d8409c510a055ddd8fbd0d904f8701a67f104919099992da3653a917e16f
+size 14988