Spaces:

inflaton-ai
/

logical-reasoning

Build error

App Files Files Community

inflaton committed on Jul 6, 2024

Commit

5002792

1 Parent(s): 36cb2cb

InternLM 2.5 results

Browse files

Files changed (15) hide show

competition/04_InternLM_T4.ipynb +0 -0
competition/05_InternLM_NV4080.ipynb +0 -0
llm_toolkit/{chat_mac.py → chat.py} +0 -0
llm_toolkit/{eval_mac.py → eval.py} +0 -0
llm_toolkit/eval_lf.py +110 -0
llm_toolkit/eval_logical_reasoning.py +73 -0
llm_toolkit/llm_utils.py +160 -0
llm_toolkit/logical_reasoning_utils.py +222 -0
llm_toolkit/{tune_mac.py → tune.py} +3 -4
novel-translation/00_Data_Analysis.ipynb +0 -0
novel-translation/07r2_tune-lf-py3.11.ipynb +0 -0
novel-translation/08r2_eval-lf-py3.11.ipynb +0 -0
novel-translation/09_tune-lf-medium-py3.11.ipynb +0 -0
results/mgtv-results.csv +0 -0
results/mgtv-results_nv4080.csv +0 -0

competition/04_InternLM_T4.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

competition/05_InternLM_NV4080.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

llm_toolkit/{chat_mac.py → chat.py} RENAMED Viewed

File without changes

llm_toolkit/{eval_mac.py → eval.py} RENAMED Viewed

File without changes

llm_toolkit/eval_lf.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import os
+import sys
+import torch
+from dotenv import find_dotenv, load_dotenv
+from llamafactory.chat import ChatModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+# Locate the environment file: prefer a real .env, otherwise fall back to .env.example.
+found_dotenv = find_dotenv(".env") or find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+# override=False: variables already set in the process environment win over the file.
+load_dotenv(found_dotenv, override=False)
+# Put the directory that holds the env file on sys.path before importing llm_toolkit
+# (presumably the repository root -- verify against the project layout).
+path = os.path.dirname(found_dotenv)
+print(f"Adding {path} to sys.path")
+sys.path.append(path)
+from llm_toolkit.translation_utils import *
+# Run configuration, read entirely from environment variables.
+model_name = os.getenv("MODEL_NAME")
+adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
+# Only the exact lowercase string "true" enables 4-bit loading.
+load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+data_path = os.getenv("DATA_PATH")
+results_path = os.getenv("RESULTS_PATH")
+print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
+def load_model(
+    model_name,
+    max_seq_length=2048,
+    dtype=torch.bfloat16,
+    load_in_4bit=False,
+    adapter_name_or_path=None,
+):
+    """Load a causal language model and its tokenizer.
+
+    Args:
+        model_name: Model id or path of the base model.
+        max_seq_length: Accepted for interface compatibility; not used here.
+        dtype: Torch dtype for the weights and the 4-bit compute dtype.
+        load_in_4bit: Load the model 4-bit quantized when true.
+        adapter_name_or_path: Optional LoRA adapter. When given, the model is
+            loaded through LLaMA-Factory's ChatModel instead of transformers.
+
+    Returns:
+        A ``(model, tokenizer)`` tuple.
+    """
+    print(f"loading model: {model_name}")
+    if adapter_name_or_path:
+        template = "llama3" if "llama-3" in model_name.lower() else "chatml"
+        args = dict(
+            model_name_or_path=model_name,
+            adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
+            template=template,  # same to the one in training
+            finetuning_type="lora",  # same to the one in training
+            quantization_bit=4 if load_in_4bit else None,  # load 4-bit quantized model
+        )
+        chat_model = ChatModel(args)
+        return chat_model.engine.model, chat_model.engine.tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    # Hand transformers a bitsandbytes config only when quantization was
+    # requested, so a plain load never goes through the bitsandbytes path.
+    quantization_config = None
+    if load_in_4bit:
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_use_double_quant=False,
+            bnb_4bit_compute_dtype=dtype,
+        )
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        quantization_config=quantization_config,
+        torch_dtype=dtype,
+        trust_remote_code=True,
+        device_map="auto",
+    )
+    return model, tokenizer
+def _report_gpu_memory(stage):
+    """Print device 0's name, total memory and peak reserved memory, tagged with `stage`."""
+    gpu_stats = torch.cuda.get_device_properties(0)
+    reserved_gb = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+    print(f"({stage}) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+    print(f"{reserved_gb} GB of memory reserved.")
+# (1) before the model is loaded
+_report_gpu_memory(1)
+model, tokenizer = load_model(
+    model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
+)
+# (2) after the model is loaded
+_report_gpu_memory(2)
+datasets = load_translation_dataset(data_path, tokenizer)
+print("Evaluating model: " + model_name)
+predictions = eval_model(model, tokenizer, datasets["test"])
+# (3) after generation over the test split
+_report_gpu_memory(3)
+# Truthiness (not `is not None`) so an empty ADAPTER_NAME_OR_PATH does not append
+# a bare "_" to the results column name; this matches the check in load_model.
+if adapter_name_or_path:
+    model_name += "_" + adapter_name_or_path.rstrip("/").split("/")[-1]
+save_results(
+    model_name,
+    results_path,
+    datasets["test"],
+    predictions,
+    debug=True,
+)
+metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
+print(metrics)

llm_toolkit/eval_logical_reasoning.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import os
+import sys
+import torch
+from dotenv import find_dotenv, load_dotenv
+# Locate the environment file: prefer a real .env, otherwise fall back to .env.example.
+found_dotenv = find_dotenv(".env") or find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+# override=False: variables already set in the process environment win over the file.
+load_dotenv(found_dotenv, override=False)
+# Put the directory that holds the env file on sys.path before importing llm_toolkit
+# (presumably the repository root -- verify against the project layout).
+path = os.path.dirname(found_dotenv)
+print(f"Adding {path} to sys.path")
+sys.path.append(path)
+from llm_toolkit.llm_utils import *
+from llm_toolkit.logical_reasoning_utils import *
+# Run configuration, read entirely from environment variables.
+model_name = os.getenv("MODEL_NAME")
+adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
+load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
+results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
+print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
+def _report_gpu_memory(stage):
+    """Print device 0's name, total memory and peak reserved memory, tagged with `stage`."""
+    gpu_stats = torch.cuda.get_device_properties(0)
+    reserved_gb = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+    print(f"({stage}) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+    print(f"{reserved_gb} GB of memory reserved.")
+# (1) before the model is loaded
+_report_gpu_memory(1)
+model, tokenizer = load_model(
+    model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
+)
+# (2) after the model is loaded
+_report_gpu_memory(2)
+datasets = load_logical_reasoning_dataset(data_path, tokenizer)
+if len(sys.argv) > 1:
+    # Optional first CLI argument: evaluate only the first N test entries.
+    num = int(sys.argv[1])
+    if num > 0:
+        # Clamp so a request larger than the split does not index out of range.
+        num = min(num, len(datasets["test"]))
+        print(f"--- evaluating {num} entries")
+        # keep only the first `num` rows of the test split
+        datasets["test"] = datasets["test"].select(range(num))
+print("Evaluating model: " + model_name)
+predictions = eval_model(model, tokenizer, datasets["test"])
+# (3) after generation over the test split
+_report_gpu_memory(3)
+# Truthiness (not `is not None`) so an empty ADAPTER_NAME_OR_PATH does not append
+# a bare "_" to the results column name; this matches the check in load_model.
+if adapter_name_or_path:
+    model_name += "_" + adapter_name_or_path.rstrip("/").split("/")[-1]
+save_results(
+    model_name,
+    results_path,
+    datasets["test"],
+    predictions,
+    debug=True,
+)
+metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
+print(metrics)

llm_toolkit/llm_utils.py ADDED Viewed

	@@ -0,0 +1,160 @@

+import os
+import re
+import sys
+import torch
+from llamafactory.chat import ChatModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer
+def load_model(
+    model_name,
+    max_seq_length=2048,
+    dtype=torch.bfloat16,
+    load_in_4bit=False,
+    adapter_name_or_path=None,
+):
+    """Load a causal language model and its tokenizer.
+
+    Args:
+        model_name: Model id or path of the base model.
+        max_seq_length: Accepted for interface compatibility; not used here.
+        dtype: Torch dtype for the weights and the 4-bit compute dtype.
+        load_in_4bit: Load the model 4-bit quantized when true.
+        adapter_name_or_path: Optional LoRA adapter. When given, the model is
+            loaded through LLaMA-Factory's ChatModel instead of transformers.
+
+    Returns:
+        A ``(model, tokenizer)`` tuple.
+    """
+    print(f"loading model: {model_name}")
+    if adapter_name_or_path:
+        template = "llama3" if "llama-3" in model_name.lower() else "chatml"
+        args = dict(
+            model_name_or_path=model_name,
+            adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
+            template=template,  # same to the one in training
+            finetuning_type="lora",  # same to the one in training
+            quantization_bit=4 if load_in_4bit else None,  # load 4-bit quantized model
+        )
+        chat_model = ChatModel(args)
+        return chat_model.engine.model, chat_model.engine.tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    # Hand transformers a bitsandbytes config only when quantization was
+    # requested, so a plain load never goes through the bitsandbytes path.
+    quantization_config = None
+    if load_in_4bit:
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_use_double_quant=False,
+            bnb_4bit_compute_dtype=dtype,
+        )
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        quantization_config=quantization_config,
+        torch_dtype=dtype,
+        trust_remote_code=True,
+        device_map="auto",
+    )
+    return model, tokenizer
+def test_model(model, tokenizer, prompt):
+    """Generate up to 2048 new tokens for one prompt, streaming them through a TextStreamer.
+
+    The encoded prompt is moved to the "cuda" device; nothing is returned.
+    """
+    streamer = TextStreamer(tokenizer)
+    encoded = tokenizer([prompt], return_tensors="pt").to("cuda")
+    model.generate(**encoded, max_new_tokens=2048, streamer=streamer, use_cache=True)
+def extract_answer(text, debug=False):
+    """Strip chat-template scaffolding from decoded model output.
+
+    Falsy input (``None`` or ``""``) is returned unchanged. Otherwise three
+    regex deletions run in order:
+      1. everything up to and including ``assistant`` or ``[/INST]``, plus
+         the following characters up to the next word boundary;
+      2. the first ``<...>`` tag and everything after it;
+      3. everything up to and including ``end_header_id|>`` followed by a
+         blank line.
+
+    Args:
+        text: Decoded generation, prompt included.
+        debug: Print the intermediate text after each step when true.
+
+    Returns:
+        The remaining text.
+    """
+    if not text:
+        return text
+    # (debug label, pattern) pairs; order matters because each pass sees the
+    # output of the previous one.
+    passes = (
+        ("--------\nstep 1:", r".*?(assistant|\[/INST\]).+?\b"),
+        ("--------\nstep 2:", r"<.+?>.*"),
+        ("--------\nstep 3:", r".*?end_header_id\|>\n\n"),
+    )
+    for label, pattern in passes:
+        text = re.sub(pattern, "", text, flags=re.DOTALL | re.MULTILINE)
+        if debug:
+            print(label, text)
+    return text
+def eval_model(model, tokenizer, eval_dataset):
+    """Generate one prediction per entry of ``eval_dataset["prompt"]``.
+
+    Prompts are processed one at a time: tokenized, moved to "cuda",
+    generated with up to 4096 new tokens, decoded, and cleaned with
+    ``extract_answer``. Debug output is printed for the first entry only.
+
+    Args:
+        model: Object exposing ``generate``.
+        tokenizer: Callable tokenizer exposing ``batch_decode``.
+        eval_dataset: Sized dataset with a "prompt" column.
+
+    Returns:
+        List of cleaned prediction strings, in dataset order.
+    """
+    # Imported locally because this module's top-level imports do not include
+    # tqdm; without it the loop below raises NameError.
+    from tqdm import tqdm
+    # Read the column once instead of re-fetching it on every iteration.
+    prompts = eval_dataset["prompt"]
+    predictions = []
+    for i in tqdm(range(len(eval_dataset))):
+        inputs = tokenizer(
+            prompts[i : i + 1],
+            return_tensors="pt",
+        ).to("cuda")
+        outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
+        decoded_output = tokenizer.batch_decode(outputs)
+        debug = i == 0
+        predictions.extend(
+            extract_answer(output, debug=debug) for output in decoded_output
+        )
+    return predictions
+def save_model(
+    model,
+    tokenizer,
+    include_gguf=True,
+    include_merged=True,
+    publish=True,
+):
+    """Save the model and tokenizer locally and optionally push them to the Hub.
+
+    Reads HF_TOKEN and MODEL_NAME from the environment. Any exception is
+    printed rather than raised, so the whole operation is best-effort.
+
+    Args:
+        model: Model to save. The merged/GGUF calls below are not part of
+            the plain transformers API (presumably Unsloth-provided --
+            verify against the caller).
+        tokenizer: Tokenizer saved alongside the model.
+        include_gguf: Also write (and, if publishing, push) a GGUF export.
+        include_merged: Also write (and, if publishing, push) a merged copy.
+        publish: Push each saved artifact to the Hub.
+    """
+    try:
+        token = os.getenv("HF_TOKEN") or None
+        model_name = os.getenv("MODEL_NAME")
+        save_method = "lora"
+        quantization_method = "q5_k_m"
+        # NOTE(review): get_model_names is neither defined nor imported in the
+        # visible part of this module; if it is really missing, the NameError
+        # is swallowed by the except below and nothing gets saved.
+        model_names = get_model_names(
+            model_name, save_method=save_method, quantization_method=quantization_method
+        )
+        model.save_pretrained(model_names["local"])
+        tokenizer.save_pretrained(model_names["local"])
+        if publish:
+            model.push_to_hub(
+                model_names["hub"],
+                token=token,
+            )
+            tokenizer.push_to_hub(
+                model_names["hub"],
+                token=token,
+            )
+        if include_merged:
+            model.save_pretrained_merged(
+                model_names["local"] + "-merged", tokenizer, save_method=save_method
+            )
+            if publish:
+                # Use the same token and save method as every other push;
+                # an empty-string token cannot authenticate.
+                model.push_to_hub_merged(
+                    model_names["hub"] + "-merged",
+                    tokenizer,
+                    save_method=save_method,
+                    token=token,
+                )
+        if include_gguf:
+            model.save_pretrained_gguf(
+                model_names["local-gguf"],
+                tokenizer,
+                quantization_method=quantization_method,
+            )
+            if publish:
+                model.push_to_hub_gguf(
+                    model_names["hub-gguf"],
+                    tokenizer,
+                    quantization_method=quantization_method,
+                    token=token,
+                )
+    except Exception as e:
+        print(e)

llm_toolkit/logical_reasoning_utils.py ADDED Viewed

	@@ -0,0 +1,222 @@

+import os
+import re
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+from datasets import load_dataset
+from llm_toolkit.llm_utils import extract_answer
+from tqdm import tqdm
+print(f"loading {__file__}")
+def calc_metrics(references, predictions, debug=False):
+    """Compute exact-match accuracy of predictions against references.
+
+    Each prediction is passed through ``extract_answer`` before being
+    compared with ``==`` to the reference at the same position.
+
+    Args:
+        references: Sized iterable of expected answers.
+        predictions: Sized iterable of model outputs, same length.
+        debug: Also return the positions of the mismatches.
+
+    Returns:
+        ``{"accuracy": float}``, plus ``"incorrect_ids"`` when ``debug`` is
+        true. Accuracy is 0.0 for empty input.
+
+    Raises:
+        AssertionError: If the two inputs differ in length.
+    """
+    assert len(references) == len(
+        predictions
+    ), f"lengths are different: {len(references)} != {len(predictions)}"
+    predictions = [extract_answer(text) for text in predictions]
+    correct = [1 if ref == pred else 0 for ref, pred in zip(references, predictions)]
+    # Nothing to score: report 0.0 instead of dividing by zero.
+    accuracy = sum(correct) / len(correct) if correct else 0.0
+    results = {"accuracy": accuracy}
+    if debug:
+        results["incorrect_ids"] = [i for i, c in enumerate(correct) if c == 0]
+    return results
+def save_results(model_name, results_path, dataset, predictions, debug=False):
+    """Store predictions as a column named ``model_name`` in the results CSV.
+
+    If ``results_path`` already exists it is read and extended. Otherwise
+    the table is built from ``dataset.to_pandas()`` with the "answer",
+    "prompt" and "train_text" columns dropped, and any missing parent
+    directory is created first.
+
+    Args:
+        model_name: Column name for these predictions.
+        results_path: CSV file to create or update.
+        dataset: Object exposing ``to_pandas()``; used only when the CSV
+            does not exist yet.
+        predictions: One value per row of the table.
+        debug: Print the first row of the resulting table when true.
+    """
+    if os.path.exists(results_path):
+        df = pd.read_csv(results_path, on_bad_lines="warn")
+    else:
+        dir_path = os.path.dirname(results_path)
+        # A bare file name has no directory part, and os.makedirs("") raises.
+        if dir_path:
+            os.makedirs(dir_path, exist_ok=True)
+        df = dataset.to_pandas()
+        df.drop(columns=["answer", "prompt", "train_text"], inplace=True)
+    df[model_name] = predictions
+    if debug:
+        print(df.head(1))
+    df.to_csv(results_path, index=False)
+def load_logical_reasoning_dataset(data_path, tokenizer=None):
+    """Load the train/dev CSV files and optionally add chat-formatted prompts.
+
+    ``train.csv`` becomes the "train" split and ``dev.csv`` the "test"
+    split. When a tokenizer is given, each row gains two columns:
+    "prompt" (the chat-templated question with a generation prompt) and
+    "train_text" (prompt + label + the tokenizer's EOS token).
+
+    Args:
+        data_path: Directory containing ``train.csv`` and ``dev.csv``.
+        tokenizer: Optional tokenizer exposing ``apply_chat_template`` and
+            ``eos_token``. Rows must then provide the "text", "label",
+            "puzzle" and "truth" columns.
+
+    Returns:
+        The loaded dataset dictionary.
+    """
+    train_data_file = data_path + "/train.csv"
+    test_data_file = data_path + "/dev.csv"
+    print("loading train/test data files")
+    datasets = load_dataset(
+        "csv",
+        data_files={"train": train_data_file, "test": test_data_file},
+    )
+    if tokenizer:
+        reasoning_prompt = """你是一个逻辑游戏的主持人。游戏规则如下：
1. 参与者会得到一个谜题。
2. 参与者可以通过提问来获取线索，尝试解开谜题。
3. 对于每个问题，主持人将根据实际情况回答以下五个选项之一：是、不是、不重要、回答正确、问法错误。
4. 回答中不能添加任何其它信息，也不能省略选项中的任何一个字。例如，不可以把“不是”省略成“不”。
5. 参与者需要根据回答来推理，并最终找出谜题的正确答案。
请严格按照这些规则回答参与者提出的问题。
谜题: {}
实际情况: {}
参与者提出的问题: {}
"""
+        def formatting_prompts_func(examples):
+            # MODEL_NAME may be unset; treat that as "not a Mistral model"
+            # instead of crashing on None.lower().
+            model_name = os.getenv("MODEL_NAME") or ""
+            # The system turn is omitted for Mistral models (presumably their
+            # chat template does not accept a system role -- verify).
+            if "mistral" in model_name.lower():
+                leading_messages = []
+            else:
+                leading_messages = [
+                    {
+                        "role": "system",
+                        "content": "You are an expert in logical reasoning.",
+                    },
+                ]
+            texts = []
+            prompts = []
+            rows = zip(
+                examples["text"],
+                examples["label"],
+                examples["puzzle"],
+                examples["truth"],
+            )
+            for question, label, puzzle, truth in rows:
+                user_prompt = reasoning_prompt.format(puzzle, truth, question)
+                messages = leading_messages + [
+                    {"role": "user", "content": user_prompt}
+                ]
+                prompt = tokenizer.apply_chat_template(
+                    messages, tokenize=False, add_generation_prompt=True
+                )
+                prompts.append(prompt)
+                texts.append(prompt + label + tokenizer.eos_token)
+            return {"train_text": texts, "prompt": prompts}
+        datasets = datasets.map(
+            formatting_prompts_func,
+            batched=True,
+        )
+    print(datasets)
+    return datasets
+def eval_model(model, tokenizer, eval_dataset):
+    """Generate one prediction per entry of ``eval_dataset["prompt"]``.
+
+    Prompts are processed one at a time: tokenized, moved to "cuda",
+    generated with up to 4096 new tokens, decoded, and cleaned with
+    ``extract_answer``. Debug output is printed for the first entry only.
+
+    Args:
+        model: Object exposing ``generate``.
+        tokenizer: Callable tokenizer exposing ``batch_decode``.
+        eval_dataset: Sized dataset with a "prompt" column.
+
+    Returns:
+        List of cleaned prediction strings, in dataset order.
+    """
+    # Read the column once instead of re-fetching it on every iteration.
+    prompts = eval_dataset["prompt"]
+    predictions = []
+    for i in tqdm(range(len(eval_dataset))):
+        inputs = tokenizer(
+            prompts[i : i + 1],
+            return_tensors="pt",
+        ).to("cuda")
+        outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
+        decoded_output = tokenizer.batch_decode(outputs)
+        debug = i == 0
+        predictions.extend(
+            extract_answer(output, debug=debug) for output in decoded_output
+        )
+    return predictions
+def save_model(
+    model,
+    tokenizer,
+    include_gguf=True,
+    include_merged=True,
+    publish=True,
+):
+    """Save the model and tokenizer locally and optionally push them to the Hub.
+
+    Reads HF_TOKEN and MODEL_NAME from the environment. Any exception is
+    printed rather than raised, so the whole operation is best-effort.
+
+    Args:
+        model: Model to save. The merged/GGUF calls below are not part of
+            the plain transformers API (presumably Unsloth-provided --
+            verify against the caller).
+        tokenizer: Tokenizer saved alongside the model.
+        include_gguf: Also write (and, if publishing, push) a GGUF export.
+        include_merged: Also write (and, if publishing, push) a merged copy.
+        publish: Push each saved artifact to the Hub.
+    """
+    try:
+        token = os.getenv("HF_TOKEN") or None
+        model_name = os.getenv("MODEL_NAME")
+        save_method = "lora"
+        quantization_method = "q5_k_m"
+        # NOTE(review): get_model_names is neither defined nor imported in the
+        # visible part of this module; if it is really missing, the NameError
+        # is swallowed by the except below and nothing gets saved.
+        model_names = get_model_names(
+            model_name, save_method=save_method, quantization_method=quantization_method
+        )
+        model.save_pretrained(model_names["local"])
+        tokenizer.save_pretrained(model_names["local"])
+        if publish:
+            model.push_to_hub(
+                model_names["hub"],
+                token=token,
+            )
+            tokenizer.push_to_hub(
+                model_names["hub"],
+                token=token,
+            )
+        if include_merged:
+            model.save_pretrained_merged(
+                model_names["local"] + "-merged", tokenizer, save_method=save_method
+            )
+            if publish:
+                # Use the same token and save method as every other push;
+                # an empty-string token cannot authenticate.
+                model.push_to_hub_merged(
+                    model_names["hub"] + "-merged",
+                    tokenizer,
+                    save_method=save_method,
+                    token=token,
+                )
+        if include_gguf:
+            model.save_pretrained_gguf(
+                model_names["local-gguf"],
+                tokenizer,
+                quantization_method=quantization_method,
+            )
+            if publish:
+                model.push_to_hub_gguf(
+                    model_names["hub-gguf"],
+                    tokenizer,
+                    quantization_method=quantization_method,
+                    token=token,
+                )
+    except Exception as e:
+        print(e)
+def get_metrics(df):
+    """Build a per-model accuracy table from a results DataFrame.
+
+    Every column from the third one onward is treated as one model's
+    predictions and scored against the "label" column with
+    ``calc_metrics``.
+
+    Args:
+        df: Results table with a "label" column of reference answers.
+
+    Returns:
+        DataFrame with one row per model column: "model" (the column name
+        after its last "/"), "accuracy", and "all_metrics" (the full
+        ``calc_metrics`` dict).
+    """
+    # NOTE(review): the offset of 2 assumes exactly two non-model columns
+    # come first; confirm against the columns save_results actually keeps.
+    model_columns = df.columns[2:]
+    metrics_df = pd.DataFrame(
+        {"model": [col.split("/")[-1] for col in model_columns]}
+    )
+    accuracy = []
+    all_metrics = []
+    for col in model_columns:
+        # References for this task are in "label" (what the evaluation script
+        # scores against); this table has no "english" column.
+        metrics = calc_metrics(df["label"], df[col], debug=True)
+        print(f"{col}: {metrics}")
+        accuracy.append(metrics["accuracy"])
+        all_metrics.append(metrics)
+    metrics_df["accuracy"] = accuracy
+    metrics_df["all_metrics"] = all_metrics
+    return metrics_df

llm_toolkit/{tune_mac.py → tune.py} RENAMED Viewed

@@ -22,7 +22,6 @@ model_name = os.getenv("MODEL_NAME")
 load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
 eval_base_model = os.getenv("EVAL_BASE_MODEL") == "true"
 eval_fine_tuned = os.getenv("EVAL_FINE_TUNED") == "true"
-do_fine_tuning = os.getenv("DO_FINE_TUNING") == "true"
 save_fine_tuned_model = os.getenv("SAVE_FINE_TUNED") == "true"
 num_train_epochs = int(os.getenv("NUM_TRAIN_EPOCHS") or 0)
 data_path = os.getenv("DATA_PATH")
@@ -42,7 +41,6 @@ print(
     data_path,
     results_path,
     eval_base_model,
-    do_fine_tuning,
     eval_fine_tuned,
     save_fine_tuned_model,
 )
@@ -84,8 +82,9 @@ print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
 print(f"{start_gpu_memory} GB of memory reserved.")
-if not do_fine_tuning:
-    sys.exit(0)
 trainer = load_trainer(
     model,

 load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
 eval_base_model = os.getenv("EVAL_BASE_MODEL") == "true"
 eval_fine_tuned = os.getenv("EVAL_FINE_TUNED") == "true"
 save_fine_tuned_model = os.getenv("SAVE_FINE_TUNED") == "true"
 num_train_epochs = int(os.getenv("NUM_TRAIN_EPOCHS") or 0)
 data_path = os.getenv("DATA_PATH")
     data_path,
     results_path,
     eval_base_model,
     eval_fine_tuned,
     save_fine_tuned_model,
 )
 print(f"{start_gpu_memory} GB of memory reserved.")
+def is_bfloat16_supported():
+    # Stub: always reports bfloat16 as supported, without probing the device.
+    # NOTE(review): presumably a stand-in for a real hardware check; on a GPU
+    # without bf16 support this answer would be wrong -- confirm it is intended.
+    return True
 trainer = load_trainer(
     model,

novel-translation/00_Data_Analysis.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

novel-translation/07r2_tune-lf-py3.11.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

novel-translation/08r2_eval-lf-py3.11.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

novel-translation/09_tune-lf-medium-py3.11.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

results/mgtv-results.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

results/mgtv-results_nv4080.csv ADDED Viewed

The diff for this file is too large to render. See raw diff