import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from huggingface_hub import login
import os
from datasets import load_dataset
import accelerate
# Get the access token from the environment variable
hf_token = os.environ.get("HF_TOKEN")

# Log in to Hugging Face
if hf_token:
    login(token=hf_token, add_to_git_credential=True)
else:
    print("HF_TOKEN environment variable is not set")
# Model and tokenizer setup
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, low_cpu_mem_usage=False, token=hf_token
)
# Removed option: device_map="auto"
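# Alternative loading sketch (an assumption, not part of the original app): with the
# accelerate package imported above and a GPU runtime, device_map="auto" would let
# accelerate place the model's layers across the available devices automatically.
# model = AutoModelForCausalLM.from_pretrained(
#     model_name, torch_dtype=torch.float16, device_map="auto", token=hf_token
# )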
# Load the KMMLU dataset (Accounting subset)
dataset = load_dataset("HAERAE-HUB/KMMLU", "Accounting")
# dataset = load_dataset("HAERAE-HUB/KMMLU")
df = dataset['test'].to_pandas()
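# Optional sanity check (commented out): the KMMLU test split is expected to expose
# 'question', the choice columns 'A'..'D', and an integer 'answer' in the range 1-4.
# print(df.columns.tolist())
# print(df.iloc[0].to_dict())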
def evaluate_model(question, choices):
    # Build a multiple-choice prompt: question, lettered choices, then ask for the answer
    prompt = f"์งˆ๋ฌธ: {question}\n\n์„ ํƒ์ง€:\n"
    for i, choice in enumerate(choices):
        prompt += f"{chr(65 + i)}. {choice}\n"
    prompt += "\n๋‹ต๋ณ€:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # Greedy decoding of a single token; temperature only applies when
        # do_sample=True, so it is dropped here. pad_token_id is set explicitly
        # because the Llama tokenizer has no pad token.
        outputs = model.generate(
            **inputs,
            max_new_tokens=1,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated token, i.e. the model's answer letter
    answer = tokenizer.decode(outputs[0][-1:], skip_special_tokens=True).strip()
    return answer

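# Example usage sketch (kept commented out so the Space does not run inference at
# import time); assumes the Accounting split loaded into df above:
# sample = df.iloc[0]
# print(evaluate_model(sample['question'],
#                      [sample['A'], sample['B'], sample['C'], sample['D']]))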
def run_kmmlu_test():
    correct = 0
    total = len(df)
    results = []
    for _, row in df.iterrows():
        question = row['question']
        choices = [row['A'], row['B'], row['C'], row['D']]
        # KMMLU stores the gold answer as an integer 1-4; map it to a letter so it
        # can be compared with the model's A/B/C/D output
        correct_answer = "ABCD"[int(row['answer']) - 1]
        model_answer = evaluate_model(question, choices)
        is_correct = model_answer == correct_answer
        if is_correct:
            correct += 1
        results.append(
            f"์งˆ๋ฌธ: {question}\n๋ชจ๋ธ ๋‹ต๋ณ€: {model_answer}\n์ •๋‹ต: {correct_answer}\n"
            f"์ •ํ™•๋„: {'๋งž์Œ' if is_correct else 'ํ‹€๋ฆผ'}\n"
        )
    accuracy = correct / total
    summary = f"์ „์ฒด ํ…Œ์ŠคํŠธ ๊ฒฐ๊ณผ\n์ •ํ™•๋„: {accuracy:.2%} ({correct}/{total})\n\n"
    return summary + "\n".join(results)

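# Note: running the full Accounting test split takes a while on a single GPU; for a
# quick smoke test, the loop above can be pointed at df.head(10) instead of df.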
iface = gr.Interface(
    fn=run_kmmlu_test,
    inputs=None,
    # inputs=gr.Dropdown(choices=subjects, label="์ฃผ์ œ ์„ ํƒ"),
    outputs="text",
    title="Llama 3๋ฅผ ์ด์šฉํ•œ KMMLU ํ…Œ์ŠคํŠธ",
    description="Accounting ์˜์—ญ์— ๋Œ€ํ•œ KMMLU ํ…Œ์ŠคํŠธ ์ˆ˜ํ–‰"
)

iface.launch(share=True)
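# Note: when hosted on Hugging Face Spaces the app is already served publicly, so
# share=True is typically unnecessary (Gradio may warn that it is ignored there).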