# 인퍼런스 코드 예시

import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import torch
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM

# 1) Path settings
# All files live under one Google Drive folder: the cleaned test set is read
# from `test_excel` and the labelled copy is written to `output_excel`.
base_dir = '/content/drive/MyDrive/PEFT(Qwen3)'
test_excel = os.path.join(base_dir, 'TEST_SET_CLEANED.xlsx')
output_excel = os.path.join(base_dir, 'TEST_SET_CLEANED_FIN_2.xlsx')

# 2) Hugging Face Hub repo ID (switched to the classification model)
model_id = "junghan/Qwen-3-8B-news-classification"

# 3) Load the model & tokenizer
# NOTE: trust_remote_code=True allows Python code shipped in the Hub repo to
# run locally; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    use_fast=True,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    # The empty-string key places the whole model on a single device.
    device_map={"": "cuda"},
)
# Keep the key/value cache enabled so generation reuses past states.
model.config.use_cache = True

# Prompt template used at inference time. It has three *named* placeholders
# ({question}, {cot}, {category}), so it must be filled with keyword
# arguments to str.format.
# The Korean instruction asks the model to classify the news as exactly one of
# '경제' (economy), '금리' (interest rates), '외환' (foreign exchange) or 'None'.
# NOTE(review): the section headers and the final line look like they may have
# lost markup (e.g. '###' markers or <think> tags) when this snippet was
# rendered - confirm against the template used for fine-tuning.
inference_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

Instruction:

아래 뉴스를 읽고 반드시 '경제', '금리', '외환', 혹은 'None' 중 하나로만 분류하세요. 만약 세 가지 카테고리(경제, 금리, 외환)에 해당하지 않는다면 반드시 'None'으로 답변하세요.

Question:

{question}

Response:

{cot} {category}"""

# Load the test set from the Excel workbook (parsed with openpyxl).
df = pd.read_excel(test_excel, engine='openpyxl')
print(f"Loaded {len(df)} examples from {test_excel}")

def predict_label(text: str) -> str:
    """Classify one news text with the fine-tuned model.

    The text is inserted into ``inference_prompt_style``, a short completion
    is generated, and the first line after any ``</think>`` marker is taken
    as the answer.

    Args:
        text: News body (or title + body, etc.) to classify.

    Returns:
        '경제', '금리' or '외환' when the model answers with exactly one of
        them; otherwise an empty string (this includes a 'None' answer, an
        unexpected answer, and missing or blank input).
    """
    # Labels the caller accepts; anything else is reported as "".
    allowed_labels = ("경제", "금리", "외환")

    # Empty spreadsheet cells arrive as NaN (a float), which has no .strip().
    if not isinstance(text, str) or not text.strip():
        return ""
    question = text.strip()

    # The template uses named fields, so a positional .format(question) would
    # raise KeyError. The answer slots are left empty for the model to fill.
    # No EOS token is appended: an end-of-sequence marker belongs after a
    # finished training example, not at the end of a prompt to be continued.
    prompt = inference_prompt_style.format(question=question, cot="", category="")

    # NOTE(review): with truncation enabled, very long inputs may lose the
    # tail of the prompt (the response section) - confirm the truncation side.
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        truncation=True,
        max_length=2048,
    ).to(model.device)  # follow the model's device instead of assuming CUDA
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=10,  # a classification answer needs only a few tokens
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,
    )

    # The returned sequence starts with the prompt tokens; decode only what
    # was newly generated so the prompt text can never be read as the answer.
    prompt_length = inputs.input_ids.shape[1]
    generated = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Keep only the text after the reasoning block, if one was emitted.
    after_think = generated.split("</think>")[-1].strip()
    if not after_think:
        # Nothing usable was generated; splitlines()[0] would raise IndexError.
        return ""

    # The first line is the answer.
    label = after_think.splitlines()[0].strip()
    return label if label in allowed_labels else ""

# Column that holds the text to classify; change it to match the actual
# spreadsheet (e.g. a combination such as 'TITLE' + 'C_TEXT').
text_col = "THEME_HIST"

# Classify every row through a small thread pool, collecting
# (row index, label) pairs in completion order.
# NOTE(review): all workers share the same model object - confirm that
# concurrent generate() calls are safe and actually faster in this setup.
results = []
with ThreadPoolExecutor(max_workers=4) as executor:
    # Map each future back to the index of the row it was created from.
    futures = {
        executor.submit(predict_label, text): idx
        for idx, text in df[text_col].items()
    }
    for future in tqdm(as_completed(futures), total=len(futures), desc="분류 진행중"):
        idx = futures[future]
        try:
            label = future.result()
        except Exception as e:
            # Best effort: one failing row is recorded as unlabelled and
            # reported, without aborting the whole run.
            results.append((idx, ""))
            print(f"Error at idx {idx}: {e}")
        else:
            results.append((idx, label))

# Put the results back into the original order: futures finish in arbitrary
# order, so sort the (row index, label) pairs by row index.
results.sort()
labels = [label for _, label in results]
# The list is assigned by position, so this relies on the DataFrame's index
# being in ascending order.
df['category'] = labels

# Write the labelled table to the output workbook and report where it went.
df.to_excel(output_excel, index=False)
print(f"분류 결과가 {output_excel}에 저장되었습니다.")

[More Information Needed]

Framework versions

  • PEFT 0.16.0
Downloads last month
3
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for junghan/Qwen-3-8B-news-classification

Base model

Qwen/Qwen3-8B-Base
Finetuned
Qwen/Qwen3-8B
Adapter
(194)
this model