Model Details
Model Description
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
Instruction:
μλ λ΄μ€λ₯Ό μ½κ³ 'κ²½μ ', 'κΈλ¦¬', 'μΈν' μ€ νλλ‘ λΆλ₯νμΈμ.
Question:
{}
Response:
{} {}""" -------------------------------------------------Inference Code
μΈνΌλ°μ€ μ½λ
import os import pandas as pd import torch from transformers import AutoTokenizer, AutoModelForCausalLM from tqdm.auto import tqdm import time
1) κ²½λ‘ μ€μ
base_dir = ## μ€μ test_excel = ## μ€μ output_excel = ## μ€μ
2) νκΉ νμ΄μ€ νλΈ λ ν¬ ID
model_id = ## μ€μ
3) λͺ¨λΈ & ν ν¬λμ΄μ λ‘λ
tokenizer = AutoTokenizer.from_pretrained( model_id, use_fast=True, trust_remote_code=True ) model = AutoModelForCausalLM.from_pretrained( model_id, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map={"": "cuda"}, # μ νλΌλ―Έν°λ₯Ό GPUλ‘λ§ λ°°μΉ # low_cpu_mem_usage=True, # (μ ν) λ©λͺ¨λ¦¬ μ¬μ©μ μ€μ΄λ λ‘λ μ΅μ ) model.config.use_cache = True
4) Inference ν둬ννΈ μ€νμΌ μ μ
inference_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
Instruction:
μλ λ΄μ€λ₯Ό μ½κ³ 'κ²½μ ', 'κΈλ¦¬', 'μΈν' μ€ νλλ‘ λΆλ₯νμΈμ.
Question:
{}
Response:
{} {}"""5) ν μ€νΈμ λ‘λ
df = pd.read_excel(test_excel, engine='openpyxl') print(f"Loaded {len(df)} examples from {test_excel}")
6) μΈνΌλ°μ€ ν¨μ μμ : THEME_HISTλ§ μ λ ₯μΌλ‘ λ°μμ μμ½ μμ±
def predict_label(text: str) -> str: # THEME_HIST(λ΄μ€ λ³Έλ¬Έ)λ§ questionμ λ£μ΅λλ€ question = text.strip() # inference_prompt_styleμ questionλ§ μ²« λ²μ§Έ {}μ, λλ¨Έμ§ λ μ리λ λΉ λ¬Έμμ΄("")λ‘ μ±μμ€λλ€ prompt = inference_prompt_style.format(question, "", "") + tokenizer.eos_token
inputs = tokenizer(
prompt,
return_tensors='pt',
truncation=True,
max_length=2048
).to('cuda')
outputs = model.generate(
input_ids=inputs.input_ids,
attention_mask=inputs.attention_mask,
max_new_tokens=100,
eos_token_id=tokenizer.eos_token_id,
use_cache=True,
)
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# "### Response:" λ€μ ν
μ€νΈλ₯Ό μμ½μΌλ‘ κ°μ Έμ΅λλ€
summary = decoded.split("### Response:")[-1].strip()
return summary
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
7) μ§λ ¬ 루ν λμ ThreadPoolExecutorλ‘ λ³λ ¬ μΈνΌλ°μ€
import torch from concurrent.futures import ThreadPoolExecutor, as_completed
7-1) λͺ¨λ THEME_HISTλ₯Ό 미리 ν둬ννΈν
prompts = [ inference_prompt_style.format(row['THEME_HIST'].strip(), "", "") + tokenizer.eos_token for _, row in df.iterrows() ]
7-2) μ€λ λ λ¨μλ‘ ν κ±΄μ© infer μ€ν
def infer_one(prompt: str) -> str: # LangSmithμ βllmβ νμ μΌλ‘ run μμ± with trace(name="Qwen3-8B Summarization", run_type="llm", inputs={"prompt": prompt}) as run: start = time.time()
# ν ν¬λμ΄μ§
inputs_tok = tokenizer(
prompt,
return_tensors="pt",
truncation=True,
max_length=2048
).to("cuda")
input_tokens = inputs_tok.input_ids.numel()
# λͺ¨λΈ μμ±
outputs = model.generate(
input_ids=inputs_tok.input_ids,
attention_mask=inputs_tok.attention_mask,
max_new_tokens=100,
eos_token_id=tokenizer.eos_token_id,
use_cache=True,
)
output_tokens = outputs.sequences.shape[1]
# λμ½λ© λ° μμ½ μΆμΆ
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
summary = decoded.split("### Response:")[-1].strip()
latency_ms = int((time.time() - start) * 1000)
# λ©νλ°μ΄ν° κΈ°λ‘
run.metadata["input_tokens"] = int(input_tokens)
run.metadata["output_tokens"] = int(output_tokens)
run.metadata["latency_ms"] = latency_ms
# κ²°κ³Ό μ μ₯ λ° run μ’
λ£
run.end(outputs={"summary": summary})
return summary
7-6) νμ²λ¦¬: νκ·Έ λ€λ§ λ¨κΈ°κΈ°
df['summary'] = ( df['summary'] .astype(str) .str.split(r'', n=1) .str[-1] .str.strip() )
8) μμ μ μ₯μ½λ λ³λ μμ± εΏ
Framework versions
- PEFT 0.15.2
- Downloads last month
- 3