tourist_rag / reader_llm.py
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch


def get_reader_llm(name="Qwen/Qwen2.5-3B-Instruct"):
    """Load the reader model and wrap it in a text-generation pipeline."""
    READER_MODEL_NAME = name
    # For CPU-only inference it is better not to use device_map.
    # Note: float16 can be slow (and unsupported for some ops) on CPU;
    # torch.float32 is the safer choice when no GPU is available.
    model = AutoModelForCausalLM.from_pretrained(
        READER_MODEL_NAME,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
    READER_LLM = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        # No explicit device argument: the model is already on the CPU.
        do_sample=True,
        temperature=0.2,
        repetition_penalty=1.1,
        return_full_text=False,  # return only the generated continuation
        max_new_tokens=50,  # kept small for reliability on limited hardware
    )
    return READER_LLM
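

# --- Illustrative usage (a minimal sketch, not part of the original file) ---
# Shows how the returned pipeline might be called in the RAG flow. The
# prompt format below is an assumption for demonstration; the actual
# prompt construction lives elsewhere in this repository.
if __name__ == "__main__":
    reader = get_reader_llm()
    prompt = (
        "Answer the question using only the context below.\n"
        "Context: The Eiffel Tower is located in Paris, France.\n"
        "Question: Where is the Eiffel Tower?\n"
        "Answer:"
    )
    # The pipeline returns a list of dicts; with return_full_text=False,
    # "generated_text" holds only the model's continuation.
    output = reader(prompt)
    print(output[0]["generated_text"])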