Spaces:
Sleeping
Sleeping
from transformers import pipeline | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import torch | |
def get_reader_llm(
    name="Qwen/Qwen2.5-3B-Instruct",
    *,
    torch_dtype=torch.float16,
    max_new_tokens=50,
):
    """Build a text-generation pipeline around a causal language model.

    The model and its tokenizer are both loaded with ``from_pretrained`` using
    ``name`` and then wrapped in a ``transformers`` ``pipeline`` configured for
    sampled generation.

    Args:
        name: Model identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.
        torch_dtype: dtype requested when loading the model weights. Defaults
            to ``torch.float16``, the value that was previously hard-coded.
        max_new_tokens: Generation budget forwarded to the pipeline. Defaults
            to ``50``, the value that was previously hard-coded.

    Returns:
        The configured text-generation pipeline. It samples with temperature
        0.2, applies a repetition penalty of 1.1, and returns only the newly
        generated text (``return_full_text=False``).
    """
    # For CPU-only execution it is better not to use device_map.
    # NOTE(review): half precision on CPU may be slow or unsupported for some
    # ops depending on the installed torch build -- confirm, and pass
    # torch_dtype=torch.float32 or torch.bfloat16 if generation fails.
    model = AutoModelForCausalLM.from_pretrained(
        name,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(name)

    # No device argument is passed: the model is already on the CPU.
    return pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        do_sample=True,
        temperature=0.2,
        repetition_penalty=1.1,
        return_full_text=False,
        # The default budget was reduced even further for reliability.
        max_new_tokens=max_new_tokens,
    )