from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


class EndpointHandler:
    """Custom inference handler wrapping a causal LM in a transformers pipeline.

    Expects request payloads shaped like
    ``{"inputs": "<prompt>", "parameters": {...generation kwargs...}}``
    (the Hugging Face Inference Endpoints custom-handler convention).
    """

    def __init__(self, model_path: str, task: str = "text-generation"):
        """Load tokenizer, model, and pipeline from *model_path*.

        Args:
            model_path: Local path or Hub repo id of the model to load.
            task: Pipeline task name; defaults to "text-generation".
        """
        # NOTE(review): trust_remote_code=True executes Python shipped with the
        # checkpoint — only point this at model repositories you trust.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path, trust_remote_code=True
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path, trust_remote_code=True
        )
        self.pipe = pipeline(task=task, model=self.model, tokenizer=self.tokenizer)

    def __call__(self, inputs: dict) -> list:
        """Run generation for a single request.

        Args:
            inputs: Request dict. ``inputs["inputs"]`` is the prompt string;
                the optional ``inputs["parameters"]`` dict holds generation
                kwargs forwarded to the pipeline.

        Returns:
            The raw pipeline output — for text-generation this is a list of
            dicts with a ``"generated_text"`` key (the original ``-> dict``
            annotation was inaccurate).
        """
        prompt = inputs.get("inputs", "")
        # ``or {}`` also guards against an explicit ``"parameters": None`` in
        # the payload, which .get()'s default alone would not catch and which
        # would crash on the ** expansion below.
        params = inputs.get("parameters") or {}
        return self.pipe(prompt, **params)