chatglm2-6b-int4 / handler.py
from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModel


class EndpointHandler():
    def __init__(self, path: str = ""):
        # `path` is the model directory passed in by the Inference Endpoints
        # toolkit; it is unused here because the quantized weights are pulled
        # directly from the Hub.
        self.tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
        # Load the int4-quantized model in half precision and move it to the GPU.
        self.model = AutoModel.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True).half().cuda()
        self.model = self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            inputs (:obj:`str`): the prompt to send to the model
        Return:
            A :obj:`list` | `dict`: will be serialized and returned
        """
        # Get the prompt; fall back to the raw payload if no "inputs" key is present.
        inputs = data.pop("inputs", data)
        # Single-turn chat: start with an empty history.
        response, history = self.model.chat(self.tokenizer, inputs, history=[])
        return [{"response": response, "history": history}]
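

# A minimal local smoke test (a sketch, not part of the Inference Endpoints
# contract): it assumes a CUDA-capable GPU is available and that the
# THUDM/chatglm2-6b-int4 weights can be downloaded from the Hub. The payload
# shape mirrors the docstring above.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({"inputs": "Hello, please introduce yourself briefly."})
    print(result[0]["response"])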