--- base_model: - knowledgator/gliclass-large-v3.0 pipeline_tag: text-classification library_name: onnx tags: - onnx --- Original model [here](https://huggingface.co/knowledgator/gliclass-large-v3.0) Code for converting to onnx and quantizing [here](https://gist.github.com/cnmoro/3c66de4f92716e8cf044e550a23ee9d2) Dependencies: ```plaintext pip install huggingface-hub onnx onnxruntime numpy tokenizers ``` Inference code: ```python from huggingface_hub import hf_hub_download from tokenizers import Tokenizer import onnxruntime as ort import numpy as np class GLiClassOnnxInference: def __init__(self, model_id: str, use_int8_quant: bool = False): self.onnx_runtime_session = ort.InferenceSession( hf_hub_download(repo_id=model_id, filename="model_i8.onnx" if use_int8_quant else "model.onnx") ) self.tokenizer = Tokenizer.from_file( hf_hub_download(repo_id=model_id, filename="tokenizer.json") ) def encode(self, text: str, max_length: int = 512, pad: bool = True): encoded = self.tokenizer.encode(text) ids = encoded.ids mask = encoded.attention_mask if pad and len(ids) < max_length: pad_len = max_length - len(ids) ids += [self.tokenizer.token_to_id("[PAD]")] * pad_len mask += [0] * pad_len ids = ids[:max_length] mask = mask[:max_length] return np.array([ids], dtype=np.int64), np.array([mask], dtype=np.int64) def onnx_predict(self, text: str, labels: list[str]): full_text = "".join([f"<