Spaces:
Build error
Build error
| from transformers import AutoTokenizer, AutoModelForMaskedLM | |
| from transformers import pipeline as hf_pipeline | |
| from pathlib import Path | |
class NpcBertMLM:
    """Masked-language-model wrapper around a fine-tuned BERT checkpoint.

    This class provides functionality to perform masked language modeling
    predictions using a BERT model fine-tuned on NPC staging reports. The
    base model used is an uncased model released by Microsoft, and it can be
    found on the Hugging Face model hub under the name
    'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext'.

    Nothing is read from disk at construction time; call :meth:`load` before
    using the instance as a callable.

    Attributes:
        model: The fine-tuned masked-LM model, or ``None`` until ``load()``.
        tokenizer: The tokenizer matching the model, or ``None`` until
            ``load()``.
        pipeline: The Hugging Face ``fill-mask`` pipeline, or ``None`` until
            ``load()``.
        pretrained_model (str): Path to the directory containing the
            fine-tuned model.
    """

    def __init__(self, pretrained_model: str = "./models/npc-bert-best"):
        """Create an unloaded wrapper.

        Args:
            pretrained_model: Directory holding the fine-tuned checkpoint.
                The default is a relative path, so it is resolved against the
                current working directory (the original code expects that to
                be the directory of ``app.py``).
        """
        self.model = None
        self.tokenizer = None
        self.pipeline = None
        self.pretrained_model = pretrained_model

    def load(self) -> None:
        """Load the model, the tokenizer and the fill-mask pipeline.

        All three components are read from ``self.pretrained_model`` and the
        pipeline is pinned to the CPU.

        Raises:
            FileNotFoundError: If the pretrained model directory is not found.
        """
        # Fail early with a clear message instead of letting the
        # `from_pretrained` calls below fail on a missing directory.
        if not Path(self.pretrained_model).is_dir():
            raise FileNotFoundError(
                f"Cannot find pretrained model at: {self.pretrained_model}"
            )

        self.model = AutoModelForMaskedLM.from_pretrained(self.pretrained_model)
        self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model)
        self.pipeline = hf_pipeline(
            "fill-mask",
            model=self.model,
            tokenizer=self.tokenizer,
            device='cpu',
        )

    def __call__(self, *args, **kwargs) -> dict:
        """Run the fill-mask pipeline and return token scores.

        This method should be called only after :meth:`load` has been
        executed, so that the pipeline is initialized.

        Args:
            *args: Positional arguments forwarded to the pipeline.
            **kwargs: Keyword arguments forwarded to the pipeline unchanged.

        Returns:
            A dict mapping each predicted ``token_str`` to its ``score``.
            When the pipeline returns a nested list, only the first inner
            list is used. An empty pipeline result yields an empty dict.

        Raises:
            BrokenPipeError: If the model has not been loaded before calling
                this method.
        """
        if self.pipeline is None:
            msg = "Model was not initialized, have you run load()?"
            raise BrokenPipeError(msg)

        predictions = self.pipeline(*args, **kwargs)

        # Guard the `[0]` lookups below against an empty result.
        if not predictions:
            return {}

        # The result may be nested (presumably one inner list per input or
        # per mask token -- confirm against the pipeline docs); just use the
        # first output in that case.
        if not isinstance(predictions[0], dict):
            predictions = predictions[0]

        return {item['token_str']: item['score'] for item in predictions}