from transformers import Pipeline


class LangDetectionPipeline(Pipeline):
    """Custom pipeline that returns the top predicted language for a text.

    The input string is passed to ``self.model`` and the first prediction
    is returned as ``{"language": <label>, "score": <confidence>}``.
    """

    def _sanitize_parameters(self, **kwargs):
        """Split call-time keyword arguments into per-stage parameter dicts.

        None of the stages takes tunable parameters, so three empty dicts
        are returned (preprocess, forward, postprocess).

        A ``text`` keyword is deliberately NOT forwarded to ``preprocess``:
        the base ``Pipeline`` already passes the input positionally as the
        first argument of ``preprocess`` (which is named ``text``), so
        forwarding it again would raise
        ``TypeError: got multiple values for argument 'text'``.
        """
        return {}, {}, {}

    def preprocess(self, text, **kwargs):
        """Return the input unchanged; there is nothing to preprocess."""
        return text

    def _forward(self, text, **kwargs):
        """Run the model on ``text`` and return ``(predictions, probabilities)``.

        Newlines are replaced with spaces before the model is called.
        NOTE(review): presumably the underlying model only accepts
        single-line input (fastText-style predictors reject newlines) --
        confirm against the model wrapper.
        """
        single_line = text.replace("\n", " ")
        predictions, probabilities = self.model(single_line)
        return predictions, probabilities

    def postprocess(self, outputs, **kwargs):
        """Convert raw model outputs into a JSON-compatible dictionary.

        Args:
            outputs: The ``(predictions, probabilities)`` pair produced by
                ``_forward``. Only the ``[0][0]`` entry of each is used.
                NOTE(review): assumes both are indexable as
                ``[sample][rank]`` -- confirm against the model wrapper.

        Returns:
            ``{"language": label, "score": confidence}`` where ``label`` has
            every ``"__label__"`` occurrence removed and ``confidence`` is a
            built-in ``float`` rounded to two decimals.
        """
        predictions, probabilities = outputs

        # Strip the "__label__" marker from the top prediction.
        label = predictions[0][0].replace("__label__", "")

        # Convert to a built-in float so the result is JSON serializable.
        confidence = float(probabilities[0][0])

        return {"language": label, "score": round(confidence, 2)}