from transformers import Pipeline


class LangDetectionPipeline(Pipeline):
    """Pipeline that reports the top language label predicted for a text.

    The wrapped ``self.model`` is called with a single string and must return
    a ``(predictions, probabilities)`` pair; only the entry at ``[0][0]`` of
    each is used.
    NOTE(review): the ``__label__`` prefix suggests a fastText-style
    classifier -- confirm against the model that is actually loaded.
    """

    def _sanitize_parameters(self, **kwargs) -> tuple:
        """Split caller keyword arguments into per-stage parameter dicts.

        None of the stages has tunable parameters, so all three dicts are
        empty. In particular a ``text`` keyword is deliberately NOT forwarded
        to ``preprocess``: the base ``Pipeline`` invokes
        ``preprocess(inputs, **preprocess_params)``, so forwarding a key named
        ``text`` would collide with the positional ``text`` parameter and
        raise ``TypeError: got multiple values for argument 'text'``.

        Args:
            **kwargs: Extra keyword arguments given to the pipeline; ignored.

        Returns:
            A 3-tuple ``(preprocess_params, forward_params,
            postprocess_params)`` of empty dicts.
        """
        return {}, {}, {}

    def preprocess(self, text: str, **kwargs) -> str:
        """Return ``text`` unchanged; the model consumes the raw string."""
        return text

    def _forward(self, text: str, **kwargs) -> tuple:
        """Run the model on ``text`` with newlines replaced by spaces.

        Args:
            text: Input string to classify.

        Returns:
            The ``(predictions, probabilities)`` pair produced by
            ``self.model``.
        """
        # The model is always handed a single line of text.
        # NOTE(review): presumably the model rejects embedded newlines --
        # confirm against the model's prediction API.
        single_line = text.replace("\n", " ")
        predictions, probabilities = self.model(single_line)
        return predictions, probabilities

    def postprocess(self, outputs, **kwargs) -> dict:
        """Convert the raw model output into a JSON-compatible dictionary.

        Args:
            outputs: The ``(predictions, probabilities)`` pair returned by
                ``_forward``.

        Returns:
            ``{"language": <label>, "score": <confidence>}`` where the label
            has every ``"__label__"`` occurrence removed and the confidence is
            a built-in ``float`` rounded to two decimals.
        """
        predictions, probabilities = outputs

        # Only the first prediction of the first entry is reported.
        top_label = predictions[0][0].replace("__label__", "")
        # Cast to a built-in float so the value is JSON serializable.
        top_score = float(probabilities[0][0])

        return {"language": top_label, "score": round(top_score, 2)}