import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import dotenv from 'dotenv';

dotenv.config();

// Azure Speech service credentials. Values loaded by dotenv take precedence;
// the literals below are fallbacks for local testing.
const SPEECH_KEY = process.env.SPEECH_KEY || "dd32a86f9b52496b9504bddadb2a5602";
const SPEECH_REGION = process.env.SPEECH_REGION || "westeurope";

// Example mapping from Azure numeric viseme IDs to custom character values
const azureVisemeToCharacterMap = {
  0: "X",  // Silence
  1: "C",  // æ, ə, ʌ (e.g., cat, ago, cup)
  2: "C",  // ɑ (e.g., father)
  3: "C",  // ɔ (e.g., thought)
  4: "C",  // ɛ, ʊ (e.g., bed, book)
  5: "C",  // ɝ (e.g., bird)
  6: "C",  // j, i, ɪ (e.g., yes, see, sit)
  7: "C",  // w, u (e.g., wit, too)
  8: "C",  // o (e.g., go)
  9: "C",  // aʊ (e.g., now)
  10: "C", // ɔɪ (e.g., boy)
  11: "C", // aɪ (e.g., my)
  12: "D", // h (e.g., he)
  13: "D", // ɹ (e.g., red)
  14: "D", // l (e.g., lie)
  15: "D", // s, z (e.g., see, zoo)
  16: "D", // ʃ, tʃ, dʒ, ʒ (e.g., she, chin, gin)
  17: "D", // ð (e.g., the)
  18: "D", // f, v (e.g., fee, vie) - could also be a separate category like "F"
  19: "D", // d, t, n, θ (e.g., do, to, no, thin)
  20: "D", // k, g, ŋ (e.g., koo, go, sing)
  21: "B"  // p, b, m (e.g., pie, buy, my)
};
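
// As a worked example of the mapping and the timing math below (a hypothetical
// viseme stream, not real SDK output): IDs [0, 21, 6, 15] received at offsets
// [0, 500000, 3500000, 6000000] ticks within a 1-second (10,000,000-tick) clip
// would produce the cues
//   { start: 0.00, end: 0.05, value: "X" }
//   { start: 0.05, end: 0.35, value: "B" }
//   { start: 0.35, end: 0.60, value: "C" }
//   { start: 0.60, end: 1.00, value: "D" }
// since each cue ends where the next viseme starts (or at the end of the audio
// for the last one), with offsets converted from 100-nanosecond ticks to seconds.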

async function synthesizeSpeechWithVisemes(textToSpeak, language = "pt-BR", voiceName = null) {
  const speechConfig = sdk.SpeechConfig.fromSubscription(SPEECH_KEY, SPEECH_REGION);
  speechConfig.speechSynthesisLanguage = language;
  if (voiceName) {
    speechConfig.speechSynthesisVoiceName = voiceName;
  }

  // Synthesize to a memory buffer directly: with a null audioConfig, the
  // SpeechSynthesizer returns the audio data in result.audioData (an ArrayBuffer).
  const synthesizer = new sdk.SpeechSynthesizer(speechConfig, null);

  const visemesData = [];
  synthesizer.visemeReceived = (s, e) => {
    // Audio offset is in 100-nanosecond ticks.
    visemesData.push({ id: e.visemeId, audioOffsetTicks: e.audioOffset });
    // console.log(`Viseme received: ID=${e.visemeId}, Audio offset: ${(e.audioOffset / 10000).toFixed(2)} ms, Animation: ${e.animation}`);
  };

  console.log(`Synthesizing text: '${textToSpeak}' in ${language}${voiceName ? ' using voice ' + voiceName : ''}`);

  return new Promise((resolve, reject) => {
    synthesizer.speakTextAsync(
      textToSpeak,
      result => {
        synthesizer.close();
        if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
          console.log("Speech synthesized successfully.");
          const mouthCues = [];
          if (visemesData.length > 0 && result.audioDuration) {
            const totalAudioDurationSecs = result.audioDuration / 10000000.0;
            for (let i = 0; i < visemesData.length; i++) {
              const currentViseme = visemesData[i];
              const startSecs = currentViseme.audioOffsetTicks / 10000000.0;
              // A cue ends where the next viseme starts; the last cue runs to
              // the end of the audio.
              let endSecs;
              if (i < visemesData.length - 1) {
                endSecs = visemesData[i + 1].audioOffsetTicks / 10000000.0;
              } else {
                endSecs = totalAudioDurationSecs;
              }
              if (endSecs <= startSecs) {
                endSecs = startSecs + 0.01; // Enforce a minimal cue duration
              }
              mouthCues.push({
                start: Number(startSecs.toFixed(2)),
                end: Number(endSecs.toFixed(2)),
                value: azureVisemeToCharacterMap[currentViseme.id] || "X"
              });
            }
          }
          resolve({
            audioData: Buffer.from(result.audioData), // Convert ArrayBuffer to Node.js Buffer
            mouthCues: mouthCues,
            audioDurationTicks: result.audioDuration
          });
        } else if (result.reason === sdk.ResultReason.Canceled) {
          const cancellation = sdk.CancellationDetails.fromResult(result);
          console.error(`Speech synthesis CANCELED: Reason=${cancellation.reason}`);
          if (cancellation.reason === sdk.CancellationReason.Error) {
            console.error(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
            console.error(`CANCELED: ErrorDetails=[${cancellation.errorDetails}]`);
          }
          reject(new Error(`Speech synthesis CANCELED: ${cancellation.reason} - ${cancellation.errorDetails}`));
        } else {
          // Neither completed nor canceled: reject so the promise never hangs.
          reject(new Error(`Speech synthesis ended with unexpected reason: ${result.reason}`));
        }
      },
      error => {
        synthesizer.close();
        console.error(`Speech synthesis error: ${error}`);
        reject(new Error(`Speech synthesis error: ${error}`));
      }
    );
  });
}

export { synthesizeSpeechWithVisemes };

// Example usage (for testing this module directly):
/*
import { fileURLToPath } from 'url';

async function test() {
  const TEXT_TO_SPEAK = "Olá mundo, isto é um teste de síntese de voz com visemas.";
  const LANGUAGE = "pt-BR";
  // const VOICE = "pt-BR-FranciscaNeural"; // Example voice
  try {
    console.log("Attempting Azure TTS test...");
    const result = await synthesizeSpeechWithVisemes(TEXT_TO_SPEAK, LANGUAGE);
    console.log("--- Mouth Cues JSON ---");
    console.log(JSON.stringify({ mouthCues: result.mouthCues }, null, 2));
    console.log("Audio data length (bytes):", result.audioData.length);
    console.log("Audio duration (ticks):", result.audioDurationTicks);
    // To save to a file for testing (add `import fs from 'fs';` at the top):
    // fs.writeFileSync('test_output.wav', result.audioData);
    // console.log('Audio saved to test_output.wav');
  } catch (error) {
    console.error("Test function error:", error);
  }
}

// `require.main === module` is unavailable in ES modules; compare the entry
// script path against this module's URL to run only when executed directly.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  test();
}
*/
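
// A minimal consumption sketch (hypothetical renderer; setTimeout timing is
// approximate): schedule each returned mouth cue at its start offset relative
// to the moment audio playback begins.
/*
const { mouthCues } = await synthesizeSpeechWithVisemes("Olá mundo");
for (const cue of mouthCues) {
  setTimeout(() => {
    // Swap console.log for the renderer's "set mouth shape" call.
    console.log(`t=${cue.start}s -> mouth shape '${cue.value}'`);
  }, cue.start * 1000);
}
*/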