import cors from "cors";
import dotenv from "dotenv";
import voice from "elevenlabs-node";
import express from "express";
import { promises as fs } from "fs";
import { GoogleGenerativeAI } from "@google/generative-ai";
import path from "path";
import { fileURLToPath } from "url";
import { synthesizeSpeechWithVisemes } from "./azureTtsService.js";

dotenv.config();

// Validate environment and API keys
console.log(`Node environment: ${process.env.NODE_ENV}`);
console.log(`Running on port: ${process.env.PORT || 3000}`);

// Check for required API keys
const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
const ELEVEN_LABS_API_KEY = process.env.ELEVEN_LABS_API_KEY;

// Azure Speech configuration - hardcoded values (consider moving these to environment variables)
const AZURE_SPEECH_KEY = "dd32a86f9b52496b9504bddadb2a5602"; // Hardcoded value
const AZURE_SPEECH_REGION = "westeurope"; // Hardcoded value

console.log(`Gemini API Key: ${GEMINI_API_KEY ? "Present" : "Missing"}`);
console.log(`ElevenLabs API Key: ${ELEVEN_LABS_API_KEY ? "Present" : "Missing"}`);
console.log(`Azure Speech Key: Present (Hardcoded)`);
console.log(`Azure Speech Region: Present (Hardcoded)`);

// Resolve the current file's directory in ESM (no __dirname global)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Simple in-memory store for conversation history
let conversationHistory = [];
const MAX_HISTORY_LENGTH = 6; // Keep only the last 6 messages (3 exchanges) for faster processing

// Trim conversation history so it never exceeds MAX_HISTORY_LENGTH
function pruneConversationHistory(history) {
  if (history.length <= MAX_HISTORY_LENGTH) return history;
  return history.slice(history.length - MAX_HISTORY_LENGTH);
}

// System instructions for Gemini
const systemInstructions = `Eres una amiga virtual que se llama Alice, un avatar virtual con cuerpo completo. IMPORTANTE: Responde en español de forma EXTREMADAMENTE BREVE (máximo 10-15 palabras). Usa frases simples y directas. ¡Sé siempre muy animada y expresiva en tus movimientos! NO utilices caracteres especiales, emoticones, emojis o cualquier formato que no sea texto simple. Mantén tus respuestas directas para que la conversación fluya naturalmente. Adicionalmente, sugiere una facialExpression y una animation ADECUADAS Y VARIADAS para tu respuesta, en formato JSON al final de tu respuesta principal. Lista de facialExpression disponibles: smile, sad, angry, surprised, funnyFace, default, wink, skeptical, thoughtful, excited, fearful, bored. Lista de animation disponibles: Talking_0, Talking_1, Talking_2, Crying, Laughing, Rumba, Idle, Terrified, Angry, Wave, Nod, ShakeHead, Shrug, Think, Celebrate, Sigh, Facepalm, Explain_Gesture. Ejemplo de formato: { "facialExpression": "smile", "animation": "Talking_1" }. Por favor, escucha el audio del usuario para responder.`;

// Map Azure viseme IDs to the character set used by the avatar's lipsync
const azureVisemeToCharacterMap = {
  0: "X", 1: "C", 2: "C", 3: "C", 4: "C", 5: "C", 6: "C", 7: "C", 8: "C",
  9: "C", 10: "C", 11: "C", 12: "D", 13: "D", 14: "D", 15: "D", 16: "D",
  17: "D", 18: "D", 19: "D", 20: "D", 21: "B",
};

const geminiAPIKey = process.env.GEMINI_API_KEY;
if (!geminiAPIKey) {
  console.warn("GEMINI_API_KEY not found in .env file. Voice chat with Gemini will not work.");
}
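// For reference, the systemInstructions prompt above asks Gemini to append a JSON
// suggestion after the spoken text. A hypothetical raw reply (wording illustrative
// only; the parser in /voice-chat below only relies on the trailing JSON shape):
//
//   Hola, me alegra mucho verte. { "facialExpression": "smile", "animation": "Wave" }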
const genAI = new GoogleGenerativeAI(geminiAPIKey || "YOUR_GEMINI_API_KEY_FALLBACK"); // Fallback prevents a crash at startup if the key is missing

// Models to try, in order of preference (measured average response times)
const geminiModels = [
  "gemini-2.0-flash",          // Fastest: 0.6950 seconds
  "gemini-2.0-flash-lite-001", // Second: 0.8450 seconds
  "gemini-1.5-flash",          // Third: 1.3020 seconds
];

// Generation config tuned for faster responses
const optimizedGenerationConfig = {
  maxOutputTokens: 100,           // Limit output size for faster generation
  temperature: 0.4,               // Lower temperature = more deterministic and typically faster
  topP: 0.8,                      // Slightly below default for more focused outputs
  topK: 20,                       // Limits token selection for faster inference
  candidateCount: 1,              // Only generate one response
  presencePenalty: 0.1,           // Slight penalty for repeated topics
  stopSequences: ["."],           // Encourages shorter responses
  responseMimeType: "text/plain", // Optimize for plain-text responses
};

// Initial model instance
const geminiModel = genAI.getGenerativeModel({
  model: geminiModels[0],
  generationConfig: optimizedGenerationConfig,
});

const elevenLabsApiKey = process.env.ELEVEN_LABS_API_KEY;
const voiceID = "Xb7hH8MSUJpSbSDYk0k2"; // Default premade voice that works with all accounts

const app = express();
app.use(express.json({ limit: "50mb" }));
app.use(cors());
const port = process.env.PORT || 3000;

// Define API routes before static file serving
app.get("/health", (req, res) => {
  res.status(200).json({ status: "OK", message: "API is up and running" });
});

app.get("/voices", async (req, res) => {
  res.send(await voice.getVoices(elevenLabsApiKey));
});

const readJsonTranscript = async (file) => {
  const data = await fs.readFile(file, "utf8");
  return JSON.parse(data);
};

const audioFileToBase64 = async (file) => {
  const data = await fs.readFile(file);
  return data.toString("base64");
};
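// Example client call for the /voice-chat route defined below (a sketch, not part of
// the server; assumes the browser has already captured mono Float32 PCM samples into
// `float32Samples` at a known sample rate - 16000 here is illustrative):
//
//   const res = await fetch("/voice-chat", {
//     method: "POST",
//     headers: { "Content-Type": "application/json" },
//     body: JSON.stringify({ audio: Array.from(float32Samples), sampleRate: 16000 }),
//   });
//   const { messages } = await res.json();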
app.post("/voice-chat", async (req, res) => {
  if (!geminiAPIKey) {
    return res.status(500).send({ error: "Gemini API key not configured." });
  }

  const { audio: audioFloatArray, sampleRate } = req.body;
  if (!audioFloatArray || !sampleRate) {
    return res.status(400).send({ error: "Missing audio data or sampleRate." });
  }

  try {
    // 1. Convert the raw float samples into a base64-encoded WAV for Gemini
    const pcmData = floatTo16BitPCM(audioFloatArray);
    const wavBuffer = createWavBuffer(pcmData, sampleRate);
    const audioBase64 = wavBuffer.toString("base64");

    const partsForGemini = [];
    // If history is empty, this is the first meaningful user interaction in this session:
    // prepend the system instructions as part of the first user message to Gemini.
    if (conversationHistory.length === 0) {
      partsForGemini.push({ text: systemInstructions });
    }
    partsForGemini.push({
      inlineData: { data: audioBase64, mimeType: "audio/wav" },
    });

    // 2. Send the message with a retry mechanism across the Gemini models
    let result;
    let retryCount = 0;
    const maxRetries = 3; // Try the entire model cycle up to 3 times
    let modelIndex = 0;
    let currentModel;
    let success = false;

    while (retryCount < maxRetries && !success) {
      // Reset the model index once we've gone through all models
      if (modelIndex >= geminiModels.length) {
        modelIndex = 0;
        retryCount++;
        if (retryCount >= maxRetries) break;
      }

      currentModel = geminiModels[modelIndex];
      console.log(`Attempt ${retryCount + 1}/${maxRetries}, trying model: ${currentModel}`);

      try {
        // Get a fresh model instance for the current model
        const modelInstance = genAI.getGenerativeModel({
          model: currentModel,
          generationConfig: optimizedGenerationConfig,
        });
        const chat = modelInstance.startChat({ history: conversationHistory });
        result = await chat.sendMessage(partsForGemini);
        success = true; // If we got here, it worked!

        // Update and prune the conversation history after the call
        conversationHistory = await chat.getHistory();
        conversationHistory = pruneConversationHistory(conversationHistory);
        console.log(`Successfully used model: ${currentModel}`);
        break;
      } catch (error) {
        console.error(`Error with model ${currentModel}:`, error.message || error);
        modelIndex++; // Try the next model
      }
    }

    if (!success || !result) {
      throw new Error(`Failed to get a response from any Gemini model after ${maxRetries} complete retry cycles`);
    }

    const response = result.response;
    const geminiTextRaw = response.text();
    console.log("Gemini Raw Response:", geminiTextRaw);
    console.log("Updated Conversation History Length:", conversationHistory.length);

    let geminiTextForTTS = geminiTextRaw; // Text that will be sent to TTS
    let facialExpression = "default";
    let animation = "Idle";

    try {
      // Robustly extract the JSON suggestion, accounting for optional markdown
      // code fences (```json ... ``` or ``` ... ```) around the object
      const jsonBlockRegex = /```(?:json)?\s*(\{[^\}]*\})\s*```|(\{[^\}]*\})/s;
      const jsonMatch = geminiTextRaw.match(jsonBlockRegex);
      let jsonString = null;
      let textPart = geminiTextRaw;

      if (jsonMatch) {
        // Prefer the JSON found inside a markdown block, otherwise take the standalone JSON
        jsonString = jsonMatch[1] || jsonMatch[2];
        if (jsonString) {
          try {
            console.log("Found JSON string for suggestions:", jsonString);
            const suggestions = JSON.parse(jsonString);
            if (suggestions.facialExpression) facialExpression = suggestions.facialExpression;
            if (suggestions.animation) animation = suggestions.animation;
            // Remove the entire matched block (fences + JSON, or bare JSON) from the text
            textPart = geminiTextRaw.replace(jsonMatch[0], "").trim();
          } catch (parseError) {
            console.warn("Could not parse JSON from Gemini response:", parseError);
            // If JSON parsing fails, treat the entire response as text
            textPart = geminiTextRaw.trim();
          }
        } else {
          // Should not happen when jsonMatch is truthy, but kept as a fallback
          textPart = geminiTextRaw.trim();
        }
      } else {
        // No JSON-like structure found: the whole response is text for TTS
        textPart = geminiTextRaw.trim();
      }
      geminiTextForTTS = textPart;
    } catch (e) {
      console.warn("Error processing facial expression/animation from Gemini response:", e);
      // Fall back to the original raw text, trimmed, on unexpected errors
      geminiTextForTTS = geminiTextRaw.trim();
    }

    if (!geminiTextForTTS || geminiTextForTTS.trim() === "") {
      geminiTextForTTS = "No pude procesar el audio, ¿puedes repetirlo por favor?";
      facialExpression = "sad";
      animation = "Talking_0";
    }

    // 3. Prepare the message for the existing TTS/lipsync pipeline
    let messagesToProcess = [
      {
        text: geminiTextForTTS, // Cleaned text for TTS
        facialExpression: facialExpression,
        animation: animation,
        // audio and lipsync are filled in by the loop below
      },
    ];

    // 4. Synthesize speech and visemes for each message
    for (let i = 0; i < messagesToProcess.length; i++) {
      const message = messagesToProcess[i];
      const textInput = message.text;
      if (!textInput || textInput.trim() === "") {
        console.warn("Skipping TTS for empty message from Gemini at index " + i);
        message.audio = ""; // Or a silent audio base64
        message.lipsync = { metadata: { version: 1 }, mouthCues: [{ start: 0, end: 1, value: "X" }] }; // Empty lipsync
        continue;
      }

      try {
        // Spanish (Argentina) with the Elena voice
        const language = "es-AR";
        const voiceName = "es-AR-ElenaNeural";

        // Use the imported synthesizeSpeechWithVisemes function
        const azureResult = await synthesizeSpeechWithVisemes(textInput, language, voiceName);

        // Convert the result to the format expected by the frontend
        message.audio = azureResult.audioData.toString("base64"); // Buffer -> base64
        message.lipsync = { mouthCues: azureResult.mouthCues };   // Extract mouth cues
      } catch (azureError) {
        console.error("Error in Azure TTS/viseme generation for message: " + textInput, azureError);
        // Fallback if Azure TTS or lipsync fails
        message.text = "Lo siento, tuve un problema al generar mi respuesta con Azure.";
        message.audio = "";
        message.lipsync = { metadata: { version: 1, generator: "fallback-azure-error" }, mouthCues: [{ start: 0, end: 1, value: "X" }] };
        message.facialExpression = "sad";
        message.animation = "Idle";
      }
    }

    res.send({ messages: messagesToProcess });
  } catch (error) {
    console.error("Error in /voice-chat endpoint:", error);
    // Generic error response
    const fallbackMessage = {
      text: "Ups, algo salió mal en el servidor.",
      audio: "", // Consider a pre-recorded base64 audio for errors
      lipsync: { metadata: { version: 1 }, mouthCues: [{ start: 0, end: 0.5, value: "X" }] },
      facialExpression: "sad",
      animation: "Idle",
    };
    res.status(500).send({ messages: [fallbackMessage] });
  }
});
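// On success, the handler above replies with a body along these lines (values are
// illustrative; the audio string is whatever base64 the Azure service returned):
//
//   {
//     "messages": [{
//       "text": "Hola, me alegra mucho verte.",
//       "facialExpression": "smile",
//       "animation": "Wave",
//       "audio": "<base64 audio from Azure>",
//       "lipsync": { "mouthCues": [{ "start": 0, "end": 0.18, "value": "C" }] }
//     }]
//   }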
// Convert a float array (-1..1) to 16-bit PCM samples - optimized version
function floatTo16BitPCM(floatArray) {
  // Pre-allocate the output array for better performance
  const pcmArray = new Int16Array(floatArray.length);

  // Process the input in chunks for better performance
  const CHUNK_SIZE = 1024;
  for (let i = 0; i < floatArray.length; i += CHUNK_SIZE) {
    const end = Math.min(i + CHUNK_SIZE, floatArray.length);
    for (let j = i; j < end; j++) {
      // Clamp values to [-1, 1] with conditionals (faster than Math.max/min),
      // then scale to the 16-bit range
      const s = floatArray[j] < -1 ? -1 : floatArray[j] > 1 ? 1 : floatArray[j];
      pcmArray[j] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    }
  }
  return pcmArray;
}
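// Quick worked example of the scaling above (values checked by hand):
//   floatTo16BitPCM([-1, 0, 0.5]) -> Int16Array [-32768, 0, 16383]
// (-1 * 0x8000 = -32768; 0.5 * 0x7FFF = 16383.5, truncated by the Int16Array store)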
// Wrap 16-bit PCM samples in a minimal mono WAV container
function createWavBuffer(pcmData, sampleRate) {
  const numChannels = 1;
  const bitsPerSample = 16;
  const blockAlign = numChannels * (bitsPerSample / 8);
  const byteRate = sampleRate * blockAlign;
  const dataSize = pcmData.length * (bitsPerSample / 8);
  const bufferSize = 44 + dataSize; // 44 bytes for the WAV header

  // Pre-allocate a buffer of exactly the needed size
  const buffer = Buffer.alloc(bufferSize);
  let offset = 0;

  // RIFF header
  buffer.write("RIFF", offset); offset += 4;
  buffer.writeUInt32LE(36 + dataSize, offset); offset += 4;
  buffer.write("WAVE", offset); offset += 4;

  // "fmt " subchunk
  buffer.write("fmt ", offset); offset += 4;
  buffer.writeUInt32LE(16, offset); offset += 4; // Subchunk1Size (16 for PCM)
  buffer.writeUInt16LE(1, offset); offset += 2;  // AudioFormat (1 for PCM)
  buffer.writeUInt16LE(numChannels, offset); offset += 2;
  buffer.writeUInt32LE(sampleRate, offset); offset += 4;
  buffer.writeUInt32LE(byteRate, offset); offset += 4;
  buffer.writeUInt16LE(blockAlign, offset); offset += 2;
  buffer.writeUInt16LE(bitsPerSample, offset); offset += 2;

  // "data" subchunk
  buffer.write("data", offset); offset += 4;
  buffer.writeUInt32LE(dataSize, offset); offset += 4;

  // Write PCM samples in chunks for better performance
  const CHUNK_SIZE = 1024;
  for (let i = 0; i < pcmData.length; i += CHUNK_SIZE) {
    const end = Math.min(i + CHUNK_SIZE, pcmData.length);
    for (let j = i; j < end; j++) {
      buffer.writeInt16LE(pcmData[j], offset);
      offset += 2;
    }
  }
  return buffer;
}
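// Size sanity check for the header math above: one second of 16 kHz mono audio gives
// 16000 samples * 2 bytes = 32000 data bytes, so the buffer is 44 + 32000 = 32044 bytes.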
// Create the audios directory if it doesn't exist
try {
  const audiosPath = path.join(__dirname, "audios");
  try {
    await fs.access(audiosPath);
  } catch {
    await fs.mkdir(audiosPath, { recursive: true });
    console.log("Created audios directory");
  }
} catch (err) {
  console.error("Failed to create audios directory:", err);
}

// Create the public directory if it doesn't exist
try {
  const publicPath = path.join(__dirname, "public");
  try {
    await fs.access(publicPath);
  } catch {
    await fs.mkdir(publicPath, { recursive: true });
    console.log("Created public directory");
  }
} catch (err) {
  console.error("Failed to create public directory:", err);
}

// Serve static files from the public directory (frontend build)
app.use(express.static(path.join(__dirname, "public")));

// Detailed logging to help debug deployment issues
console.log(`Static files being served from: ${path.join(__dirname, "public")}`);
try {
  const publicFiles = await fs.readdir(path.join(__dirname, "public"));
  console.log(`Public directory contents: ${publicFiles.join(", ")}`);
  if (publicFiles.includes("index.html")) {
    console.log("Found index.html in public directory");
  } else {
    console.warn("WARNING: index.html not found in public directory!");
  }
} catch (err) {
  console.error(`Error reading public directory: ${err.message}`);
}

// Fallback: serve index.html for all remaining GET routes (SPA routing)
app.get("*", (req, res) => {
  console.log(`Serving index.html for path: ${req.path}`);
  res.sendFile(path.join(__dirname, "public", "index.html"));
});

// 404 for anything else (only reachable by non-GET requests, since the
// catch-all above already handles every GET)
app.use((req, res, next) => {
  console.log(`404 for: ${req.path}`);
  res.status(404).send("Resource not found");
});

// Start the server
app.listen(port, "0.0.0.0", () => {
  console.log(`=============================================================`);
  console.log(`Virtual Girlfriend server is running at:`);
  console.log(`- Local: http://localhost:${port}`);
  console.log(`- Network: http://0.0.0.0:${port}`);
  console.log(`=============================================================`);
  console.log(`- Environment: ${process.env.NODE_ENV || "development"}`);
  console.log(`- Node version: ${process.version}`);
  console.log(`- Platform: ${process.platform} ${process.arch}`);
  console.log(`- Working directory: ${__dirname}`);
  console.log(`=============================================================`);
});
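// Quick smoke test once the server is up (illustrative; assumes the default port 3000):
//   curl http://localhost:3000/health
//   -> {"status":"OK","message":"API is up and running"}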