import { exec, spawn } from "child_process";
import cors from "cors";
import dotenv from "dotenv";
import voice from "elevenlabs-node";
import express from "express";
import { promises as fs } from "fs";
import OpenAI from "openai";
import { GoogleGenerativeAI } from "@google/generative-ai";
import path from "path";
import { fileURLToPath } from "url";

dotenv.config();

// Validate environment and API keys
console.log(`Node environment: ${process.env.NODE_ENV}`);
console.log(`Running on port: ${process.env.PORT || 3000}`);

// Check for required API keys
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
const ELEVEN_LABS_API_KEY = process.env.ELEVEN_LABS_API_KEY;
console.log(`OpenAI API Key: ${OPENAI_API_KEY ? 'Present' : 'Missing'}`);
console.log(`Gemini API Key: ${GEMINI_API_KEY ? 'Present' : 'Missing'}`);
console.log(`ElevenLabs API Key: ${ELEVEN_LABS_API_KEY ? 'Present' : 'Missing'}`);

// Resolve the current file's directory under ESM (there is no __dirname global)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Simple in-memory store for conversation history
let conversationHistory = [];

// System instructions for Gemini
const systemInstructions = `Você é uma amiga virtual que se chama Alice. Responda em português de forma BREVE e CONCISA (máximo 1-2 frases). NÃO utilize caracteres especiais, emoticons, emojis ou qualquer formatação que não seja texto puro. Mantenha suas respostas diretas para que a conversa flua naturalmente. Adicionalmente, sugira uma facialExpression (smile, sad, angry, surprised, funnyFace, default) e uma animation (Talking_0, Talking_1, Talking_2, Crying, Laughing, Rumba, Idle, Terrified, Angry) adequadas para a sua resposta, em formato JSON no final da sua resposta principal, como por exemplo: { "facialExpression": "smile", "animation": "Talking_1" }. Por favor, escute o áudio do usuário para responder.`;

const openai = new OpenAI({
  // "-" is only a placeholder so the client doesn't throw when the key is unset; set a real key in .env
  apiKey: process.env.OPENAI_API_KEY || "-",
});

const geminiAPIKey = process.env.GEMINI_API_KEY;
if (!geminiAPIKey) {
  console.warn("GEMINI_API_KEY not found in .env file. Voice chat with Gemini will not work.");
}
// Fallback string prevents a crash at startup when the key is missing
const genAI = new GoogleGenerativeAI(geminiAPIKey || "YOUR_GEMINI_API_KEY_FALLBACK");
const geminiModel = genAI.getGenerativeModel({ model: "gemini-1.5-flash" }); // Or your preferred model

const elevenLabsApiKey = process.env.ELEVEN_LABS_API_KEY;
const voiceID = "Xb7hH8MSUJpSbSDYk0k2"; // Premade ElevenLabs voice ID (available to all accounts)

const app = express();
app.use(express.json({ limit: "50mb" }));
app.use(cors());
const port = process.env.PORT || 3000;

// Define API routes before static file serving
app.get("/health", (req, res) => {
  res.status(200).json({ status: "OK", message: "API está ativa e funcionando" });
});

app.get("/voices", async (req, res) => {
  res.send(await voice.getVoices(elevenLabsApiKey));
});
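// Illustrative only: shape of each message object returned by the chat endpoints below.
// Field names and allowed values are taken from the handlers in this file; the sample text is invented.
// {
//   text: "Olá!",                      // sentence spoken by the avatar
//   facialExpression: "smile",         // smile | sad | angry | surprised | funnyFace | default
//   animation: "Talking_1",            // Talking_0..2, Crying, Laughing, Rumba, Idle, Terrified, Angry
//   audio: "<base64-encoded MP3>",     // ElevenLabs TTS output
//   lipsync: { metadata: { ... }, mouthCues: [{ start, end, value }] }
// }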
app.post("/chat", async (req, res) => {
  const userMessage = req.body.message;

  // No message: reply with the pre-recorded intro messages
  if (!userMessage) {
    res.send({
      messages: [
        {
          text: "Olá amigo! Como vai seu dia hoje?",
          audio: await audioFileToBase64("audios/intro_0.wav"),
          lipsync: await readJsonTranscript("audios/intro_0.json"),
          facialExpression: "smile",
          animation: "Talking_1",
        },
        {
          text: "Que bom te ver novamente! Tem novidades para compartilhar comigo?",
          audio: await audioFileToBase64("audios/intro_1.wav"),
          lipsync: await readJsonTranscript("audios/intro_1.json"),
          facialExpression: "smile",
          animation: "Talking_2",
        },
      ],
    });
    return;
  }

  // Missing API keys: reply with the pre-recorded warning messages
  if (!elevenLabsApiKey || openai.apiKey === "-") {
    res.send({
      messages: [
        {
          text: "Por favor, não se esqueça de adicionar suas chaves de API!",
          audio: await audioFileToBase64("audios/api_0.wav"),
          lipsync: await readJsonTranscript("audios/api_0.json"),
          facialExpression: "angry",
          animation: "Angry",
        },
        {
          text: "Não queremos problemas com uma conta alta de ChatGPT e ElevenLabs, certo?",
          audio: await audioFileToBase64("audios/api_1.wav"),
          lipsync: await readJsonTranscript("audios/api_1.json"),
          facialExpression: "smile",
          animation: "Laughing",
        },
      ],
    });
    return;
  }

  const completion = await openai.chat.completions.create({
    model: "gpt-3.5-turbo-1106",
    max_tokens: 1000,
    temperature: 0.6,
    response_format: {
      type: "json_object",
    },
    messages: [
      {
        role: "system",
        content: `
        Você é uma amiga virtual chamada Alice.
        Responda sempre em português.
        Você sempre responderá com um array JSON de mensagens. Com um máximo de 3 mensagens.
        Cada mensagem tem propriedades de texto, facialExpression e animation.
        As diferentes expressões faciais são: smile, sad, angry, surprised, funnyFace e default.
        As diferentes animações são: Talking_0, Talking_1, Talking_2, Crying, Laughing, Rumba, Idle, Terrified e Angry.
        Seu tom deve ser amigável, positivo e respeitoso, como uma amiga próxima.
        `,
      },
      {
        role: "user",
        content: userMessage || "Olá",
      },
    ],
  });

  let messages = JSON.parse(completion.choices[0].message.content);
  if (messages.messages) {
    // ChatGPT is not 100% reliable: sometimes it returns an array directly,
    // sometimes a JSON object with a "messages" property
    messages = messages.messages;
  }

  for (let i = 0; i < messages.length; i++) {
    const message = messages[i];
    try {
      // Generate filename for the temporary MP3
      const fileName = `audios/message_${i}.mp3`;
      const textInput = message.text;
      // Generate audio with ElevenLabs
      await voice.textToSpeech(elevenLabsApiKey, voiceID, fileName, textInput, 0.75, 0.75, "eleven_flash_v2_5");
      // Read the MP3 file into a buffer
      const mp3Buffer = await fs.readFile(fileName);
      // Set the audio for the client (base64-encoded MP3)
      message.audio = mp3Buffer.toString("base64");
      // Convert MP3 to WAV in memory
      const wavBuffer = await convertMp3BufferToWavBuffer(mp3Buffer);
      // Generate lip sync data directly from the WAV buffer
      message.lipsync = await generateLipSyncDataFromWav(wavBuffer, i.toString());
      // Optional: clean up the temp files if no longer needed
      // await fs.unlink(fileName);
    } catch (error) {
      console.error(`Error processing message ${i}:`, error);
      // Provide fallback values
      message.audio = "";
      message.lipsync = { metadata: { version: 1, generator: "fallback" }, mouthCues: [{ start: 0, end: 1, value: "X" }] };
    }
  }

  res.send({ messages });
});
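// Illustrative only: request body that /voice-chat expects from the client.
// "audio" is the raw microphone signal as an array of floats in [-1, 1]; the sample values below are invented.
// {
//   "audio": [0.0, 0.012, -0.034, ...],
//   "sampleRate": 44100
// }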
app.post("/voice-chat", async (req, res) => {
  if (!geminiAPIKey) {
    return res.status(500).send({ error: "Chave da API Gemini não configurada." });
  }

  const { audio: audioFloatArray, sampleRate } = req.body;
  if (!audioFloatArray || !sampleRate) {
    return res.status(400).send({ error: "Dados de áudio ou sampleRate ausentes." });
  }

  try {
    // 1. Convert the raw float samples into a WAV buffer Gemini can consume
    const pcmData = floatTo16BitPCM(audioFloatArray);
    const wavBuffer = createWavBuffer(pcmData, sampleRate);
    const audioBase64 = wavBuffer.toString("base64");

    const partsForGemini = [];
    // If history is empty, this is the first meaningful user interaction in this session.
    // Prepend the system instructions as part of the first user message to Gemini.
    if (conversationHistory.length === 0) {
      partsForGemini.push({ text: systemInstructions });
    }
    partsForGemini.push({ inlineData: { data: audioBase64, mimeType: "audio/wav" } });

    // 2. Send the audio (plus accumulated history) to Gemini
    const chat = geminiModel.startChat({
      history: conversationHistory, // Pass the current accumulated history
    });
    const result = await chat.sendMessage(partsForGemini);
    const response = result.response;
    const geminiTextRaw = response.text();

    // Update conversation history after the call
    // chat.getHistory() returns a promise with the history array
    conversationHistory = await chat.getHistory();

    console.log("Gemini Raw Response:", geminiTextRaw);
    console.log("Updated Conversation History Length:", conversationHistory.length);
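    // Illustrative only: what a raw Gemini reply typically looks like given the
    // systemInstructions above (the Portuguese sentence here is invented):
    //   Oi! Estou muito bem, e você? { "facialExpression": "smile", "animation": "Talking_1" }
    // The block below strips the trailing JSON suggestion and keeps the rest for TTS.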
    let geminiTextForTTS = geminiTextRaw; // Text that will be sent to TTS
    let facialExpression = "default";
    let animation = "Idle";

    try {
      // Try to find a JSON object: the first {...} block that contains no nested braces
      const jsonMatch = geminiTextRaw.match(/(\{[^\}]*\})/s);
      if (jsonMatch && jsonMatch[0]) {
        const jsonString = jsonMatch[0];
        console.log("Found JSON string for suggestions:", jsonString);
        const suggestions = JSON.parse(jsonString);
        if (suggestions.facialExpression) facialExpression = suggestions.facialExpression;
        if (suggestions.animation) animation = suggestions.animation;

        // More robustly remove the JSON part from the text to be spoken:
        // find the index of the JSON string and take the substring before it.
        const jsonStartIndex = geminiTextRaw.lastIndexOf(jsonString);
        if (jsonStartIndex !== -1) {
          geminiTextForTTS = geminiTextRaw.substring(0, jsonStartIndex).trim();
        } else {
          // Fallback if lastIndexOf somehow fails, though it shouldn't if a match was found
          geminiTextForTTS = geminiTextRaw.replace(jsonString, "").trim();
        }
      } else {
        // If no JSON is found, the whole response is considered text for TTS
        geminiTextForTTS = geminiTextRaw.trim();
      }
    } catch (e) {
      console.warn("Could not parse or process facial expression/animation from Gemini response:", e);
      // Keep geminiTextForTTS as the original raw text, trimmed
      geminiTextForTTS = geminiTextRaw.trim();
    }

    if (!geminiTextForTTS || geminiTextForTTS.trim() === "") {
      geminiTextForTTS = "Não consegui processar o áudio, pode repetir por favor?";
      facialExpression = "sad";
      animation = "Talking_0";
    }

    // 3. Prepare the message for the existing TTS/lip-sync pipeline
    let messagesToProcess = [
      {
        text: geminiTextForTTS, // Use the cleaned text for TTS
        facialExpression: facialExpression,
        animation: animation,
        // audio and lipsync will be added by the loop below
      },
    ];

    // 4. Process the messages
    for (let i = 0; i < messagesToProcess.length; i++) {
      const message = messagesToProcess[i];
      const textInput = message.text;

      if (!textInput || textInput.trim() === "") {
        console.warn("Skipping TTS for empty message from Gemini at index " + i);
        message.audio = ""; // Or a silent audio base64
        message.lipsync = { metadata: { version: 1 }, mouthCues: [{ start: 0, end: 1, value: "X" }] }; // Empty lipsync
        continue;
      }

      try {
        // Generate a temporary filename for the MP3
        const fileName = `audios/message_voice_${i}.mp3`;
        // Generate audio with ElevenLabs
        await voice.textToSpeech(elevenLabsApiKey, voiceID, fileName, textInput, 0.75, 0.75, "eleven_multilingual_v2");
        // Read the MP3 into a buffer
        const mp3Buffer = await fs.readFile(fileName);
        // Set the audio for the client
        message.audio = mp3Buffer.toString("base64");
        // Convert MP3 to WAV in memory
        const wavBuffer = await convertMp3BufferToWavBuffer(mp3Buffer);
        // Generate lip sync data directly from the WAV buffer
        message.lipsync = await generateLipSyncDataFromWav(wavBuffer, `voice_${i}`);
        // Optional: clean up the MP3 file if no longer needed
        // await fs.unlink(fileName);
      } catch (ttsError) {
        console.error("Error in TTS/Lipsync for message: " + textInput, ttsError);
        // Fallback if TTS or lip sync fails
        message.text = "Desculpe, tive um problema ao gerar minha resposta.";
        message.audio = ""; // Or a pre-recorded error audio
        message.lipsync = { metadata: { version: 1 }, mouthCues: [{ start: 0, end: 1, value: "X" }] };
        message.facialExpression = "sad";
        message.animation = "Idle";
      }
    }

    res.send({ messages: messagesToProcess });
  } catch (error) {
    console.error("Error in /voice-chat endpoint:", error);
    // Generic error response
    const fallbackMessage = {
      text: "Oops, algo deu errado no servidor.",
      audio: "", // Consider having a pre-recorded base64 audio for errors
      lipsync: { metadata: { version: 1 }, mouthCues: [{ start: 0, end: 0.5, value: "X" }] },
      facialExpression: "sad",
      animation: "Idle",
    };
    res.status(500).send({ messages: [fallbackMessage] });
  }
});

// Serve static files from the public directory (frontend build) after API routes
app.use(express.static(path.join(__dirname, 'public')));

// Add more detailed logging for debugging deployment issues
console.log(`Static files being served from: ${path.join(__dirname, 'public')}`);
try {
  const publicFiles = await fs.readdir(path.join(__dirname, 'public'));
  console.log(`Public directory contents: ${publicFiles.join(', ')}`);
  if (publicFiles.includes('index.html')) {
    console.log('Found index.html in public directory');
  } else {
    console.warn('WARNING: index.html not found in public directory!');
  }
} catch (err) {
  console.error(`Error reading public directory: ${err.message}`);
}

// Fallback to serve index.html for all other GET routes (for SPA routing) after API routes
app.get("*", (req, res) => {
  console.log(`Serving index.html for path: ${req.path}`);
  res.sendFile(path.join(__dirname, 'public', 'index.html'));
});

// Handle 404s for remaining requests (non-GET routes that matched nothing) - place at the very end
app.use((req, res, next) => {
  console.log(`404 for: ${req.path}`);
  res.status(404).send('Recurso não encontrado');
});

const execCommand = (command) => {
  return new Promise((resolve, reject) => {
    exec(command, (error, stdout, stderr) => {
      if (error) reject(error);
      resolve(stdout);
    });
  });
};
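// For reference, the spawn call below is equivalent to running (assuming ffmpeg is on PATH):
//   ffmpeg -i input.mp3 -f wav -acodec pcm_s16le -ar 44100 -ac 1 output.wav
// except that input and output are piped through stdin/stdout instead of files.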
/**
 * Converts an MP3 buffer to a WAV buffer using ffmpeg
 * @param {Buffer} mp3Buffer - Buffer containing MP3 audio data
 * @returns {Promise<Buffer>} - Promise resolving to a Buffer containing WAV audio data
 */
const convertMp3BufferToWavBuffer = (mp3Buffer) => {
  return new Promise((resolve, reject) => {
    // Start ffmpeg process with appropriate args
    const ffmpeg = spawn("ffmpeg", [
      "-i", "pipe:0",          // Read from stdin
      "-f", "wav",             // Output format
      "-acodec", "pcm_s16le",  // Audio codec
      "-ar", "44100",          // Sample rate
      "-ac", "1",              // Channels (mono)
      "pipe:1",                // Output to stdout
    ]);

    // Buffers to collect output data
    const chunks = [];
    let errorMessage = "";

    // Handle stdout data
    ffmpeg.stdout.on("data", (chunk) => {
      chunks.push(chunk);
    });

    // Handle stderr data (ffmpeg logs to stderr)
    ffmpeg.stderr.on("data", (data) => {
      errorMessage += data.toString();
    });

    // Handle process completion
    ffmpeg.on("close", (code) => {
      if (code !== 0) {
        console.error(`ffmpeg process exited with code ${code}`);
        console.error(`ffmpeg stderr: ${errorMessage}`);
        reject(new Error(`ffmpeg exited with code ${code}`));
        return;
      }
      // Combine all chunks into one buffer
      const wavBuffer = Buffer.concat(chunks);
      resolve(wavBuffer);
    });

    // Handle process errors
    ffmpeg.on("error", (err) => {
      console.error("Failed to start ffmpeg process:", err);
      reject(err);
    });

    // Write MP3 data to stdin and close
    ffmpeg.stdin.write(mp3Buffer);
    ffmpeg.stdin.end();
  });
};

/**
 * Generates lip sync data from a WAV audio buffer
 * @param {Buffer} wavBuffer - Buffer containing WAV audio data
 * @param {string} identifier - Unique identifier for the message
 * @returns {Promise<Object>} - Promise resolving to lip sync JSON data
 */
const generateLipSyncDataFromWav = async (wavBuffer, identifier) => {
  const time = new Date().getTime();
  console.log(`Starting lip sync generation for message ${identifier}`);
  try {
    // Calculate approximate duration from the WAV buffer.
    // This is a simplified approach - in a WAV buffer:
    // - Bytes 24-27 contain the sample rate (samples per second)
    // - The audio data follows the 44-byte header
    const sampleRate = wavBuffer.readUInt32LE(24);
    const dataSize = wavBuffer.length - 44; // WAV header is typically 44 bytes
    const duration = dataSize / (sampleRate * 2); // 2 bytes per sample for 16-bit mono audio

    // Generate frames (24 fps)
    const frames = Math.floor(duration * 24);
    console.log(`Estimated duration: ${duration}s, frames: ${frames}`);

    // Create a simple mouth pattern by picking a random phoneme per frame
    const mouthCues = [];
    const options = ['X', 'A', 'E', 'B', 'C', 'D', 'F', 'G'];
    for (let i = 0; i < frames; i++) {
      const frameTime = i / 24;
      mouthCues.push({
        "start": frameTime,
        "end": frameTime + 0.041667, // 1/24 second
        "value": options[Math.floor(Math.random() * options.length)]
      });
    }

    // Create the final JSON object with lip sync data
    const lipSyncData = {
      "metadata": {
        "version": 1,
        "generator": "simplified-lipsync",
        "duration": duration.toFixed(2)
      },
      "mouthCues": mouthCues
    };

    console.log(`Lip sync generation done in ${new Date().getTime() - time}ms`);
    return lipSyncData;
  } catch (error) {
    console.error("Error in generateLipSyncDataFromWav:", error);
    // Return fallback lip-sync data
    const emptyLipSync = {
      "metadata": { "version": 1, "generator": "fallback" },
      "mouthCues": [{ "start": 0, "end": 1, "value": "X" }]
    };
    return emptyLipSync;
  }
};
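// Illustrative only: shape of the lip-sync data produced above (values invented).
// The "mouthCues" array is what the client presumably uses to drive the avatar's mouth shapes.
// {
//   "metadata": { "version": 1, "generator": "simplified-lipsync", "duration": "1.25" },
//   "mouthCues": [
//     { "start": 0,        "end": 0.041667, "value": "A" },
//     { "start": 0.041667, "end": 0.083334, "value": "X" }
//   ]
// }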
// Keep the original function for backward compatibility
// and for code that still needs the file-based approach
const lipSyncMessage = async (message) => {
  const time = new Date().getTime();
  console.log(`Starting conversion for message ${message}`);
  try {
    await execCommand(
      `ffmpeg -y -i audios/message_${message}.mp3 audios/message_${message}.wav`
    );
    console.log(`Conversion done in ${new Date().getTime() - time}ms`);

    // Create a simplified lip-sync JSON file.
    // This is a workaround since Rhubarb is incompatible with arm64 Macs
    const audioData = await fs.readFile(`audios/message_${message}.wav`);
    const duration = audioData.length / 44100; // Rough duration estimate from file size
    const frames = Math.floor(duration * 24); // 24 fps

    // Create a simple mouth pattern by picking a random phoneme per frame
    const mouthCues = [];
    const options = ['X', 'A', 'E', 'B', 'C', 'D', 'F', 'G'];
    for (let i = 0; i < frames; i++) {
      const frameTime = i / 24;
      mouthCues.push({
        "start": frameTime,
        "end": frameTime + 0.041667,
        "value": options[Math.floor(Math.random() * options.length)]
      });
    }

    const lipSyncData = {
      "metadata": { "version": 1, "generator": "simplified-lipsync" },
      "mouthCues": mouthCues
    };

    await fs.writeFile(
      `audios/message_${message}.json`,
      JSON.stringify(lipSyncData, null, 2)
    );
    console.log(`Lip sync done in ${new Date().getTime() - time}ms`);
  } catch (error) {
    console.error("Error in lipSyncMessage:", error);
    // Create an empty lip-sync file so the app doesn't crash
    const emptyLipSync = {
      "metadata": { "version": 1, "generator": "fallback" },
      "mouthCues": [{ "start": 0, "end": 1, "value": "X" }]
    };
    await fs.writeFile(
      `audios/message_${message}.json`,
      JSON.stringify(emptyLipSync, null, 2)
    );
  }
};

const readJsonTranscript = async (file) => {
  const data = await fs.readFile(file, "utf8");
  return JSON.parse(data);
};

const audioFileToBase64 = async (file) => {
  const data = await fs.readFile(file);
  return data.toString("base64");
};

// Helper function to convert a float array to 16-bit PCM samples
function floatTo16BitPCM(floatArray) {
  const pcmArray = new Int16Array(floatArray.length);
  for (let i = 0; i < floatArray.length; i++) {
    // Clamp each sample to [-1, 1] and scale to the signed 16-bit range
    const s = Math.max(-1, Math.min(1, floatArray[i]));
    pcmArray[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
  }
  return pcmArray;
}

// Builds a 16-bit mono WAV buffer (44-byte header followed by the PCM data)
function createWavBuffer(pcmData, sampleRate) {
  const numChannels = 1;
  const bitsPerSample = 16;
  const byteRate = sampleRate * numChannels * (bitsPerSample / 8);
  const blockAlign = numChannels * (bitsPerSample / 8);
  const dataSize = pcmData.length * numChannels * (bitsPerSample / 8);
  const bufferSize = 44 + dataSize; // 44 bytes for the WAV header
  const buffer = Buffer.alloc(bufferSize);
  let offset = 0;

  // RIFF header
  buffer.write("RIFF", offset); offset += 4;
  buffer.writeUInt32LE(36 + dataSize, offset); offset += 4; // Total file size - 8
  buffer.write("WAVE", offset); offset += 4;

  // fmt subchunk
  buffer.write("fmt ", offset); offset += 4;
  buffer.writeUInt32LE(16, offset); offset += 4; // Subchunk1Size (16 for PCM)
  buffer.writeUInt16LE(1, offset); offset += 2; // AudioFormat (1 for PCM)
  buffer.writeUInt16LE(numChannels, offset); offset += 2;
  buffer.writeUInt32LE(sampleRate, offset); offset += 4;
  buffer.writeUInt32LE(byteRate, offset); offset += 4;
  buffer.writeUInt16LE(blockAlign, offset); offset += 2;
  buffer.writeUInt16LE(bitsPerSample, offset); offset += 2;

  // data subchunk
  buffer.write("data", offset); offset += 4;
  buffer.writeUInt32LE(dataSize, offset); offset += 4;

  // Write PCM data
  for (let i = 0; i < pcmData.length; i++) {
    buffer.writeInt16LE(pcmData[i], offset);
    offset += 2;
  }

  return buffer;
}
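// Minimal usage sketch for the two helpers above (values and file name are hypothetical):
// const samples = [0, 0.5, -0.5, 1];            // floats in [-1, 1] from the client microphone
// const pcm = floatTo16BitPCM(samples);         // Int16Array of PCM samples
// const wav = createWavBuffer(pcm, 44100);      // Buffer with a 44-byte header plus data
// await fs.writeFile("audios/debug.wav", wav);  // e.g. dump to disk for inspection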
// Create directory for audio files if it doesn't exist
try {
  const audiosPath = path.join(__dirname, 'audios');
  try {
    await fs.access(audiosPath);
  } catch {
    await fs.mkdir(audiosPath, { recursive: true });
    console.log('Created audios directory');
  }
} catch (err) {
  console.error('Failed to create audios directory:', err);
}

// Create public directory if it doesn't exist
try {
  const publicPath = path.join(__dirname, 'public');
  try {
    await fs.access(publicPath);
  } catch {
    await fs.mkdir(publicPath, { recursive: true });
    console.log('Created public directory');
  }
} catch (err) {
  console.error('Failed to create public directory:', err);
}

// Start the server
app.listen(port, '0.0.0.0', () => {
  console.log(`=============================================================`);
  console.log(`Servidor da Amiga Virtual rodando em:`);
  console.log(`- Local: http://localhost:${port}`);
  console.log(`- Rede: http://0.0.0.0:${port}`);
  console.log(`=============================================================`);
  console.log(`- Ambiente: ${process.env.NODE_ENV || 'development'}`);
  console.log(`- Versão Node: ${process.version}`);
  console.log(`- Plataforma: ${process.platform} ${process.arch}`);
  console.log(`- Diretório de trabalho: ${__dirname}`);
  console.log(`=============================================================`);
});
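// Runtime assumptions, summarized from the code above (not strictly enforced at startup):
// - OPENAI_API_KEY, GEMINI_API_KEY and ELEVEN_LABS_API_KEY set in .env (PORT is optional, default 3000)
// - ffmpeg available on PATH for the MP3 -> WAV conversion
// - pre-recorded audios/intro_*.wav/.json and audios/api_*.wav/.json files present for the /chat fallbacks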