Spaces:
Sleeping
Sleeping
Marcos
committed on
Commit
·
dd76c65
1
Parent(s):
0cdb42e
Automated push by deployment script
Browse files
- Dockerfile +3 -39
- backend/azureTtsService.js +143 -0
- backend/package.json +1 -2
Dockerfile
CHANGED
@@ -4,37 +4,7 @@ FROM node:18-slim
|
|
4 |
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg curl ca-certificates wget unzip file && \
|
5 |
apt-get clean && rm -rf /var/lib/apt/lists/*
|
6 |
|
7 |
-
#
|
8 |
-
# Switch to root to install Rhubarb, then switch back to node user later if needed before app execution
|
9 |
-
USER root
|
10 |
-
|
11 |
-
ARG RHUBARB_VERSION=1.13.0
|
12 |
-
RUN \
|
13 |
-
echo "Downloading Rhubarb version ${RHUBARB_VERSION}" && \
|
14 |
-
wget https://github.com/DanielSWolf/rhubarb-lip-sync/releases/download/v${RHUBARB_VERSION}/rhubarb-lip-sync-${RHUBARB_VERSION}-linux.zip -O /tmp/rhubarb.zip && \
|
15 |
-
echo "--- Listing /tmp to check zip file: ---" && \
|
16 |
-
ls -l /tmp/rhubarb.zip && \
|
17 |
-
echo "--- File type of /tmp/rhubarb.zip: ---" && \
|
18 |
-
file /tmp/rhubarb.zip && \
|
19 |
-
echo "--- Creating temporary extraction directory /tmp/rhubarb_extracted ---" && \
|
20 |
-
mkdir -p /tmp/rhubarb_extracted && \
|
21 |
-
echo "--- Unzipping to /tmp/rhubarb_extracted ---" && \
|
22 |
-
unzip /tmp/rhubarb.zip -d /tmp/rhubarb_extracted && \
|
23 |
-
echo "--- Listing /tmp/rhubarb_extracted (recursive): ---" && \
|
24 |
-
ls -R /tmp/rhubarb_extracted && \
|
25 |
-
echo "--- Finding rhubarb executable in /tmp/rhubarb_extracted ---" && \
|
26 |
-
RHUBARB_EXEC_PATH=$(find /tmp/rhubarb_extracted -name rhubarb -type f -executable -print -quit) && \
|
27 |
-
if [ -z "${RHUBARB_EXEC_PATH}" ]; then \
|
28 |
-
echo "ERROR: Could not find rhubarb executable in /tmp/rhubarb_extracted"; \
|
29 |
-
ls -R /tmp/rhubarb_extracted; \
|
30 |
-
exit 1; \
|
31 |
-
fi && \
|
32 |
-
echo "Found rhubarb executable at: ${RHUBARB_EXEC_PATH}" && \
|
33 |
-
echo "--- Moving ${RHUBARB_EXEC_PATH} to /usr/local/bin/rhubarb ---" && \
|
34 |
-
mv "${RHUBARB_EXEC_PATH}" "/usr/local/bin/rhubarb" && \
|
35 |
-
chmod +x "/usr/local/bin/rhubarb" && \
|
36 |
-
echo "--- Cleaning up /tmp/rhubarb.zip and /tmp/rhubarb_extracted ---" && \
|
37 |
-
rm -rf "/tmp/rhubarb.zip" "/tmp/rhubarb_extracted"
|
38 |
|
39 |
USER node
|
40 |
|
@@ -43,7 +13,7 @@ USER node
|
|
43 |
# Diagnostic commands for the node user
|
44 |
RUN echo "Running diagnostics as $(whoami)"
|
45 |
RUN echo "PATH is: $PATH"
|
46 |
-
RUN which rhubarb || echo "rhubarb not found in PATH for node user during build"
|
47 |
|
48 |
# Set environment variables
|
49 |
ENV NODE_ENV=production
|
@@ -76,13 +46,7 @@ RUN mkdir -p /home/node/app/audios /home/node/app/public
|
|
76 |
# This prevents ENOENT errors when serving index.html
|
77 |
RUN echo '<html><body><h1>Placeholder for Virtual Girlfriend App</h1><p>This is a placeholder page. Update with actual content if needed.</p></body></html>' > /home/node/app/public/index.html
|
78 |
|
79 |
-
#
|
80 |
-
# Replace the URL below with the actual source for rhubarb if available
|
81 |
-
# RUN mkdir -p /home/node/app/backend/bin && \
|
82 |
-
# curl -L -o /home/node/app/backend/bin/rhubarb 'https://example.com/path/to/rhubarb-binary' && \
|
83 |
-
# chmod +x /home/node/app/backend/bin/rhubarb
|
84 |
-
# Note: The above command is commented out as the URL is a placeholder. Please provide the correct URL or method to obtain rhubarb.
|
85 |
-
# Alternatively, consider using a script at startup to download it if it cannot be done at build time.
|
86 |
|
87 |
# Expose the port the app runs on
|
88 |
EXPOSE 7860
|
|
|
4 |
RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg curl ca-certificates wget unzip file && \
|
5 |
apt-get clean && rm -rf /var/lib/apt/lists/*
|
6 |
|
7 |
+
# Rhubarb installation has been removed.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
USER node
|
10 |
|
|
|
13 |
# Diagnostic commands for the node user
|
14 |
RUN echo "Running diagnostics as $(whoami)"
|
15 |
RUN echo "PATH is: $PATH"
|
16 |
+
# Removed: RUN which rhubarb || echo "rhubarb not found in PATH for node user during build"
|
17 |
|
18 |
# Set environment variables
|
19 |
ENV NODE_ENV=production
|
|
|
46 |
# This prevents ENOENT errors when serving index.html
|
47 |
RUN echo '<html><body><h1>Placeholder for Virtual Girlfriend App</h1><p>This is a placeholder page. Update with actual content if needed.</p></body></html>' > /home/node/app/public/index.html
|
48 |
|
49 |
+
# Rhubarb download/installation section removed.
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
# Expose the port the app runs on
|
52 |
EXPOSE 7860
|
backend/azureTtsService.js
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
|
2 |
+
import dotenv from 'dotenv';
|
3 |
+
|
4 |
+
dotenv.config();
|
5 |
+
|
6 |
+
// User-provided Azure Speech service details from environment variables.
const SPEECH_KEY = process.env.AZURE_SPEECH_KEY;
const SPEECH_REGION = process.env.AZURE_SPEECH_REGION;

// Mapping from Azure numeric viseme IDs (0-21) to the custom character
// values used by the avatar's mouth animation, grouped by mouth shape:
//   X = silence (ID 0)
//   C = vowels and semivowels (IDs 1-11: ae/a/o/e/er/i/u/o/aw/oy/ai)
//   D = most consonants (IDs 12-20: h/r/l/s/sh/th/f-v/d-t-n/k-g-ng)
//   B = bilabials p, b, m (ID 21)
const azureVisemeToCharacterMap = Object.fromEntries(
  [
    [[0], "X"],
    [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], "C"],
    [[12, 13, 14, 15, 16, 17, 18, 19, 20], "D"],
    [[21], "B"],
  ].flatMap(([ids, shape]) => ids.map((id) => [id, shape]))
);
|
35 |
+
|
36 |
+
/**
 * Synthesize speech for `textToSpeak` with Azure TTS, collecting viseme
 * timing events so the caller gets both audio and lip-sync mouth cues.
 *
 * @param {string} textToSpeak - Text to synthesize.
 * @param {string} [language="pt-BR"] - BCP-47 synthesis language tag.
 * @param {string|null} [voiceName=null] - Optional explicit Azure voice name;
 *   when null, the service's default voice for `language` is used.
 * @returns {Promise<{audioData: Buffer, mouthCues: Array<{start: number, end: number, value: string}>, audioDurationTicks: number}>}
 *   Audio bytes, Rhubarb-style mouth cues (seconds), and total duration in
 *   100-nanosecond ticks.
 * @throws {Error} If the Azure key/region env vars are missing, or if
 *   synthesis is canceled, errors out, or finishes with an unexpected reason.
 */
async function synthesizeSpeechWithVisemes(textToSpeak, language = "pt-BR", voiceName = null) {
  if (!SPEECH_KEY || !SPEECH_REGION) {
    console.error("Azure Speech Key or Region not configured in environment variables.");
    throw new Error("Azure Speech Key or Region not configured.");
  }

  const speechConfig = sdk.SpeechConfig.fromSubscription(SPEECH_KEY, SPEECH_REGION);
  speechConfig.speechSynthesisLanguage = language;
  if (voiceName) {
    speechConfig.speechSynthesisVoiceName = voiceName;
  }

  // Synthesize to memory buffer directly.
  // If audioConfig is null, SpeechSynthesizer returns audio data in result.audioData (ArrayBuffer).
  const synthesizer = new sdk.SpeechSynthesizer(speechConfig, null);

  const visemesData = [];
  synthesizer.visemeReceived = (s, e) => {
    // Audio offset is in 100-nanosecond ticks.
    visemesData.push({ id: e.visemeId, audioOffsetTicks: e.audioOffset });
  };

  console.log(`Synthesizing text: '${textToSpeak}' in ${language}${voiceName ? ' using voice ' + voiceName : ''}`);

  return new Promise((resolve, reject) => {
    synthesizer.speakTextAsync(
      textToSpeak,
      result => {
        synthesizer.close();
        if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
          console.log("Speech synthesized successfully.");
          resolve({
            audioData: Buffer.from(result.audioData), // Convert ArrayBuffer to Node.js Buffer
            mouthCues: _buildMouthCues(visemesData, result.audioDuration),
            audioDurationTicks: result.audioDuration
          });
        } else if (result.reason === sdk.ResultReason.Canceled) {
          const cancellation = sdk.CancellationDetails.fromResult(result);
          console.error(`Speech synthesis CANCELED: Reason=${cancellation.reason}`);
          if (cancellation.reason === sdk.CancellationReason.Error) {
            console.error(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
            console.error(`CANCELED: ErrorDetails=[${cancellation.errorDetails}]`);
          }
          reject(new Error(`Speech synthesis CANCELED: ${cancellation.reason} - ${cancellation.errorDetails}`));
        } else {
          // Fix: previously this branch fell through without settling the
          // Promise, leaving callers hanging forever on unexpected reasons.
          reject(new Error(`Speech synthesis finished with unexpected reason: ${result.reason}`));
        }
      },
      error => {
        synthesizer.close();
        console.error(`Speech synthesis error: ${error}`);
        reject(new Error(`Speech synthesis error: ${error}`));
      }
    );
  });
}

/**
 * Convert raw viseme events into mouth cues with start/end times in seconds.
 * Each cue ends where the next viseme begins; the last cue ends at the total
 * audio duration. Zero/negative spans are clamped to a minimal 10 ms cue.
 *
 * @param {Array<{id: number, audioOffsetTicks: number}>} visemesData - Events in arrival order.
 * @param {number} audioDurationTicks - Total audio duration in 100-ns ticks.
 * @returns {Array<{start: number, end: number, value: string}>}
 */
function _buildMouthCues(visemesData, audioDurationTicks) {
  const TICKS_PER_SECOND = 10000000.0; // 100-nanosecond ticks per second
  const mouthCues = [];
  if (visemesData.length === 0 || !audioDurationTicks) {
    return mouthCues;
  }
  const totalAudioDurationSecs = audioDurationTicks / TICKS_PER_SECOND;
  for (let i = 0; i < visemesData.length; i++) {
    const currentViseme = visemesData[i];
    const startSecs = currentViseme.audioOffsetTicks / TICKS_PER_SECOND;
    let endSecs = i < visemesData.length - 1
      ? visemesData[i + 1].audioOffsetTicks / TICKS_PER_SECOND
      : totalAudioDurationSecs;
    if (endSecs <= startSecs) {
      endSecs = startSecs + 0.01; // Minimal duration
    }
    mouthCues.push({
      start: Number(startSecs.toFixed(2)),
      end: Number(endSecs.toFixed(2)),
      value: azureVisemeToCharacterMap[currentViseme.id] || "X"
    });
  }
  return mouthCues;
}
|
115 |
+
|
116 |
+
export { synthesizeSpeechWithVisemes };
|
117 |
+
|
118 |
+
// Example usage (for testing this module directly):
|
119 |
+
/*
|
120 |
+
async function test() {
|
121 |
+
if (require.main === module) { // Only run if executed directly
|
122 |
+
const TEXT_TO_SPEAK = "Olá mundo, isto é um teste de síntese de voz com visemas.";
|
123 |
+
const LANGUAGE = "pt-BR";
|
124 |
+
// const VOICE = "pt-BR-FranciscaNeural"; // Example voice
|
125 |
+
try {
|
126 |
+
console.log("Attempting Azure TTS test...");
|
127 |
+
const result = await synthesizeSpeechWithVisemes(TEXT_TO_SPEAK, LANGUAGE);
|
128 |
+
console.log("--- Mouth Cues JSON ---");
|
129 |
+
console.log(JSON.stringify({ mouthCues: result.mouthCues }, null, 2));
|
130 |
+
console.log("Audio data length (bytes):", result.audioData.length);
|
131 |
+
console.log("Audio duration (ticks):", result.audioDurationTicks);
|
132 |
+
// To save to file for testing:
|
133 |
+
// import fs from 'fs';
|
134 |
+
// fs.writeFileSync('test_output.wav', result.audioData);
|
135 |
+
// console.log('Audio saved to test_output.wav');
|
136 |
+
} catch (error) {
|
137 |
+
console.error("Test function error:", error);
|
138 |
+
}
|
139 |
+
}
|
140 |
+
}
|
141 |
+
|
142 |
+
test();
|
143 |
+
*/
|
backend/package.json
CHANGED
@@ -16,8 +16,7 @@
|
|
16 |
"dotenv": "^16.3.1",
|
17 |
"elevenlabs-node": "^1.2.0",
|
18 |
"express": "^4.18.2",
|
19 |
-
"microsoft-cognitiveservices-speech-sdk": "^1.38.0"
|
20 |
-
"openai": "^4.26.0"
|
21 |
},
|
22 |
"devDependencies": {
|
23 |
"nodemon": "^3.0.1"
|
|
|
16 |
"dotenv": "^16.3.1",
|
17 |
"elevenlabs-node": "^1.2.0",
|
18 |
"express": "^4.18.2",
|
19 |
+
"microsoft-cognitiveservices-speech-sdk": "^1.38.0"
|
|
|
20 |
},
|
21 |
"devDependencies": {
|
22 |
"nodemon": "^3.0.1"
|