Marcos committed on
Commit
dd76c65
·
1 Parent(s): 0cdb42e

Automated push by deployment script

Browse files
Files changed (3) hide show
  1. Dockerfile +3 -39
  2. backend/azureTtsService.js +143 -0
  3. backend/package.json +1 -2
Dockerfile CHANGED
@@ -4,37 +4,7 @@ FROM node:18-slim
4
  RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg curl ca-certificates wget unzip file && \
5
  apt-get clean && rm -rf /var/lib/apt/lists/*
6
 
7
- # --- Add Rhubarb Installation Here ---
8
- # Switch to root to install Rhubarb, then switch back to node user later if needed before app execution
9
- USER root
10
-
11
- ARG RHUBARB_VERSION=1.13.0
12
- RUN \
13
- echo "Downloading Rhubarb version ${RHUBARB_VERSION}" && \
14
- wget https://github.com/DanielSWolf/rhubarb-lip-sync/releases/download/v${RHUBARB_VERSION}/rhubarb-lip-sync-${RHUBARB_VERSION}-linux.zip -O /tmp/rhubarb.zip && \
15
- echo "--- Listing /tmp to check zip file: ---" && \
16
- ls -l /tmp/rhubarb.zip && \
17
- echo "--- File type of /tmp/rhubarb.zip: ---" && \
18
- file /tmp/rhubarb.zip && \
19
- echo "--- Creating temporary extraction directory /tmp/rhubarb_extracted ---" && \
20
- mkdir -p /tmp/rhubarb_extracted && \
21
- echo "--- Unzipping to /tmp/rhubarb_extracted ---" && \
22
- unzip /tmp/rhubarb.zip -d /tmp/rhubarb_extracted && \
23
- echo "--- Listing /tmp/rhubarb_extracted (recursive): ---" && \
24
- ls -R /tmp/rhubarb_extracted && \
25
- echo "--- Finding rhubarb executable in /tmp/rhubarb_extracted ---" && \
26
- RHUBARB_EXEC_PATH=$(find /tmp/rhubarb_extracted -name rhubarb -type f -executable -print -quit) && \
27
- if [ -z "${RHUBARB_EXEC_PATH}" ]; then \
28
- echo "ERROR: Could not find rhubarb executable in /tmp/rhubarb_extracted"; \
29
- ls -R /tmp/rhubarb_extracted; \
30
- exit 1; \
31
- fi && \
32
- echo "Found rhubarb executable at: ${RHUBARB_EXEC_PATH}" && \
33
- echo "--- Moving ${RHUBARB_EXEC_PATH} to /usr/local/bin/rhubarb ---" && \
34
- mv "${RHUBARB_EXEC_PATH}" "/usr/local/bin/rhubarb" && \
35
- chmod +x "/usr/local/bin/rhubarb" && \
36
- echo "--- Cleaning up /tmp/rhubarb.zip and /tmp/rhubarb_extracted ---" && \
37
- rm -rf "/tmp/rhubarb.zip" "/tmp/rhubarb_extracted"
38
 
39
  USER node
40
 
@@ -43,7 +13,7 @@ USER node
43
  # Diagnostic commands for the node user
44
  RUN echo "Running diagnostics as $(whoami)"
45
  RUN echo "PATH is: $PATH"
46
- RUN which rhubarb || echo "rhubarb not found in PATH for node user during build"
47
 
48
  # Set environment variables
49
  ENV NODE_ENV=production
@@ -76,13 +46,7 @@ RUN mkdir -p /home/node/app/audios /home/node/app/public
76
  # This prevents ENOENT errors when serving index.html
77
  RUN echo '<html><body><h1>Placeholder for Virtual Girlfriend App</h1><p>This is a placeholder page. Update with actual content if needed.</p></body></html>' > /home/node/app/public/index.html
78
 
79
- # Download or install rhubarb binary at build time or runtime
80
- # Replace the URL below with the actual source for rhubarb if available
81
- # RUN mkdir -p /home/node/app/backend/bin && \
82
- # curl -L -o /home/node/app/backend/bin/rhubarb 'https://example.com/path/to/rhubarb-binary' && \
83
- # chmod +x /home/node/app/backend/bin/rhubarb
84
- # Note: The above command is commented out as the URL is a placeholder. Please provide the correct URL or method to obtain rhubarb.
85
- # Alternatively, consider using a script at startup to download it if it cannot be done at build time.
86
 
87
  # Expose the port the app runs on
88
  EXPOSE 7860
 
4
  RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg curl ca-certificates wget unzip file && \
5
  apt-get clean && rm -rf /var/lib/apt/lists/*
6
 
7
+ # Rhubarb installation has been removed.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  USER node
10
 
 
13
  # Diagnostic commands for the node user
14
  RUN echo "Running diagnostics as $(whoami)"
15
  RUN echo "PATH is: $PATH"
16
+ # Removed: RUN which rhubarb || echo "rhubarb not found in PATH for node user during build"
17
 
18
  # Set environment variables
19
  ENV NODE_ENV=production
 
46
  # This prevents ENOENT errors when serving index.html
47
  RUN echo '<html><body><h1>Placeholder for Virtual Girlfriend App</h1><p>This is a placeholder page. Update with actual content if needed.</p></body></html>' > /home/node/app/public/index.html
48
 
49
+ # Rhubarb download/installation section removed.
 
 
 
 
 
 
50
 
51
  # Expose the port the app runs on
52
  EXPOSE 7860
backend/azureTtsService.js ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import dotenv from 'dotenv';

dotenv.config();

// Azure Speech service credentials, read from environment variables.
const SPEECH_KEY = process.env.AZURE_SPEECH_KEY;
const SPEECH_REGION = process.env.AZURE_SPEECH_REGION;

// Azure numeric viseme IDs grouped by the character mouth-shape value they
// map to. See the Azure Speech viseme reference for the phonemes behind
// each ID (0 = silence; 1-11 = vowels/diphthongs; 12-20 = most consonants,
// incl. f/v on 18 which could alternatively get its own "F" shape;
// 21 = bilabials p/b/m).
const VISEME_SHAPE_GROUPS = [
  ['X', [0]],
  ['C', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]],
  ['D', [12, 13, 14, 15, 16, 17, 18, 19, 20]],
  ['B', [21]],
];

// Flattened lookup table: { visemeId -> mouth-shape character }.
const azureVisemeToCharacterMap = Object.fromEntries(
  VISEME_SHAPE_GROUPS.flatMap(([shape, ids]) => ids.map((id) => [id, shape]))
);
35
+
36
/**
 * Synthesizes speech for the given text with Azure Cognitive Services TTS,
 * collecting viseme events so callers can drive lip-sync animation.
 *
 * @param {string} textToSpeak - Text to synthesize.
 * @param {string} [language="pt-BR"] - BCP-47 synthesis language.
 * @param {string|null} [voiceName=null] - Optional Azure voice name override.
 * @returns {Promise<{audioData: Buffer, mouthCues: Array<{start: number, end: number, value: string}>, audioDurationTicks: number}>}
 *   Audio bytes, mouth cues (seconds, rounded to 2 decimals), and total
 *   duration in 100-ns ticks.
 * @throws {Error} If Azure credentials are missing, synthesis is canceled,
 *   errors out, or finishes with an unexpected result reason.
 */
async function synthesizeSpeechWithVisemes(textToSpeak, language = "pt-BR", voiceName = null) {
  if (!SPEECH_KEY || !SPEECH_REGION) {
    console.error("Azure Speech Key or Region not configured in environment variables.");
    throw new Error("Azure Speech Key or Region not configured.");
  }

  const speechConfig = sdk.SpeechConfig.fromSubscription(SPEECH_KEY, SPEECH_REGION);
  speechConfig.speechSynthesisLanguage = language;

  if (voiceName) {
    speechConfig.speechSynthesisVoiceName = voiceName;
  }

  // Passing null for audioConfig makes the SDK return the synthesized audio
  // in result.audioData (an ArrayBuffer) instead of playing/writing it.
  const synthesizer = new sdk.SpeechSynthesizer(speechConfig, null);

  const visemesData = [];

  synthesizer.visemeReceived = (s, e) => {
    // e.audioOffset is expressed in 100-nanosecond ticks.
    visemesData.push({ id: e.visemeId, audioOffsetTicks: e.audioOffset });
  };

  console.log(`Synthesizing text: '${textToSpeak}' in ${language}${voiceName ? ' using voice ' + voiceName : ''}`);

  return new Promise((resolve, reject) => {
    synthesizer.speakTextAsync(
      textToSpeak,
      result => {
        synthesizer.close();
        if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
          console.log("Speech synthesized successfully.");
          const mouthCues = [];
          if (visemesData.length > 0 && result.audioDuration) {
            const totalAudioDurationSecs = result.audioDuration / 10000000.0;
            for (let i = 0; i < visemesData.length; i++) {
              const currentViseme = visemesData[i];
              const startSecs = currentViseme.audioOffsetTicks / 10000000.0;
              // Each cue ends where the next viseme starts; the last cue
              // runs to the end of the audio.
              let endSecs;
              if (i < visemesData.length - 1) {
                endSecs = visemesData[i + 1].audioOffsetTicks / 10000000.0;
              } else {
                endSecs = totalAudioDurationSecs;
              }
              if (endSecs <= startSecs) {
                endSecs = startSecs + 0.01; // Enforce a minimal positive duration
              }
              mouthCues.push({
                start: Number(startSecs.toFixed(2)),
                end: Number(endSecs.toFixed(2)),
                // Unknown viseme IDs fall back to "X" (closed/silence).
                value: azureVisemeToCharacterMap[currentViseme.id] || "X"
              });
            }
          }
          resolve({
            audioData: Buffer.from(result.audioData), // Convert ArrayBuffer to Node.js Buffer
            mouthCues: mouthCues,
            audioDurationTicks: result.audioDuration
          });
        } else if (result.reason === sdk.ResultReason.Canceled) {
          const cancellation = sdk.CancellationDetails.fromResult(result);
          console.error(`Speech synthesis CANCELED: Reason=${cancellation.reason}`);
          if (cancellation.reason === sdk.CancellationReason.Error) {
            // NOTE(review): the JS Speech SDK exposes this getter with a
            // capitalized name (`ErrorCode`) — confirm against the installed
            // SDK version if this ever logs `undefined`.
            console.error(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
            console.error(`CANCELED: ErrorDetails=[${cancellation.errorDetails}]`);
          }
          reject(new Error(`Speech synthesis CANCELED: ${cancellation.reason} - ${cancellation.errorDetails}`));
        } else {
          // FIX: previously any other result reason left this promise pending
          // forever (never resolved or rejected). Always settle it.
          reject(new Error(`Speech synthesis finished with unexpected reason: ${result.reason}`));
        }
      },
      error => {
        synthesizer.close();
        console.error(`Speech synthesis error: ${error}`);
        reject(new Error(`Speech synthesis error: ${error}`));
      }
    );
  });
}

export { synthesizeSpeechWithVisemes };
117
+
118
// Example usage (for testing this module directly):
/*
async function test() {
  const TEXT_TO_SPEAK = "Olá mundo, isto é um teste de síntese de voz com visemas.";
  const LANGUAGE = "pt-BR";
  // const VOICE = "pt-BR-FranciscaNeural"; // Example voice
  try {
    console.log("Attempting Azure TTS test...");
    const result = await synthesizeSpeechWithVisemes(TEXT_TO_SPEAK, LANGUAGE);
    console.log("--- Mouth Cues JSON ---");
    console.log(JSON.stringify({ mouthCues: result.mouthCues }, null, 2));
    console.log("Audio data length (bytes):", result.audioData.length);
    console.log("Audio duration (ticks):", result.audioDurationTicks);
    // To save to file for testing:
    // import fs from 'fs';
    // fs.writeFileSync('test_output.wav', result.audioData);
    // console.log('Audio saved to test_output.wav');
  } catch (error) {
    console.error("Test function error:", error);
  }
}

// NOTE: this file is an ES module, so the CommonJS `require.main === module`
// guard is not available here. To run only on direct execution, compare
// import.meta.url against process.argv[1] (e.g. via url.pathToFileURL).
test();
*/
backend/package.json CHANGED
@@ -16,8 +16,7 @@
16
  "dotenv": "^16.3.1",
17
  "elevenlabs-node": "^1.2.0",
18
  "express": "^4.18.2",
19
- "microsoft-cognitiveservices-speech-sdk": "^1.38.0",
20
- "openai": "^4.26.0"
21
  },
22
  "devDependencies": {
23
  "nodemon": "^3.0.1"
 
16
  "dotenv": "^16.3.1",
17
  "elevenlabs-node": "^1.2.0",
18
  "express": "^4.18.2",
19
+ "microsoft-cognitiveservices-speech-sdk": "^1.38.0"
 
20
  },
21
  "devDependencies": {
22
  "nodemon": "^3.0.1"