Spaces:
Paused
Paused
| import { getPreviewString, saveTtsProviderSettings } from './index.js'; | |
| import { getBase64Async } from '../../utils.js'; | |
| import { getRequestHeaders } from '../../../script.js'; | |
| export { SpeechT5TtsProvider }; | |
| class SpeechT5TtsProvider { | |
| //########// | |
| // Config // | |
| //########// | |
| settings; | |
| ready = false; | |
| voices = []; | |
| separator = ' .. '; | |
| audioElement = document.createElement('audio'); | |
| defaultSettings = { | |
| speakers: [], | |
| speaker: '', | |
| voiceMap: {}, | |
| }; | |
| get settingsHtml() { | |
| let html = ` | |
| <label for="speecht5_tts_speaker">Speaker:</label> | |
| <div class="flex-container"> | |
| <select id="speecht5_tts_speaker" class="text_pole flex1"> | |
| </select> | |
| <div id="speecht5_tts_speaker_upload_button" class="menu_button" title="Upload speaker"> | |
| <i class="fa-solid fa-upload"></i> | |
| </div> | |
| <div id="speecht5_tts_delete_speaker_button" class="menu_button" title="Delete speaker"> | |
| <i class="fa-solid fa-trash"></i> | |
| </div> | |
| </div> | |
| <input type="file" id="speecht5_tts_speaker_upload" class="displayNone"> | |
| <div><i>Loading model for the first time may take a while!</i></div> | |
| `; | |
| return html; | |
| } | |
| onSettingsChange() { | |
| // Used when provider settings are updated from UI | |
| this.settings.speaker = $('#speecht5_tts_speaker').val(); | |
| saveTtsProviderSettings(); | |
| } | |
| async previewTtsVoice(voiceId) { | |
| this.audioElement.pause(); | |
| this.audioElement.currentTime = 0; | |
| const text = getPreviewString('en-US'); | |
| const response = await this.fetchTtsGeneration(text, voiceId); | |
| if (!response.ok) { | |
| throw new Error(`HTTP ${response.status}`); | |
| } | |
| const audio = await response.blob(); | |
| const url = URL.createObjectURL(audio); | |
| this.audioElement.src = url; | |
| this.audioElement.play(); | |
| this.audioElement.onended = () => URL.revokeObjectURL(url); | |
| } | |
| async loadSettings(settings) { | |
| // Pupulate Provider UI given input settings | |
| if (Object.keys(settings).length == 0) { | |
| console.info('Using default TTS Provider settings'); | |
| } | |
| // Only accept keys defined in defaultSettings | |
| this.settings = this.defaultSettings; | |
| for (const key in settings) { | |
| if (key in this.settings) { | |
| this.settings[key] = settings[key]; | |
| } else { | |
| throw `Invalid setting passed to TTS Provider: ${key}`; | |
| } | |
| } | |
| for (const speaker of this.settings.speakers) { | |
| $('#speecht5_tts_speaker').append($('<option>', { | |
| value: speaker.voice_id, | |
| text: speaker.name, | |
| })); | |
| } | |
| $('#speecht5_tts_speaker').val(this.settings.speaker); | |
| $('#speecht5_tts_speaker').on('change', this.onSettingsChange.bind(this)); | |
| $('#speecht5_tts_speaker_upload_button').on('click', () => { | |
| $('#speecht5_tts_speaker_upload').trigger('click'); | |
| }); | |
| $('#speecht5_tts_speaker_upload').on('change', async (event) => { | |
| const file = event.target.files[0]; | |
| if (file.size != 2048) { | |
| toastr.error('Invalid speaker file size, expected 2048 bytes'); | |
| return; | |
| } | |
| const data = await getBase64Async(file); | |
| const speaker = { | |
| voice_id: file.name, | |
| name: file.name, | |
| data: data, | |
| lang: 'en-US', | |
| preview_url: false, | |
| }; | |
| this.settings.speakers.push(speaker); | |
| $('#speecht5_tts_speaker').append($('<option>', { | |
| value: speaker.voice_id, | |
| text: speaker.name, | |
| })); | |
| $('#speecht5_tts_speaker').val(speaker.name); | |
| this.onSettingsChange(); | |
| }); | |
| $('#speecht5_tts_delete_speaker_button').on('click', () => { | |
| const confirmDelete = confirm('Are you sure you want to delete this speaker?'); | |
| if (!confirmDelete) { | |
| return; | |
| } | |
| const speaker = this.settings.speakers.find(s => s.voice_id === this.settings.speaker); | |
| if (!speaker) { | |
| toastr.error('Speaker not found'); | |
| return; | |
| } | |
| const index = this.settings.speakers.indexOf(speaker); | |
| this.settings.speakers.splice(index, 1); | |
| $(`#speecht5_tts_speaker option[value="${speaker.voice_id}"]`).remove(); | |
| if (this.settings.speakers.length == 0) { | |
| console.log('No speakers left'); | |
| return; | |
| } | |
| $('#speecht5_tts_speaker').val(this.settings.speakers[0].voice_id); | |
| this.onSettingsChange(); | |
| }); | |
| await this.checkReady(); | |
| console.debug('SpeechT5: Settings loaded'); | |
| } | |
| async checkReady() { | |
| return Promise.resolve(); | |
| } | |
| async getVoice(voiceName) { | |
| return this.settings.speakers.find(s => s.voice_id === voiceName); | |
| } | |
| async generateTts(text, voiceId) { | |
| const response = await this.fetchTtsGeneration(text, voiceId); | |
| return response; | |
| } | |
| async fetchTtsVoiceObjects() { | |
| return this.settings.speakers; | |
| } | |
| async fetchTtsGeneration(inputText, voiceId) { | |
| console.info(`Generating new TTS for voice_id ${voiceId}`); | |
| const speaker = await this.getVoice(voiceId); | |
| if (!speaker) { | |
| toastr.error(`Speaker not found: ${voiceId}`, 'TTS Generation Failed'); | |
| throw new Error(`Speaker not found: ${voiceId}`); | |
| } | |
| const response = await fetch( | |
| '/api/speech/synthesize', | |
| { | |
| method: 'POST', | |
| headers: getRequestHeaders(), | |
| body: JSON.stringify({ | |
| 'text': inputText, | |
| 'speaker': speaker.data, | |
| 'model': 'Xenova/speecht5_tts', | |
| }), | |
| }, | |
| ); | |
| if (!response.ok) { | |
| toastr.error(response.statusText, 'TTS Generation Failed'); | |
| throw new Error(`HTTP ${response.status}: ${await response.text()}`); | |
| } | |
| return response; | |
| } | |
| async fetchTtsFromHistory(history_item_id) { | |
| return Promise.resolve(history_item_id); | |
| } | |
| } | |