Spaces: Running on A10G
Multiple user API keys
Browse files
- config.yaml +1 -1
- pipeline.py +34 -32
- share_btn.py +2 -2
- ui_client.py +12 -8
- utils.py +1 -2
- wavjourney_cli.py +4 -1
config.yaml
CHANGED
|
@@ -18,4 +18,4 @@ Voice-Parser:
|
|
| 18 |
|
| 19 |
Service-Port: 5000
|
| 20 |
|
| 21 |
-
OpenAI-Key: ''
|
|
|
|
| 18 |
|
| 19 |
Service-Port: 5000
|
| 20 |
|
| 21 |
+
OpenAI-Key: ''
|
pipeline.py
CHANGED
|
@@ -4,7 +4,6 @@ from string import Template
|
|
| 4 |
import openai
|
| 5 |
import re
|
| 6 |
import glob
|
| 7 |
-
from utils import get_key
|
| 8 |
import pickle
|
| 9 |
import time
|
| 10 |
import json5
|
|
@@ -26,28 +25,33 @@ if USE_OPENAI_CACHE:
|
|
| 26 |
with open(cache_file, 'rb') as file:
|
| 27 |
openai_cache.append(pickle.load(file))
|
| 28 |
|
| 29 |
-
openai.api_key = get_key()
|
| 30 |
|
| 31 |
-
def chat_with_gpt(prompt):
|
| 32 |
if USE_OPENAI_CACHE:
|
| 33 |
filtered_object = list(filter(lambda x: x['prompt'] == prompt, openai_cache))
|
| 34 |
if len(filtered_object) > 0:
|
| 35 |
response = filtered_object[0]['response']
|
| 36 |
return response
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
if USE_OPENAI_CACHE:
|
| 52 |
cache_obj = {
|
| 53 |
'prompt': prompt,
|
|
@@ -120,10 +124,10 @@ def init_session(session_id=''):
|
|
| 120 |
return session_id
|
| 121 |
|
| 122 |
@retry(stop_max_attempt_number=3)
|
| 123 |
-
def input_text_to_json_script_with_retry(complete_prompt_path):
|
| 124 |
print(" trying ...")
|
| 125 |
complete_prompt = get_file_content(complete_prompt_path)
|
| 126 |
-
json_response = try_extract_content_from_quotes(chat_with_gpt(complete_prompt))
|
| 127 |
json_data = json5.loads(json_response)
|
| 128 |
|
| 129 |
try:
|
|
@@ -138,22 +142,20 @@ def input_text_to_json_script_with_retry(complete_prompt_path):
|
|
| 138 |
return json_response
|
| 139 |
|
| 140 |
# Step 1: input_text to json
|
| 141 |
-
def input_text_to_json_script(input_text, output_path):
|
| 142 |
print('Step 1: Writing audio script with LLM ...')
|
| 143 |
input_text = maybe_get_content_from_file(input_text)
|
| 144 |
text_to_audio_script_prompt = get_file_content('prompts/text_to_json.prompt')
|
| 145 |
prompt = f'{text_to_audio_script_prompt}\n\nInput text: {input_text}\n\nScript:\n'
|
| 146 |
complete_prompt_path = output_path / 'complete_input_text_to_audio_script.prompt'
|
| 147 |
write_to_file(complete_prompt_path, prompt)
|
| 148 |
-
audio_script_response = input_text_to_json_script_with_retry(complete_prompt_path)
|
| 149 |
generated_audio_script_filename = output_path / 'audio_script.json'
|
| 150 |
write_to_file(generated_audio_script_filename, audio_script_response)
|
| 151 |
return audio_script_response
|
| 152 |
|
| 153 |
# Step 2: json to char-voice map
|
| 154 |
-
def json_script_to_char_voice_map(json_script, voices, output_path):
|
| 155 |
-
def create_complete_char_voice_map(char_voice_map):
|
| 156 |
-
return
|
| 157 |
print('Step 2: Parsing character voice with LLM...')
|
| 158 |
json_script_content = maybe_get_content_from_file(json_script)
|
| 159 |
prompt = get_file_content('prompts/audio_script_to_character_voice_map.prompt')
|
|
@@ -161,7 +163,7 @@ def json_script_to_char_voice_map(json_script, voices, output_path):
|
|
| 161 |
prompt = Template(prompt).substitute(voice_and_desc=presets_str)
|
| 162 |
prompt = f"{prompt}\n\nAudio script:\n'''\n{json_script_content}\n'''\n\noutput:\n"
|
| 163 |
write_to_file(output_path / 'complete_audio_script_to_char_voice_map.prompt', prompt)
|
| 164 |
-
char_voice_map_response = try_extract_content_from_quotes(chat_with_gpt(prompt))
|
| 165 |
char_voice_map = json5.loads(char_voice_map_response)
|
| 166 |
# enrich char_voice_map with voice preset metadata
|
| 167 |
complete_char_voice_map = {c: voices[char_voice_map[c]] for c in char_voice_map}
|
|
@@ -188,19 +190,19 @@ def audio_code_gen_to_result(audio_gen_code_path):
|
|
| 188 |
os.system(f'python {audio_gen_code_filename}')
|
| 189 |
|
| 190 |
# Function call used by Gradio: input_text to json
|
| 191 |
-
def generate_json_file(session_id, input_text):
|
| 192 |
output_path = utils.get_session_path(session_id)
|
| 193 |
# Step 1
|
| 194 |
-
return input_text_to_json_script(input_text, output_path)
|
| 195 |
|
| 196 |
# Function call used by Gradio: json to result wav
|
| 197 |
-
def generate_audio(session_id, json_script):
|
| 198 |
output_path = utils.get_session_path(session_id)
|
| 199 |
output_audio_path = utils.get_session_audio_path(session_id)
|
| 200 |
voices = voice_presets.get_merged_voice_presets(session_id)
|
| 201 |
|
| 202 |
# Step 2
|
| 203 |
-
char_voice_map = json_script_to_char_voice_map(json_script, voices, output_path)
|
| 204 |
# Step 3
|
| 205 |
json_script_filename = output_path / 'audio_script.json'
|
| 206 |
char_voice_map_filename = output_path / 'character_voice_map.json'
|
|
@@ -214,6 +216,6 @@ def generate_audio(session_id, json_script):
|
|
| 214 |
return result_wav_filename, char_voice_map
|
| 215 |
|
| 216 |
# Convenient function call used by wavjourney_cli
|
| 217 |
-
def full_steps(session_id, input_text):
|
| 218 |
-
json_script = generate_json_file(session_id, input_text)
|
| 219 |
-
return generate_audio(session_id, json_script)
|
|
|
|
| 4 |
import openai
|
| 5 |
import re
|
| 6 |
import glob
|
|
|
|
| 7 |
import pickle
|
| 8 |
import time
|
| 9 |
import json5
|
|
|
|
| 25 |
with open(cache_file, 'rb') as file:
|
| 26 |
openai_cache.append(pickle.load(file))
|
| 27 |
|
|
|
|
| 28 |
|
| 29 |
+
def chat_with_gpt(prompt, api_key):
|
| 30 |
if USE_OPENAI_CACHE:
|
| 31 |
filtered_object = list(filter(lambda x: x['prompt'] == prompt, openai_cache))
|
| 32 |
if len(filtered_object) > 0:
|
| 33 |
response = filtered_object[0]['response']
|
| 34 |
return response
|
| 35 |
+
|
| 36 |
+
try:
|
| 37 |
+
openai.api_key = api_key
|
| 38 |
+
chat = openai.ChatCompletion.create(
|
| 39 |
+
# model="gpt-3.5-turbo",
|
| 40 |
+
model="gpt-4",
|
| 41 |
+
messages=[
|
| 42 |
+
{
|
| 43 |
+
"role": "system",
|
| 44 |
+
"content": "You are a helpful assistant."
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"role": "user",
|
| 48 |
+
"content": prompt
|
| 49 |
+
}
|
| 50 |
+
]
|
| 51 |
+
)
|
| 52 |
+
finally:
|
| 53 |
+
openai.api_key = ''
|
| 54 |
+
|
| 55 |
if USE_OPENAI_CACHE:
|
| 56 |
cache_obj = {
|
| 57 |
'prompt': prompt,
|
|
|
|
| 124 |
return session_id
|
| 125 |
|
| 126 |
@retry(stop_max_attempt_number=3)
|
| 127 |
+
def input_text_to_json_script_with_retry(complete_prompt_path, api_key):
|
| 128 |
print(" trying ...")
|
| 129 |
complete_prompt = get_file_content(complete_prompt_path)
|
| 130 |
+
json_response = try_extract_content_from_quotes(chat_with_gpt(complete_prompt, api_key))
|
| 131 |
json_data = json5.loads(json_response)
|
| 132 |
|
| 133 |
try:
|
|
|
|
| 142 |
return json_response
|
| 143 |
|
| 144 |
# Step 1: input_text to json
|
| 145 |
+
def input_text_to_json_script(input_text, output_path, api_key):
|
| 146 |
print('Step 1: Writing audio script with LLM ...')
|
| 147 |
input_text = maybe_get_content_from_file(input_text)
|
| 148 |
text_to_audio_script_prompt = get_file_content('prompts/text_to_json.prompt')
|
| 149 |
prompt = f'{text_to_audio_script_prompt}\n\nInput text: {input_text}\n\nScript:\n'
|
| 150 |
complete_prompt_path = output_path / 'complete_input_text_to_audio_script.prompt'
|
| 151 |
write_to_file(complete_prompt_path, prompt)
|
| 152 |
+
audio_script_response = input_text_to_json_script_with_retry(complete_prompt_path, api_key)
|
| 153 |
generated_audio_script_filename = output_path / 'audio_script.json'
|
| 154 |
write_to_file(generated_audio_script_filename, audio_script_response)
|
| 155 |
return audio_script_response
|
| 156 |
|
| 157 |
# Step 2: json to char-voice map
|
| 158 |
+
def json_script_to_char_voice_map(json_script, voices, output_path, api_key):
|
|
|
|
|
|
|
| 159 |
print('Step 2: Parsing character voice with LLM...')
|
| 160 |
json_script_content = maybe_get_content_from_file(json_script)
|
| 161 |
prompt = get_file_content('prompts/audio_script_to_character_voice_map.prompt')
|
|
|
|
| 163 |
prompt = Template(prompt).substitute(voice_and_desc=presets_str)
|
| 164 |
prompt = f"{prompt}\n\nAudio script:\n'''\n{json_script_content}\n'''\n\noutput:\n"
|
| 165 |
write_to_file(output_path / 'complete_audio_script_to_char_voice_map.prompt', prompt)
|
| 166 |
+
char_voice_map_response = try_extract_content_from_quotes(chat_with_gpt(prompt, api_key))
|
| 167 |
char_voice_map = json5.loads(char_voice_map_response)
|
| 168 |
# enrich char_voice_map with voice preset metadata
|
| 169 |
complete_char_voice_map = {c: voices[char_voice_map[c]] for c in char_voice_map}
|
|
|
|
| 190 |
os.system(f'python {audio_gen_code_filename}')
|
| 191 |
|
| 192 |
# Function call used by Gradio: input_text to json
|
| 193 |
+
def generate_json_file(session_id, input_text, api_key):
|
| 194 |
output_path = utils.get_session_path(session_id)
|
| 195 |
# Step 1
|
| 196 |
+
return input_text_to_json_script(input_text, output_path, api_key)
|
| 197 |
|
| 198 |
# Function call used by Gradio: json to result wav
|
| 199 |
+
def generate_audio(session_id, json_script, api_key):
|
| 200 |
output_path = utils.get_session_path(session_id)
|
| 201 |
output_audio_path = utils.get_session_audio_path(session_id)
|
| 202 |
voices = voice_presets.get_merged_voice_presets(session_id)
|
| 203 |
|
| 204 |
# Step 2
|
| 205 |
+
char_voice_map = json_script_to_char_voice_map(json_script, voices, output_path, api_key)
|
| 206 |
# Step 3
|
| 207 |
json_script_filename = output_path / 'audio_script.json'
|
| 208 |
char_voice_map_filename = output_path / 'character_voice_map.json'
|
|
|
|
| 216 |
return result_wav_filename, char_voice_map
|
| 217 |
|
| 218 |
# Convenient function call used by wavjourney_cli
|
| 219 |
+
def full_steps(session_id, input_text, api_key):
|
| 220 |
+
json_script = generate_json_file(session_id, input_text, api_key)
|
| 221 |
+
return generate_audio(session_id, json_script, api_key)
|
share_btn.py
CHANGED
|
@@ -26,7 +26,7 @@ share_js = """async () => {
|
|
| 26 |
const res = await fetch(videoEl.src);
|
| 27 |
const blob = await res.blob();
|
| 28 |
const videoId = Date.now() % 200;
|
| 29 |
-
const fileName = `sd-perception-${
|
| 30 |
return new File([blob], fileName, { type: 'video/mp4' });
|
| 31 |
}
|
| 32 |
|
|
@@ -40,7 +40,7 @@ share_js = """async () => {
|
|
| 40 |
});
|
| 41 |
}
|
| 42 |
const gradioEl = document.querySelector("gradio-app").shadowRoot || document.querySelector('body > gradio-app');
|
| 43 |
-
const inputPromptEl = gradioEl.querySelector('#prompt-in
|
| 44 |
const outputVideoEl = gradioEl.querySelector('#output-video video');
|
| 45 |
|
| 46 |
let titleTxt = `WavJourney: ${inputPromptEl}`;
|
|
|
|
| 26 |
const res = await fetch(videoEl.src);
|
| 27 |
const blob = await res.blob();
|
| 28 |
const videoId = Date.now() % 200;
|
| 29 |
+
const fileName = `sd-perception-${videoId}.mp4`;
|
| 30 |
return new File([blob], fileName, { type: 'video/mp4' });
|
| 31 |
}
|
| 32 |
|
|
|
|
| 40 |
});
|
| 41 |
}
|
| 42 |
const gradioEl = document.querySelector("gradio-app").shadowRoot || document.querySelector('body > gradio-app');
|
| 43 |
+
const inputPromptEl = gradioEl.querySelector('#prompt-in textarea').value;
|
| 44 |
const outputVideoEl = gradioEl.querySelector('#output-video video');
|
| 45 |
|
| 46 |
let titleTxt = `WavJourney: ${inputPromptEl}`;
|
ui_client.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import shutil
|
| 2 |
import json5
|
|
|
|
| 3 |
|
| 4 |
-
import openai
|
| 5 |
import gradio as gr
|
| 6 |
from tabulate import tabulate
|
| 7 |
|
|
@@ -44,11 +44,13 @@ def convert_char_voice_map_to_md(char_voice_map):
|
|
| 44 |
def generate_script_fn(instruction, _state: gr.State):
|
| 45 |
try:
|
| 46 |
session_id = _state['session_id']
|
| 47 |
-
|
|
|
|
| 48 |
table_text = convert_json_to_md(json_script)
|
| 49 |
except Exception as e:
|
| 50 |
gr.Warning(str(e))
|
| 51 |
print(f"Generating script error: {str(e)}")
|
|
|
|
| 52 |
return [
|
| 53 |
None,
|
| 54 |
_state,
|
|
@@ -89,9 +91,8 @@ def generate_audio_fn(state):
|
|
| 89 |
]
|
| 90 |
except Exception as e:
|
| 91 |
print(f"Generation audio error: {str(e)}")
|
|
|
|
| 92 |
gr.Warning(str(e))
|
| 93 |
-
# For debugging, uncomment the line below
|
| 94 |
-
#raise e
|
| 95 |
|
| 96 |
return [
|
| 97 |
None,
|
|
@@ -172,8 +173,8 @@ def get_system_voice_presets():
|
|
| 172 |
return data
|
| 173 |
|
| 174 |
|
| 175 |
-
def set_openai_key(key):
|
| 176 |
-
|
| 177 |
return key
|
| 178 |
|
| 179 |
|
|
@@ -191,7 +192,10 @@ def add_voice_preset(vp_id, vp_desc, file, ui_state, added_voice_preset):
|
|
| 191 |
add_session_voice_preset(vp_id, vp_desc, file_path, session_id)
|
| 192 |
added_voice_preset['count'] = count + 1
|
| 193 |
except Exception as exception:
|
|
|
|
|
|
|
| 194 |
gr.Warning(str(exception))
|
|
|
|
| 195 |
# After added
|
| 196 |
dataframe = get_voice_preset_to_list(ui_state)
|
| 197 |
df_visible = gr.Dataframe.update(visible=True)
|
|
@@ -379,7 +383,7 @@ with gr.Blocks(css=css) as interface:
|
|
| 379 |
|
| 380 |
system_voice_presets = get_system_voice_presets()
|
| 381 |
# State
|
| 382 |
-
ui_state = gr.State(value={'session_id': pipeline.init_session()})
|
| 383 |
selected_voice_presets = gr.State(value={'selected_voice_preset': None})
|
| 384 |
added_voice_preset_state = gr.State(value={'added_file': None, 'count': 0})
|
| 385 |
# UI Component
|
|
@@ -461,7 +465,7 @@ with gr.Blocks(css=css) as interface:
|
|
| 461 |
)
|
| 462 |
|
| 463 |
# events
|
| 464 |
-
key_text_input.change(fn=set_openai_key, inputs=[key_text_input], outputs=[key_text_input])
|
| 465 |
text_input.change(fn=textbox_listener, inputs=[text_input], outputs=[generate_script_btn])
|
| 466 |
generate_audio_btn.click(
|
| 467 |
fn=generate_audio_fn,
|
|
|
|
| 1 |
import shutil
|
| 2 |
import json5
|
| 3 |
+
import traceback
|
| 4 |
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
from tabulate import tabulate
|
| 7 |
|
|
|
|
| 44 |
def generate_script_fn(instruction, _state: gr.State):
|
| 45 |
try:
|
| 46 |
session_id = _state['session_id']
|
| 47 |
+
api_key = _state['api_key']
|
| 48 |
+
json_script = generate_json_file(session_id, instruction, api_key)
|
| 49 |
table_text = convert_json_to_md(json_script)
|
| 50 |
except Exception as e:
|
| 51 |
gr.Warning(str(e))
|
| 52 |
print(f"Generating script error: {str(e)}")
|
| 53 |
+
traceback.print_exc()
|
| 54 |
return [
|
| 55 |
None,
|
| 56 |
_state,
|
|
|
|
| 91 |
]
|
| 92 |
except Exception as e:
|
| 93 |
print(f"Generation audio error: {str(e)}")
|
| 94 |
+
traceback.print_exc()
|
| 95 |
gr.Warning(str(e))
|
|
|
|
|
|
|
| 96 |
|
| 97 |
return [
|
| 98 |
None,
|
|
|
|
| 173 |
return data
|
| 174 |
|
| 175 |
|
| 176 |
+
def set_openai_key(key, _state):
|
| 177 |
+
_state['api_key'] = key
|
| 178 |
return key
|
| 179 |
|
| 180 |
|
|
|
|
| 192 |
add_session_voice_preset(vp_id, vp_desc, file_path, session_id)
|
| 193 |
added_voice_preset['count'] = count + 1
|
| 194 |
except Exception as exception:
|
| 195 |
+
print(exception)
|
| 196 |
+
traceback.print_exc()
|
| 197 |
gr.Warning(str(exception))
|
| 198 |
+
|
| 199 |
# After added
|
| 200 |
dataframe = get_voice_preset_to_list(ui_state)
|
| 201 |
df_visible = gr.Dataframe.update(visible=True)
|
|
|
|
| 383 |
|
| 384 |
system_voice_presets = get_system_voice_presets()
|
| 385 |
# State
|
| 386 |
+
ui_state = gr.State(value={'session_id': pipeline.init_session(), 'api_key': ''})
|
| 387 |
selected_voice_presets = gr.State(value={'selected_voice_preset': None})
|
| 388 |
added_voice_preset_state = gr.State(value={'added_file': None, 'count': 0})
|
| 389 |
# UI Component
|
|
|
|
| 465 |
)
|
| 466 |
|
| 467 |
# events
|
| 468 |
+
key_text_input.change(fn=set_openai_key, inputs=[key_text_input, ui_state], outputs=[key_text_input])
|
| 469 |
text_input.change(fn=textbox_listener, inputs=[text_input], outputs=[generate_script_btn])
|
| 470 |
generate_audio_btn.click(
|
| 471 |
fn=generate_audio_fn,
|
utils.py
CHANGED
|
@@ -62,6 +62,5 @@ def fade(audio_data, fade_duration=2, sr=32000):
|
|
| 62 |
def get_key(config='config.yaml'):
|
| 63 |
with open('config.yaml', 'r') as file:
|
| 64 |
config = yaml.safe_load(file)
|
| 65 |
-
|
| 66 |
-
return openai_key
|
| 67 |
|
|
|
|
| 62 |
def get_key(config='config.yaml'):
|
| 63 |
with open('config.yaml', 'r') as file:
|
| 64 |
config = yaml.safe_load(file)
|
| 65 |
+
return config['OpenAI-Key'] if 'OpenAI-Key' in config else None
|
|
|
|
| 66 |
|
wavjourney_cli.py
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
import time
|
| 2 |
import argparse
|
| 3 |
|
|
|
|
| 4 |
import pipeline
|
| 5 |
|
| 6 |
parser = argparse.ArgumentParser()
|
| 7 |
parser.add_argument('-f', '--full', action='store_true', help='Go through the full process')
|
| 8 |
parser.add_argument('--input-text', type=str, default='', help='input text or text file')
|
| 9 |
parser.add_argument('--session-id', type=str, default='', help='session id, if set to empty, system will allocate an id')
|
|
|
|
| 10 |
args = parser.parse_args()
|
| 11 |
|
| 12 |
if args.full:
|
|
@@ -14,10 +16,11 @@ if args.full:
|
|
| 14 |
|
| 15 |
start_time = time.time()
|
| 16 |
session_id = pipeline.init_session(args.session_id)
|
|
|
|
| 17 |
|
| 18 |
print(f"Session {session_id} is created.")
|
| 19 |
|
| 20 |
-
pipeline.full_steps(session_id, input_text)
|
| 21 |
end_time = time.time()
|
| 22 |
|
| 23 |
print(f"WavJourney took {end_time - start_time:.2f} seconds to complete.")
|
|
|
|
| 1 |
import time
|
| 2 |
import argparse
|
| 3 |
|
| 4 |
+
import utils
|
| 5 |
import pipeline
|
| 6 |
|
| 7 |
parser = argparse.ArgumentParser()
|
| 8 |
parser.add_argument('-f', '--full', action='store_true', help='Go through the full process')
|
| 9 |
parser.add_argument('--input-text', type=str, default='', help='input text or text file')
|
| 10 |
parser.add_argument('--session-id', type=str, default='', help='session id, if set to empty, system will allocate an id')
|
| 11 |
+
parser.add_argument('--api-key', type=str, default='', help='api key used for GPT-4')
|
| 12 |
args = parser.parse_args()
|
| 13 |
|
| 14 |
if args.full:
|
|
|
|
| 16 |
|
| 17 |
start_time = time.time()
|
| 18 |
session_id = pipeline.init_session(args.session_id)
|
| 19 |
+
api_key = args.api_key if args.api_key != '' else utils.get_key()
|
| 20 |
|
| 21 |
print(f"Session {session_id} is created.")
|
| 22 |
|
| 23 |
+
pipeline.full_steps(session_id, input_text, api_key)
|
| 24 |
end_time = time.time()
|
| 25 |
|
| 26 |
print(f"WavJourney took {end_time - start_time:.2f} seconds to complete.")
|