fix formatting
- app.py +0 -1
- gui.py +0 -75
- voiceClone.py +0 -3
app.py
CHANGED
@@ -56,7 +56,6 @@ with gr.Blocks() as app:
 
     gr.Markdown(""" ## Avatar Creation""")
     with gr.Row():
-        gr.Markdown("Set Deepfake Options")
         with gr.Column(scale=1):
             image_input = gr.Image(label="Upload Image", type="filepath", sources=["upload", "webcam"], interactive=True)
             aspect_ratio = gr.Radio(choices= ["1:1", "16:9", "9:16"], value= "1:1", label="Aspect Ratio")
gui.py
DELETED
@@ -1,75 +0,0 @@
-import gradio as gr
-import voiceClone as vc
-import videoGenerate as vg
-
-model_list = []
-output_audio = None
-image_error = None
-
-# Function to create model from audio file
-def create_voice_model(title, audio_file):
-    global model_list
-    new_model = vc.create_model(audio_file, title)
-    if new_model:
-        model_list.append((title, new_model)) # Store as (title, id)
-    return f"Voice model {title} created"
-
-def update_dropdown_choices():
-    return gr.Dropdown(choices=[title for title, _ in model_list])
-
-def on_model_select(selected_item):
-    return next((model_id for title, model_id in model_list if title == selected_item), None)
-
-def generate_tts(model_id, text):
-    return vc.tts(model_id, text)
-
-def create_talking_face(audio, image, aspect_ratio, resolution, text_prompt, seed):
-    output_path = vg.generate_video(audio, image, aspect_ratio, resolution, text_prompt, seed)
-    return output_path
-
-
-# Gradio UI components
-with gr.Blocks() as app:
-    gr.Markdown(" # Deepfake Generator")
-    gr.Markdown(""" ## Voice Cloning """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("Generate Voice Model")
-            audio_input = gr.Audio(sources=["upload", "microphone"], label="Record Voice Sample", type="filepath", interactive = True)
-            title_input = gr.Textbox(label="Model Title", placeholder="Enter model title")
-            output_textbox = gr.Label(label="Output", value = "")
-            generate_model_button = gr.Button("Generate Voice Model")
-            generate_model_button.click(create_voice_model, inputs=[title_input, audio_input], outputs=output_textbox)
-
-        with gr.Column(scale=1):
-            gr.Markdown("Generate TTS")
-            update_models = gr.Button("Update Models")
-            reference_id = gr.Textbox(label="Model ID", interactive=False, visible=False)
-            model_dropdown = gr.Dropdown(label="Select Model", choices=[], interactive=True)
-            model_dropdown.change(fn=on_model_select, inputs=model_dropdown, outputs= reference_id)
-            update_models.click(update_dropdown_choices, outputs=model_dropdown)
-            text_input = gr.Textbox(label="Text for TTS", placeholder="Enter text to synthesize", lines=3)
-            tts_output = gr.Audio(label="TTS Output", type="filepath", interactive=False)
-            generate_tts_button = gr.Button("Generate TTS")
-            generate_tts_button.click(generate_tts, inputs=[reference_id, text_input], outputs=tts_output)
-
-
-    gr.Markdown(""" ## Avatar Creation""")
-    with gr.Row():
-        with gr.Column(scale=1):
-            image_input = gr.Image(label="Upload Image", type="filepath", sources=["upload", "webcam"], interactive=True)
-            aspect_ratio = gr.Radio(choices= ["1:1", "16:9", "9:16"], value= "1:1", label="Aspect Ratio")
-            resolution = gr.Radio(choices= ["540p", "720p"], value= "720p", label="Resolution")
-            text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter text prompt to describe your avatar", lines=3)
-            seed = gr.Slider(minimum=1, maximum=10000, value=None, label="Optional seed for generation (integer)")
-        with gr.Column(scale=1):
-            output_video = gr.Video(label="Talking Head")
-            generate_video_button = gr.Button("Generate Talking Face Avatar")
-            generate_video_button.click(create_talking_face, inputs=[tts_output, image_input, aspect_ratio, resolution, text_prompt, seed], outputs=output_video)
-
-if __name__ == "__main__":
-    app.launch()
-
-
-
-
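Note: the deleted gui.py used Gradio's component-return pattern to refresh the model dropdown: update_dropdown_choices() returns a new gr.Dropdown whose choices replace those of the live component. Below is a minimal, self-contained sketch of that pattern, assuming Gradio 4+; add_model, refresh_choices, and the "fake-id" placeholder are illustrative names, not code from this repository.

import gradio as gr

model_list = []  # stand-in for the (title, model_id) pairs gui.py kept in memory

def add_model(title):
    # Placeholder: pretend a voice model was created ("fake-id" is not a real API result).
    model_list.append((title, "fake-id"))
    return f"Voice model {title} created"

def refresh_choices():
    # Returning a component instance from a handler updates the live dropdown's choices.
    return gr.Dropdown(choices=[title for title, _ in model_list])

with gr.Blocks() as demo:
    title = gr.Textbox(label="Model Title")
    status = gr.Label(value="")
    add_btn = gr.Button("Add Model")
    refresh_btn = gr.Button("Update Models")
    dropdown = gr.Dropdown(label="Select Model", choices=[], interactive=True)

    add_btn.click(add_model, inputs=title, outputs=status)
    refresh_btn.click(refresh_choices, outputs=dropdown)

if __name__ == "__main__":
    demo.launch()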
voiceClone.py
CHANGED
@@ -1,9 +1,6 @@
 from fish_audio_sdk import Session, TTSRequest
 import os
 import tempfile
-from dotenv import load_dotenv
-
-load_dotenv()
 
 fish_api_key = os.getenv("FISH_API_KEY")
 session = Session(fish_api_key)
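With load_dotenv() removed, voiceClone.py reads FISH_API_KEY straight from the process environment, so the key has to be supplied some other way (for a Hugging Face Space, typically as a repository secret). A small defensive sketch, not part of this commit, that makes a missing key fail loudly:

import os

fish_api_key = os.getenv("FISH_API_KEY")
if fish_api_key is None:
    # Assumption: the key is exported in the shell or configured as a Space secret.
    raise RuntimeError("FISH_API_KEY is not set; export it or add it as a Space secret.")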