init

- .DS_Store +0 -0
- deepfakes/.DS_Store +0 -0
- gui.py +75 -0
- requirements.txt +4 -0
- videoGenerate.py +120 -0
- voiceClone.py +38 -0
.DS_Store
ADDED
Binary file (6.15 kB).
deepfakes/.DS_Store
ADDED
Binary file (6.15 kB).
gui.py
ADDED
@@ -0,0 +1,75 @@
import gradio as gr
import voiceClone as vc
import videoGenerate as vg

model_list = []
output_audio = None
image_error = None

# Function to create a voice model from an uploaded audio file
def create_voice_model(title, audio_file):
    global model_list
    new_model = vc.create_model(audio_file, title)
    if new_model:
        model_list.append((title, new_model))  # Store as (title, id)
        return f"Voice model {title} created"
    return "Voice model creation failed"

def update_dropdown_choices():
    # Rebuild the dropdown so newly created models become selectable
    return gr.Dropdown(choices=[title for title, _ in model_list])

def on_model_select(selected_item):
    # Map the selected title back to its model id
    return next((model_id for title, model_id in model_list if title == selected_item), None)

def generate_tts(model_id, text):
    return vc.tts(model_id, text)

def create_talking_face(audio, image, aspect_ratio, resolution, text_prompt, seed):
    output_path = vg.generate_video(audio, image, aspect_ratio, resolution, text_prompt, seed)
    return output_path


# Gradio UI components
with gr.Blocks() as app:
    gr.Markdown("# Deepfake Generator")
    gr.Markdown("## Voice Cloning")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("Generate Voice Model")
            audio_input = gr.Audio(sources=["upload", "microphone"], label="Record Voice Sample", type="filepath", interactive=True)
            title_input = gr.Textbox(label="Model Title", placeholder="Enter model title")
            output_textbox = gr.Label(label="Output", value="")
            generate_model_button = gr.Button("Generate Voice Model")
            generate_model_button.click(create_voice_model, inputs=[title_input, audio_input], outputs=output_textbox)

        with gr.Column(scale=1):
            gr.Markdown("Generate TTS")
            update_models = gr.Button("Update Models")
            reference_id = gr.Textbox(label="Model ID", interactive=False, visible=False)
            model_dropdown = gr.Dropdown(label="Select Model", choices=[], interactive=True)
            model_dropdown.change(fn=on_model_select, inputs=model_dropdown, outputs=reference_id)
            update_models.click(update_dropdown_choices, outputs=model_dropdown)
            text_input = gr.Textbox(label="Text for TTS", placeholder="Enter text to synthesize", lines=3)
            tts_output = gr.Audio(label="TTS Output", type="filepath", interactive=False)
            generate_tts_button = gr.Button("Generate TTS")
            generate_tts_button.click(generate_tts, inputs=[reference_id, text_input], outputs=tts_output)

    gr.Markdown("## Avatar Creation")
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(label="Upload Image", type="filepath", sources=["upload", "webcam"], interactive=True)
            aspect_ratio = gr.Radio(choices=["1:1", "16:9", "9:16"], value="1:1", label="Aspect Ratio")
            resolution = gr.Radio(choices=["540p", "720p"], value="720p", label="Resolution")
            text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter text prompt to describe your avatar", lines=3)
            seed = gr.Slider(minimum=1, maximum=10000, value=None, label="Optional seed for generation (integer)")
        with gr.Column(scale=1):
            output_video = gr.Video(label="Talking Head")
            generate_video_button = gr.Button("Generate Talking Face Avatar")
            generate_video_button.click(create_talking_face, inputs=[tts_output, image_input, aspect_ratio, resolution, text_prompt, seed], outputs=output_video)

if __name__ == "__main__":
    app.launch()
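One design note on the code above: model_list is module-level state, so on a shared Space every visitor sees (and appends to) the same model list, and the dropdown only refreshes when "Update Models" is clicked. If per-session state were wanted instead, a minimal sketch using gr.State could look like the following (hypothetical, not part of this commit; assumes Gradio 4.x semantics where returning a component instance updates it):

    # Hypothetical per-session variant of the model registry (not in this commit).
    import gradio as gr

    with gr.Blocks() as demo:
        models = gr.State([])  # one independent list per browser session

        def add_model(current_models, title):
            # In the real app the id would come from vc.create_model(...)
            current_models = current_models + [(title, "model-id-placeholder")]
            choices = [t for t, _ in current_models]
            return current_models, gr.Dropdown(choices=choices)

        title_box = gr.Textbox(label="Model Title")
        model_dropdown = gr.Dropdown(choices=[], label="Select Model")
        add_button = gr.Button("Add Model")
        add_button.click(add_model, inputs=[models, title_box], outputs=[models, model_dropdown])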
requirements.txt
ADDED
@@ -0,0 +1,5 @@
gradio
requests
ffmpeg  # note: the PyPI package named "ffmpeg" does not install the FFmpeg binary itself
fish-audio-sdk
python-dotenv
videoGenerate.py
ADDED
@@ -0,0 +1,120 @@
import os
import time
import logging
import requests

logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)

api_key = os.getenv("HEDRA_API_KEY")

class Session(requests.Session):
    """requests.Session that prefixes every request path with the Hedra API base URL."""

    def __init__(self, api_key: str):
        super().__init__()
        self.base_url: str = "https://api.hedra.com/web-app/public"
        self.headers["x-api-key"] = api_key

    # Overrides requests.Session.prepare_request
    def prepare_request(self, request: requests.Request) -> requests.PreparedRequest:
        request.url = f"{self.base_url}{request.url}"
        return super().prepare_request(request)


def generate_video(audio, image, aspect_ratio, resolution, text_prompt, seed):
    global api_key
    if not api_key:
        logger.error("HEDRA_API_KEY not found in environment variables or .env file.")
        return None

    # Initialize Hedra client
    session = Session(api_key=api_key)

    logger.info("testing against %s", session.base_url)
    model_id = session.get("/models").json()[0]["id"]
    logger.info("got model id %s", model_id)

    # Register the image asset, then upload its bytes
    image_response = session.post(
        "/assets",
        json={"name": os.path.basename(image), "type": "image"},
    )
    if not image_response.ok:
        logger.error(
            "error creating image: %d %s",
            image_response.status_code,
            image_response.json(),
        )
        return None
    image_id = image_response.json()["id"]
    with open(image, "rb") as f:
        session.post(f"/assets/{image_id}/upload", files={"file": f}).raise_for_status()
    logger.info("uploaded image %s", image_id)

    # Register and upload the audio asset
    audio_id = session.post(
        "/assets", json={"name": os.path.basename(audio), "type": "audio"}
    ).json()["id"]
    with open(audio, "rb") as f:
        session.post(f"/assets/{audio_id}/upload", files={"file": f}).raise_for_status()
    logger.info("uploaded audio %s", audio_id)

    generation_request_data = {
        "type": "video",
        "ai_model_id": model_id,
        "start_keyframe_id": image_id,
        "audio_id": audio_id,
        "generated_video_inputs": {
            "text_prompt": text_prompt,
            "resolution": resolution,
            "aspect_ratio": aspect_ratio,
        },
    }

    # Add optional parameters if provided (cast in case the UI slider delivers a float)
    if seed is not None:
        generation_request_data["generated_video_inputs"]["seed"] = int(seed)

    generation_response = session.post(
        "/generations", json=generation_request_data
    ).json()
    logger.info(generation_response)
    generation_id = generation_response["id"]

    # Poll until the generation completes or errors out
    while True:
        status_response = session.get(f"/generations/{generation_id}/status").json()
        logger.info("status response %s", status_response)
        status = status_response["status"]

        if status in ["complete", "error"]:
            break

        time.sleep(5)

    # Process final status (download or log error)
    output_filename = None
    if status == "complete" and status_response.get("url"):
        download_url = status_response["url"]
        # Use asset_id for the filename if available, otherwise use generation_id
        output_filename_base = status_response.get("asset_id", generation_id)
        output_filename = f"{output_filename_base}.mp4"
        logger.info(f"Generation complete. Downloading video from {download_url} to {output_filename}")
        try:
            # Use a fresh requests.get, not the session, as the URL is likely a presigned S3 URL
            with requests.get(download_url, stream=True) as r:
                r.raise_for_status()  # Check that the request was successful
                with open(output_filename, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            logger.info(f"Successfully downloaded video to {output_filename}")
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to download video: {e}")
            output_filename = None
        except IOError as e:
            logger.error(f"Failed to save video file: {e}")
            output_filename = None
    elif status == "error":
        logger.error(f"Video generation failed: {status_response.get('error_message', 'Unknown error')}")
    else:
        # This case can happen if the loop breaks unexpectedly or the API changes
        logger.warning(f"Video generation finished with status '{status}' but no download URL was found.")

    return output_filename
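For reference, generate_video can be exercised without the Gradio front end; a minimal sketch follows (the file names and prompt are placeholders, and HEDRA_API_KEY must be set before the module is imported, since the key is read at import time):

    # Hypothetical direct call to generate_video (not part of this commit).
    # Assumes HEDRA_API_KEY is exported and the sample files exist locally.
    import videoGenerate as vg

    video_path = vg.generate_video(
        audio="sample_speech.wav",   # placeholder local audio file
        image="portrait.png",        # placeholder local image file
        aspect_ratio="1:1",
        resolution="720p",
        text_prompt="a person speaking calmly to the camera",
        seed=42,
    )
    print(video_path)  # path to the downloaded .mp4, or None on failure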
voiceClone.py
ADDED
@@ -0,0 +1,38 @@
from fish_audio_sdk import Session, TTSRequest
import os
import tempfile
from dotenv import load_dotenv

load_dotenv()

fish_api_key = os.getenv("FISH_API_KEY")
session = Session(fish_api_key)

def create_model(audio_file, title: str = "test"):
    # Read the raw voice sample and register it as a new cloned-voice model
    with open(audio_file, "rb") as voice:
        model = session.create_model(
            title=title,
            description=" ",
            voices=[voice.read()]
        )
    return model.id

def tts(model_id, input_text):
    audio_chunks = []
    tts_request = TTSRequest(reference_id=model_id, text=input_text)
    # Collect audio chunks for TTS
    for chunk in session.tts(tts_request):
        audio_chunks.append(chunk)

    # Write audio chunks to a temporary file to serve as output
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tts_audio:
        for chunk in audio_chunks:
            tts_audio.write(chunk)
        audio_path = tts_audio.name

    return audio_path
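Finally, the voice-cloning wrapper can be smoke-tested on its own; a minimal sketch (the sample path and text are placeholders, and FISH_API_KEY must be present in the environment or a .env file, because the Session is created at import time):

    # Hypothetical standalone test of voiceClone (not part of this commit).
    import voiceClone as vc

    model_id = vc.create_model("my_voice_sample.wav", title="demo-voice")
    wav_path = vc.tts(model_id, "Hello, this is a cloned voice speaking.")
    print(wav_path)  # temporary audio file written by tts()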