kratadata commited on
Commit
e1079c4
·
1 Parent(s): ab69a78
Files changed (6) hide show
  1. .DS_Store +0 -0
  2. deepfakes/.DS_Store +0 -0
  3. gui.py +75 -0
  4. requirements.txt +4 -0
  5. videoGenerate.py +120 -0
  6. voiceClone.py +38 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
deepfakes/.DS_Store ADDED
Binary file (6.15 kB). View file
 
gui.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import voiceClone as vc
3
+ import videoGenerate as vg
4
+
5
# Registry of created voice models, shared across Gradio callbacks.
# Each entry is a (title, model_id) tuple.
model_list = []
# NOTE(review): output_audio and image_error are never read or assigned
# anywhere else in this file — they look like unused leftovers; confirm
# before removing.
output_audio = None
image_error = None
8
+
9
# Function to create model from audio file
def create_voice_model(title, audio_file):
    """Create a voice model from an uploaded/recorded audio sample.

    Args:
        title: Display name for the new model.
        audio_file: Filesystem path to the audio sample (Gradio "filepath").

    Returns:
        A status message for the output label describing success or failure.
    """
    new_model = vc.create_model(audio_file, title)
    if new_model:
        model_list.append((title, new_model))  # Store as (title, id)
        return f"Voice model {title} created"
    # Previously the success message was returned unconditionally (and the
    # label stayed misleading when creation failed); report failure instead.
    return f"Failed to create voice model {title}"
16
+
17
def update_dropdown_choices():
    """Refresh the model dropdown with the titles of all known models."""
    titles = []
    for title, _model_id in model_list:
        titles.append(title)
    return gr.Dropdown(choices=titles)
19
+
20
def on_model_select(selected_item):
    """Resolve a dropdown title back to its model id (None if not found)."""
    for title, model_id in model_list:
        if title == selected_item:
            return model_id
    return None
22
+
23
def generate_tts(model_id, text):
    """Synthesize *text* with the selected voice model via voiceClone.tts."""
    audio_path = vc.tts(model_id, text)
    return audio_path
25
+
26
def create_talking_face(audio, image, aspect_ratio, resolution, text_prompt, seed):
    """Generate a talking-head video from audio + image via videoGenerate."""
    return vg.generate_video(audio, image, aspect_ratio, resolution, text_prompt, seed)
29
+
30
+
31
# Gradio UI components
# Two sections: voice cloning (model creation + TTS) and avatar creation
# (a talking-head video driven by the TTS output audio).
with gr.Blocks() as app:
    gr.Markdown(" # Deepfake Generator")
    gr.Markdown(""" ## Voice Cloning """)
    with gr.Row():
        # Left column: record/upload a sample and create a voice model.
        with gr.Column(scale=1):
            gr.Markdown("Generate Voice Model")
            audio_input = gr.Audio(sources=["upload", "microphone"], label="Record Voice Sample", type="filepath", interactive = True)
            title_input = gr.Textbox(label="Model Title", placeholder="Enter model title")
            output_textbox = gr.Label(label="Output", value = "")
            generate_model_button = gr.Button("Generate Voice Model")
            generate_model_button.click(create_voice_model, inputs=[title_input, audio_input], outputs=output_textbox)

        # Right column: pick a model and synthesize speech from text.
        with gr.Column(scale=1):
            gr.Markdown("Generate TTS")
            update_models = gr.Button("Update Models")
            # Hidden textbox carries the resolved model id into generate_tts.
            reference_id = gr.Textbox(label="Model ID", interactive=False, visible=False)
            model_dropdown = gr.Dropdown(label="Select Model", choices=[], interactive=True)
            model_dropdown.change(fn=on_model_select, inputs=model_dropdown, outputs= reference_id)
            update_models.click(update_dropdown_choices, outputs=model_dropdown)
            text_input = gr.Textbox(label="Text for TTS", placeholder="Enter text to synthesize", lines=3)
            tts_output = gr.Audio(label="TTS Output", type="filepath", interactive=False)
            generate_tts_button = gr.Button("Generate TTS")
            generate_tts_button.click(generate_tts, inputs=[reference_id, text_input], outputs=tts_output)

    gr.Markdown(""" ## Avatar Creation""")
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(label="Upload Image", type="filepath", sources=["upload", "webcam"], interactive=True)
            aspect_ratio = gr.Radio(choices= ["1:1", "16:9", "9:16"], value= "1:1", label="Aspect Ratio")
            resolution = gr.Radio(choices= ["540p", "720p"], value= "720p", label="Resolution")
            text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter text prompt to describe your avatar", lines=3)
            seed = gr.Slider(minimum=1, maximum=10000, value=None, label="Optional seed for generation (integer)")
        with gr.Column(scale=1):
            output_video = gr.Video(label="Talking Head")
            generate_video_button = gr.Button("Generate Talking Face Avatar")
            # The TTS output audio component feeds directly into video generation.
            generate_video_button.click(create_talking_face, inputs=[tts_output, image_input, aspect_ratio, resolution, text_prompt, seed], outputs=output_video)

if __name__ == "__main__":
    app.launch()
72
+
73
+
74
+
75
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
requests
ffmpeg
fish-audio-sdk
gradio
python-dotenv
4
+
videoGenerate.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import logging
4
+ import requests
5
+
6
# Root logger; basicConfig makes INFO-level messages visible when this
# module is run directly or imported first.
logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)

# Hedra API key, read once at import time from the environment.
api_key = os.getenv("HEDRA_API_KEY")
10
+
11
class Session(requests.Session):
    """HTTP session bound to the Hedra public web API.

    Every request made through this session carries the ``x-api-key``
    header, and relative URLs are resolved against ``base_url``.
    """

    def __init__(self, api_key: str):
        super().__init__()
        # Resolved in prepare_request; also read by callers for logging.
        self.base_url: str = "https://api.hedra.com/web-app/public"
        self.headers.update({"x-api-key": api_key})

    def prepare_request(self, request: requests.Request) -> requests.PreparedRequest:
        """Rewrite the relative URL to an absolute one, then delegate."""
        request.url = self.base_url + request.url
        return super().prepare_request(request)
22
+
23
+
24
def _create_and_upload_asset(session, path, asset_type):
    """Register an asset with the API and upload the file at *path*.

    Returns the new asset id, or None if the asset could not be created.
    """
    response = session.post(
        "/assets",
        json={"name": os.path.basename(path), "type": asset_type},
    )
    if not response.ok:
        logger.error(
            "error creating %s asset: %d %s",
            asset_type,
            response.status_code,
            response.json(),
        )
        return None
    asset_id = response.json()["id"]
    with open(path, "rb") as f:
        session.post(f"/assets/{asset_id}/upload", files={"file": f}).raise_for_status()
    logger.info("uploaded %s %s", asset_type, asset_id)
    return asset_id


def _poll_generation_status(session, generation_id):
    """Poll the generation status endpoint until it reports complete/error."""
    while True:
        status_response = session.get(f"/generations/{generation_id}/status").json()
        logger.info("status response %s", status_response)
        if status_response["status"] in ["complete", "error"]:
            return status_response
        time.sleep(5)


def _download_video(download_url, output_filename):
    """Stream the finished video to disk; return its path, or None on failure."""
    try:
        # Use a fresh requests get, not the session, as the URL is likely presigned S3
        with requests.get(download_url, stream=True) as r:
            r.raise_for_status()  # Check if the request was successful
            with open(output_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        logger.info(f"Successfully downloaded video to {output_filename}")
        return output_filename
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to download video: {e}")
    except IOError as e:
        logger.error(f"Failed to save video file: {e}")
    return None


def generate_video(audio, image, aspect_ratio, resolution, text_prompt, seed):
    """Generate a talking-head video with the Hedra API.

    Args:
        audio: Path to the driving audio file.
        image: Path to the start keyframe image.
        aspect_ratio: Aspect ratio string understood by the API, e.g. "1:1".
        resolution: Resolution string, e.g. "540p" or "720p".
        text_prompt: Text description passed to the video model.
        seed: Optional integer seed; omitted from the request when None.

    Returns:
        Path of the downloaded .mp4 on success, otherwise None.
    """
    if not api_key:
        logger.error("HEDRA_API_KEY not found in environment variables or .env file.")
        return None

    # Initialize Hedra client
    session = Session(api_key=api_key)
    logger.info("testing against %s", session.base_url)

    # The API exposes several models; keep the original choice of the first.
    model_id = session.get("/models").json()[0]["id"]
    logger.info("got model id %s", model_id)

    image_id = _create_and_upload_asset(session, image, "image")
    if image_id is None:
        # Previously the code logged the error but kept going and crashed on
        # the missing "id" key; bail out cleanly instead.
        return None
    audio_id = _create_and_upload_asset(session, audio, "audio")
    if audio_id is None:
        return None

    generation_request_data = {
        "type": "video",
        "ai_model_id": model_id,
        "start_keyframe_id": image_id,
        "audio_id": audio_id,
        "generated_video_inputs": {
            "text_prompt": text_prompt,
            "resolution": resolution,
            "aspect_ratio": aspect_ratio,
        },
    }
    # Add optional parameters if provided
    if seed is not None:
        generation_request_data["generated_video_inputs"]["seed"] = seed

    generation_response = session.post(
        "/generations", json=generation_request_data
    ).json()
    logger.info(generation_response)
    generation_id = generation_response["id"]

    status_response = _poll_generation_status(session, generation_id)
    status = status_response["status"]

    if status == "complete" and status_response.get("url"):
        download_url = status_response["url"]
        # Use asset_id for filename if available, otherwise use generation_id
        output_filename_base = status_response.get("asset_id", generation_id)
        output_filename = f"{output_filename_base}.mp4"
        logger.info(f"Generation complete. Downloading video from {download_url} to {output_filename}")
        # Unlike the original 'output_filename in locals()' hack, a failed
        # download now yields None instead of a path to a missing/partial file.
        return _download_video(download_url, output_filename)
    if status == "error":
        logger.error(f"Video generation failed: {status_response.get('error_message', 'Unknown error')}")
    else:
        # This case might happen if loop breaks unexpectedly or API changes
        logger.warning(f"Video generation finished with status '{status}' but no download URL was found.")
    return None
119
+
120
+
voiceClone.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fish_audio_sdk import Session, TTSRequest
2
+ import os
3
+ import tempfile
4
+ from dotenv import load_dotenv
5
+
6
# Load FISH_API_KEY (and any other settings) from a local .env file.
load_dotenv()

# Single shared Fish Audio session, created at import time.
# NOTE(review): fish_api_key is None when the variable is unset — the SDK
# session below would then be unusable; confirm deployments always set it.
fish_api_key = os.getenv("FISH_API_KEY")
session = Session(fish_api_key)
10
+
11
def create_model(audio_file, title: str = "test"):
    """Create a Fish Audio voice model from a reference recording.

    Args:
        audio_file: Path to the reference audio sample.
        title: Display title for the new model.

    Returns:
        The id of the newly created model.
    """
    # Read the sample up front so the file handle is closed before the
    # (potentially slow) API call; the leftover debug print is removed.
    with open(audio_file, "rb") as voice:
        voice_bytes = voice.read()
    model = session.create_model(
        title=title,
        description=" ",
        voices=[voice_bytes],
    )
    return model.id
20
+
21
def tts(model_id, input_text):
    """Synthesize *input_text* with the voice model *model_id*.

    Streams the audio chunks returned by the Fish Audio API into a
    temporary .wav file and returns that file's path.
    """
    request = TTSRequest(reference_id=model_id, text=input_text)
    # Write each chunk as it arrives; delete=False keeps the file around
    # after the handle closes so it can be served as output.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tts_audio:
        for chunk in session.tts(request):
            tts_audio.write(chunk)
        return tts_audio.name
36
+
37
+
38
+