"""Gradio demo for AnimaTalk: SadTalker-based talking-face animation."""
import os

import gradio as gr
from huggingface_hub import snapshot_download

from src.gradio_demo import SadTalker
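
# `src.gradio_demo.SadTalker` assumes the SadTalker source tree is importable
# from this script's directory; its checkpoints are fetched by download_model().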


def get_source_image(image):
    # Identity passthrough, kept for parity with the upstream SadTalker demo.
    return image


# Detect whether we are running inside stable-diffusion-webui, which exposes
# a top-level `webui` module when this app is embedded as an extension.
try:
    import webui  # noqa: F401
    in_webui = True
except ImportError:
    in_webui = False


def toggle_audio_file(choice):
    # Idle mode off: show the real audio uploader; on: show the placeholder.
    if not choice:
        return gr.update(visible=True), gr.update(visible=False)
    return gr.update(visible=False), gr.update(visible=True)


def ref_video_fn(path_of_ref_video):
    # Tick "Use Reference Video" automatically once a reference video is set.
    return gr.update(value=path_of_ref_video is not None)


def download_model():
    # Fetch the pretrained checkpoints into ./checkpoints on first run.
    # local_dir_use_symlinks is deprecated (and ignored) in recent
    # huggingface_hub releases; it is kept here for older versions.
    REPO_ID = 'ashishninehertz/modelforfacetalk'
    snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)


def animatalk_demo():
    download_model()
    animatalker = SadTalker(lazy_load=True)
    custom_theme = gr.themes.Default(
        primary_hue="blue",
        secondary_hue="teal",
        neutral_hue="slate",
        radius_size="md",
        font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
    ).set(
        button_primary_background_fill="linear-gradient(90deg, #2563eb 0%, #7c3aed 100%)",
        button_primary_background_fill_hover="linear-gradient(90deg, #1d4ed8 0%, #6d28d9 100%)",
        button_primary_text_color="white",
        button_primary_background_fill_dark="linear-gradient(90deg, #3b82f6 0%, #8b5cf6 100%)",
    )
    with gr.Blocks(theme=custom_theme, analytics_enabled=False) as animatalk_interface:
        gr.HTML("""
            <h1 style="text-align: center;">AnimaTalk: AI-Powered Talking Face Animation</h1>
            <p style="text-align: center;">Bring your images to life with realistic facial animations</p>
        """)
        with gr.Row():
            with gr.Column(variant='panel', min_width=500):
                with gr.Tabs():
                    with gr.TabItem('Source Image', id="source_image"):
                        source_image = gr.Image(
                            label="Upload your image",
                            source="upload",
                            type="filepath",
                            elem_id="source_img",
                            interactive=True
                        )
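                # Driving signals: an audio clip, an optional reference video,
                # or idle mode (a silent clip of fixed length).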
                with gr.Tabs():
                    with gr.TabItem('Animation Settings', id="driven_audio"):
                        gr.Markdown("""
                        Animation Options:
                        1. Audio only
                        2. Audio + Reference Video
                        3. Idle animation only
                        4. Reference video only
                        """)
                        with gr.Row():
                            driven_audio = gr.Audio(
                                label="Upload audio file",
                                source="upload",
                                type="filepath",
                                max_length=180,
                                visible=True
                            )
                            driven_audio_no = gr.Audio(
                                label="No audio needed for idle mode",
                                source="upload",
                                type="filepath",
                                visible=False
                            )
                            with gr.Column():
                                use_idle_mode = gr.Checkbox(
                                    label="Enable Idle Animation",
                                    info="Generate animation without audio"
                                )
                                length_of_audio = gr.Number(
                                    value=5,
                                    label="Video length (seconds)",
                                    precision=0
                                )
                            # Swap which audio widget is visible when idle mode toggles.
                            use_idle_mode.change(
                                toggle_audio_file,
                                inputs=use_idle_mode,
                                outputs=[driven_audio, driven_audio_no]
                            )
                        with gr.Row():
                            ref_video = gr.Video(
                                label="Reference Video (optional)",
                                source="upload",
                                type="filepath",
                                interactive=True
                            )
                            with gr.Column():
                                use_ref_video = gr.Checkbox(
                                    label="Use Reference Video",
                                    value=False
                                )
                                ref_info = gr.Radio(
                                    ['pose', 'blink', 'pose+blink', 'all'],
                                    value='pose',
                                    label='Reference Style',
                                    info="Select which aspects to copy from reference"
                                )
                            # Auto-check "Use Reference Video" once a file is supplied.
                            ref_video.change(
                                ref_video_fn,
                                inputs=ref_video,
                                outputs=[use_ref_video]
                            )
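            # Right column: generation settings and the rendered output.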
            with gr.Column(variant='panel'):
                with gr.Tabs():
                    with gr.TabItem('Generation Settings'):
                        with gr.Column(variant='panel'):
                            with gr.Row():
                                pose_style = gr.Slider(
                                    minimum=0,
                                    maximum=45,
                                    step=1,
                                    label="Pose Style Intensity",
                                    value=0
                                )
                                exp_weight = gr.Slider(
                                    minimum=0,
                                    maximum=3,
                                    step=0.1,
                                    label="Expression Strength",
                                    value=1
                                )
                                blink_every = gr.Checkbox(
                                    label="Enable Eye Blinking",
                                    value=True
                                )
                            with gr.Row():
                                size_of_image = gr.Radio(
                                    [256, 512],
                                    value=256,
                                    label='Face Resolution',
                                    info="Higher resolution needs more processing power"
                                )
                                preprocess_type = gr.Radio(
                                    ['crop', 'resize', 'full', 'extcrop', 'extfull'],
                                    value='crop',
                                    label='Image Processing',
                                    info="How to handle the input image"
                                )
                            with gr.Row():
                                is_still_mode = gr.Checkbox(
                                    label="Still Mode",
                                    info="Reduces head movement for more stable results"
                                )
                                facerender = gr.Radio(
                                    ['facevid2vid', 'pirender'],
                                    value='facevid2vid',
                                    label='Rendering Engine'
                                )
                            with gr.Row():
                                batch_size = gr.Slider(
                                    label="Generation Batch Size",
                                    step=1,
                                    minimum=1,
                                    maximum=10,
                                    value=1,
                                    info="Higher values process more at once"
                                )
                                enhancer = gr.Checkbox(
                                    label="Enable Face Enhancer",
                                    info="Improves face quality with GFPGAN"
                                )
                            submit = gr.Button(
                                'Generate Animation',
                                variant='primary',
                                elem_classes="gradient-border"
                            )
                with gr.Tabs():
                    gen_video = gr.Video(
                        label="Generated Animation",
                        format="mp4",
                        autoplay=True,
                        elem_id="output_video"
                    )
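
        # submit.click passes inputs positionally, so this list must match the
        # parameter order of SadTalker.test in src/gradio_demo.py.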
        submit.click(
            fn=animatalker.test,
            inputs=[
                source_image,
                driven_audio,
                preprocess_type,
                is_still_mode,
                enhancer,
                batch_size,
                size_of_image,
                pose_style,
                facerender,
                exp_weight,
                use_ref_video,
                ref_video,
                ref_info,
                use_idle_mode,
                length_of_audio,
                blink_every
            ],
            outputs=[gen_video],
        )
        # with gr.Row():
        #     examples = [
        #         [
        #             'examples/source_image/full_body_1.png',
        #             'examples/driven_audio/bus_chinese.wav',
        #             'crop',
        #             True,
        #             False
        #         ],
        #         [
        #             'examples/source_image/full_body_2.png',
        #             'examples/driven_audio/japanese.wav',
        #             'crop',
        #             False,
        #             False
        #         ],
        #         [
        #             'examples/source_image/full3.png',
        #             'examples/driven_audio/deyu.wav',
        #             'crop',
        #             False,
        #             True
        #         ],
        #         [
        #             'examples/source_image/full4.jpeg',
        #             'examples/driven_audio/eluosi.wav',
        #             'full',
        #             False,
        #             True
        #         ],
        #         [
        #             'examples/source_image/full4.jpeg',
        #             'examples/driven_audio/imagine.wav',
        #             'full',
        #             True,
        #             True
        #         ],
        #         [
        #             'examples/source_image/full_body_1.png',
        #             'examples/driven_audio/bus_chinese.wav',
        #             'full',
        #             True,
        #             False
        #         ],
        #         [
        #             'examples/source_image/art_13.png',
        #             'examples/driven_audio/fayu.wav',
        #             'resize',
        #             True,
        #             False
        #         ],
        #         [
        #             'examples/source_image/art_5.png',
        #             'examples/driven_audio/chinese_news.wav',
        #             'resize',
        #             False,
        #             False
        #         ],
        #         [
        #             'examples/source_image/art_5.png',
        #             'examples/driven_audio/RD_Radio31_000.wav',
        #             'resize',
        #             True,
        #             True
        #         ],
        #     ]
        #     gr.Examples(
        #         examples=examples,
        #         inputs=[
        #             source_image,
        #             driven_audio,
        #             preprocess_type,
        #             is_still_mode,
        #             enhancer
        #         ],
        #         outputs=[gen_video],
        #         fn=animatalker.test,
        #         cache_examples=os.getenv('SYSTEM') == 'spaces'
        #     )
    return animatalk_interface


if __name__ == "__main__":
    demo = animatalk_demo()
    demo.queue(max_size=10, api_open=True)
    demo.launch(debug=True)
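    # launch() also accepts share=True for a temporary public link and
    # server_name="0.0.0.0" to expose the app on the local network.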