import os
import sys
import tempfile

import gradio as gr
from huggingface_hub import snapshot_download

from src.gradio_demo import SadTalker


def get_source_image(image):
    return image


# Detect whether we are running inside a WebUI extension.
try:
    import webui
    in_webui = True
except ImportError:
    in_webui = False


def toggle_audio_file(choice):
    # Idle mode on: hide the real audio input and show the "no audio" placeholder.
    if choice:
        return gr.update(visible=False), gr.update(visible=True)
    return gr.update(visible=True), gr.update(visible=False)


def ref_video_fn(path_of_ref_video):
    # Tick "Use Reference Video" automatically once a reference video is uploaded.
    return gr.update(value=path_of_ref_video is not None)


def download_model():
    # Fetch the pretrained checkpoints into ./checkpoints.
    # (local_dir_use_symlinks is deprecated and ignored by newer huggingface_hub releases.)
    REPO_ID = 'ashishninehertz/modelforfacetalk'
    snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints',
                      local_dir_use_symlinks=True)
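
# Headless usage sketch (not part of the UI flow): the submit.click wiring
# further down passes source_image and driven_audio as the first two inputs to
# SadTalker.test, so a direct call along these lines should work. The example
# paths are assumptions borrowed from the commented-out gr.Examples list below;
# check SadTalker.test's actual signature before relying on this.
#
#   download_model()
#   talker = SadTalker(lazy_load=True)
#   video_path = talker.test('examples/source_image/art_5.png',
#                            'examples/driven_audio/chinese_news.wav')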
def animatalk_demo():
    download_model()
    animatalker = SadTalker(lazy_load=True)

    custom_theme = gr.themes.Default(
        primary_hue="blue",
        secondary_hue="teal",
        neutral_hue="slate",
        radius_size="md",
        font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
    ).set(
        button_primary_background_fill="linear-gradient(90deg, #2563eb 0%, #7c3aed 100%)",
        button_primary_background_fill_hover="linear-gradient(90deg, #1d4ed8 0%, #6d28d9 100%)",
        button_primary_text_color="white",
        button_primary_background_fill_dark="linear-gradient(90deg, #3b82f6 0%, #8b5cf6 100%)",
    )

    with gr.Blocks(theme=custom_theme, analytics_enabled=False) as animatalk_interface:
        gr.HTML("""
        <div style="text-align: center;">
            <h1>AnimaTalk: AI-Powered Talking Face Animation</h1>
            <p>Bring your images to life with realistic facial animations</p>
        </div>
        """)
""") with gr.Row(): with gr.Column(variant='panel', min_width=500): with gr.Tabs(): with gr.TabItem('Source Image', id="source_image"): source_image = gr.Image( label="Upload your image", source="upload", type="filepath", elem_id="source_img", interactive=True ) with gr.Tabs(): # with gr.TabItem('Animation Settings', id="driven_audio"): # gr.Markdown(""" #
# Animation Options:
# 1. Audio only
# 2. Audio + Reference Video
# 3. Idle animation only
# 4. Reference video only #
# """) with gr.Row(): driven_audio = gr.Audio( label="Upload audio file", source="upload", type="filepath", max_length=180, visible=True ) driven_audio_no = gr.Audio( label="No audio needed for idle mode", source="upload", type="filepath", visible=False ) with gr.Column(): use_idle_mode = gr.Checkbox( label="Enable Idle Animation", info="Generate animation without audio" ) length_of_audio = gr.Number( value=5, label="Video length (seconds)", precision=0 ) use_idle_mode.change( toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no] ) with gr.Row(): ref_video = gr.Video( label="Reference Video (optional)", source="upload", type="filepath", interactive=True ) with gr.Column(): use_ref_video = gr.Checkbox( label="Use Reference Video", value=False ) ref_info = gr.Radio( ['pose', 'blink', 'pose+blink', 'all'], value='pose', label='Reference Style', info="Select which aspects to copy from reference" ) ref_video.change( ref_video_fn, inputs=ref_video, outputs=[use_ref_video] ) with gr.Column(variant='panel'): with gr.Tabs(): with gr.TabItem('Generation Settings'): with gr.Column(variant='panel'): with gr.Row(): pose_style = gr.Slider( minimum=0, maximum=45, step=1, label="Pose Style Intensity", value=0 ) exp_weight = gr.Slider( minimum=0, maximum=3, step=0.1, label="Expression Strength", value=1 ) blink_every = gr.Checkbox( label="Enable Eye Blinking", value=True ) with gr.Row(): size_of_image = gr.Radio( [256, 512], value=256, label='Face Resolution', info="Higher resolution needs more processing power" ) preprocess_type = gr.Radio( ['crop', 'resize', 'full', 'extcrop', 'extfull'], value='crop', label='Image Processing', info="How to handle the input image" ) with gr.Row(): is_still_mode = gr.Checkbox( label="Still Mode", info="Reduces head movement for more stable results" ) facerender = gr.Radio( ['facevid2vid', 'pirender'], value='facevid2vid', label='Rendering Engine' ) with gr.Row(): batch_size = gr.Slider( label="Generation Batch Size", step=1, maximum=10, value=1, info="Higher values process more at once" ) enhancer = gr.Checkbox( label="Enable Face Enhancer", info="Improves face quality with GFPGAN" ) submit = gr.Button( 'Generate Animation', variant='primary', elem_classes="gradient-border" ) with gr.Tabs(): gen_video = gr.Video( label="Generated Animation", format="mp4", autoplay=True, elem_id="output_video" ) submit.click( fn=animatalker.test, inputs=[ source_image, driven_audio, preprocess_type, is_still_mode, enhancer, batch_size, size_of_image, pose_style, facerender, exp_weight, use_ref_video, ref_video, ref_info, use_idle_mode, length_of_audio, blink_every ], outputs=[gen_video], ) # with gr.Row(): # examples = [ # [ # 'examples/source_image/full_body_1.png', # 'examples/driven_audio/bus_chinese.wav', # 'crop', # True, # False # ], # [ # 'examples/source_image/full_body_2.png', # 'examples/driven_audio/japanese.wav', # 'crop', # False, # False # ], # [ # 'examples/source_image/full3.png', # 'examples/driven_audio/deyu.wav', # 'crop', # False, # True # ], # [ # 'examples/source_image/full4.jpeg', # 'examples/driven_audio/eluosi.wav', # 'full', # False, # True # ], # [ # 'examples/source_image/full4.jpeg', # 'examples/driven_audio/imagine.wav', # 'full', # True, # True # ], # [ # 'examples/source_image/full_body_1.png', # 'examples/driven_audio/bus_chinese.wav', # 'full', # True, # False # ], # [ # 'examples/source_image/art_13.png', # 'examples/driven_audio/fayu.wav', # 'resize', # True, # False # ], # [ # 'examples/source_image/art_5.png', # 
'examples/driven_audio/chinese_news.wav', # 'resize', # False, # False # ], # [ # 'examples/source_image/art_5.png', # 'examples/driven_audio/RD_Radio31_000.wav', # 'resize', # True, # True # ], # ] # gr.Examples( # examples=examples, # inputs=[ # source_image, # driven_audio, # preprocess_type, # is_still_mode, # enhancer # ], # outputs=[gen_video], # fn=animatalker.test, # cache_examples=os.getenv('SYSTEM') == 'spaces' # ) return animatalk_interface if __name__ == "__main__": demo = animatalk_demo() demo.queue(max_size=10, api_open=True) demo.launch(debug=True)
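
# Launch-options sketch (assumptions: Gradio's defaults apply; `share`,
# `server_name`, and `server_port` are standard gr.Blocks.launch() keywords):
#
#   demo.launch(debug=True)                                # local only, port 7860
#   demo.launch(debug=True, share=True)                    # adds a temporary public link
#   demo.launch(server_name="0.0.0.0", server_port=8080)   # serve on the LAN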