# Hugging Face Spaces status banner (scraped UI chrome, not part of the app code):
# Spaces: Running
# --- Environment setup ------------------------------------------------------
import os
import shutil  # NOTE(review): not used in this file — verify before removing
from huggingface_hub import snapshot_download
import gradio as gr

# Run relative to this file so the config and model paths below resolve.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

from scripts.inference import inference_process
import argparse
import uuid

# Download the pretrained Hallo weights into ./pretrained_models
# (the hub client caches, so repeat launches are cheap).
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Run the Hallo talking-head pipeline on one image/audio pair.

    Args:
        source_image: Filesystem path to the portrait image (from gr.Image).
        driving_audio: Filesystem path to the driving audio clip (from gr.Audio).
        progress: Gradio progress tracker; ``track_tqdm=True`` mirrors tqdm
            bars emitted by the inference script into the web UI.

    Returns:
        Path to the generated ``.mp4`` video, suitable for a Gradio video output.
    """
    # Build the output name once (previously duplicated in two f-strings,
    # which risked silent divergence). A fresh UUID per request keeps
    # concurrent runs from clobbering each other's files.
    output_path = f'output-{uuid.uuid4()}.mp4'
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,  # None -> inference script falls back to its default weights — TODO confirm
    )
    inference_process(args)
    return output_path
# --- Gradio UI --------------------------------------------------------------
# Wire run_inference into a simple image+audio -> video interface.
_title = "Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation"
_description = (
    "Generate talking head avatars driven from audio. "
    "**every 10 seconds of generation takes ~1 minute** - duplicate the space "
    "for private use or try for free on Google Colab"
)

iface = gr.Interface(
    fn=run_inference,
    inputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],
    outputs="video",
    title=_title,
    description=_description,
    cache_examples=False,
)

# share=True publishes a temporary public gradio.live URL.
iface.launch(share=True)