"""Set up and launch the Microsoft VibeVoice Gradio demo.

Clones the VibeVoice repository, installs it in editable mode, patches the
bundled Gradio demo script for the selected execution environment (Hugging
Face ZeroGPU or pure CPU), and launches the patched demo.
"""

import os
import subprocess
import sys
from pathlib import Path

# --- 0. Hardcoded toggle for execution environment ---
# True  -> use Hugging Face ZeroGPU (recommended)
# False -> use the slower, pure CPU environment
USE_ZEROGPU = True

# --- 1. Clone the VibeVoice repository ---
repo_dir = "VibeVoice"
if not os.path.exists(repo_dir):
    print("Cloning the VibeVoice repository...")
    try:
        subprocess.run(
            ["git", "clone", "https://github.com/microsoft/VibeVoice.git"],
            check=True, capture_output=True, text=True
        )
        print("Repository cloned successfully.")
    except subprocess.CalledProcessError as e:
        print(f"Error cloning repository: {e.stderr}")
        sys.exit(1)
else:
    print("Repository already exists. Skipping clone.")

# --- 2. Install the VibeVoice Package ---
# Note: Other dependencies are installed via requirements.txt
os.chdir(repo_dir)
print(f"Changed directory to: {os.getcwd()}")
print("Installing the VibeVoice package in editable mode...")
try:
    # sys.executable ensures the package lands in THIS interpreter's
    # environment, not whatever "pip" resolves to on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-e", "."],
        check=True, capture_output=True, text=True
    )
    print("Package installed successfully.")
except subprocess.CalledProcessError as e:
    print(f"Error installing package: {e.stderr}")
    sys.exit(1)

# --- 3. Modify the demo script to be environment-aware ---
demo_script_path = Path("demo/gradio_demo.py")
print(f"Reading {demo_script_path} to apply environment-specific modifications...")
try:
    file_content = demo_script_path.read_text()

    # Original model loading block that we need to replace. It is
    # problematic because it hardcodes FlashAttention.
    # NOTE(review): this literal must match demo/gradio_demo.py exactly
    # (including indentation) or str.replace() below is a silent no-op;
    # a warning is emitted further down if the match fails.
    original_block = """
        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16,
            device_map='cuda',
            attn_implementation="flash_attention_2",
        )"""

    if USE_ZEROGPU:
        print("Optimizing for ZeroGPU execution...")
        # ZeroGPU: drop the problematic `attn_implementation` kwarg;
        # `transformers` will automatically use the best available
        # attention mechanism.
        replacement_block_gpu = """
        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16,
            device_map='cuda',
        )"""

        # Add 'import spaces' at the beginning of the file for the
        # @spaces.GPU decorator.
        modified_content = "import spaces\n" + file_content

        # Decorate the main interface class to request a GPU from the
        # Spaces infrastructure.
        # NOTE(review): @spaces.GPU normally decorates functions/methods;
        # applying it to a class is unusual -- confirm it takes effect.
        modified_content = modified_content.replace(
            "class VibeVoiceGradioInterface:",
            "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:"
        )

        # Replace the model loading block.
        modified_content = modified_content.replace(original_block, replacement_block_gpu)
        print("Script modified for ZeroGPU successfully.")
    else:
        # Pure CPU execution: use float32 and map directly to the CPU.
        print("Modifying for pure CPU execution...")
        replacement_block_cpu = """
        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
            self.model_path,
            torch_dtype=torch.float32,  # Use float32 for CPU
            device_map="cpu",
        )"""
        modified_content = file_content.replace(original_block, replacement_block_cpu)
        print("Script modified for CPU successfully.")

    # str.replace() fails silently; surface it so a drifted upstream demo
    # (or an indentation mismatch) does not go unnoticed.
    if original_block not in file_content:
        print("Warning: expected model-loading block not found in "
              f"{demo_script_path}; the hardcoded FlashAttention loader was "
              "NOT replaced.")

    # Write the dynamically modified content back to the demo file.
    demo_script_path.write_text(modified_content)
except Exception as e:
    print(f"An error occurred while modifying the script: {e}")
    sys.exit(1)

# --- 4. Launch the Gradio Demo ---
model_id = "microsoft/VibeVoice-1.5B"

# Use sys.executable (not bare "python") so the demo runs under the same
# interpreter the package was just installed into.
command = [
    sys.executable, str(demo_script_path),
    "--model_path", model_id,
    "--share"
]
print(f"Launching Gradio demo with command: {' '.join(command)}")
subprocess.run(command)