Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	Add MCP server (#6)
Browse files
- Add MCP server (778db97e13636a827e72e88a60aaf9960bb3b2d7)
Co-authored-by: Apolinário from multimodal AI art <[email protected]>
    	
        app.py
    CHANGED
    
    | @@ -212,7 +212,16 @@ def normalize_text(transcript: str): | |
| 212 |  | 
| 213 | 
             
            @spaces.GPU
         | 
| 214 | 
             
            def initialize_engine(model_path, audio_tokenizer_path) -> bool:
         | 
| 215 | 
            -
                """ | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 216 | 
             
                global engine
         | 
| 217 | 
             
                try:
         | 
| 218 | 
             
                    logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
         | 
| @@ -301,7 +310,26 @@ def text_to_speech( | |
| 301 | 
             
                ras_win_len=7,
         | 
| 302 | 
             
                ras_win_max_num_repeat=2,
         | 
| 303 | 
             
            ):
         | 
| 304 | 
            -
                """ | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 305 | 
             
                global engine
         | 
| 306 |  | 
| 307 | 
             
                if engine is None:
         | 
| @@ -518,6 +546,15 @@ def create_ui(): | |
| 518 |  | 
| 519 | 
             
                    # Function to play voice sample when clicking on a row
         | 
| 520 | 
             
                    def play_voice_sample(evt: gr.SelectData):
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 521 | 
             
                        try:
         | 
| 522 | 
             
                            # Get the preset name from the clicked row
         | 
| 523 | 
             
                            preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
         | 
| @@ -541,6 +578,16 @@ def create_ui(): | |
| 541 |  | 
| 542 | 
             
                    # Function to handle template selection
         | 
| 543 | 
             
                    def apply_template(template_name):
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 544 | 
             
                        if template_name in PREDEFINED_EXAMPLES:
         | 
| 545 | 
             
                            template = PREDEFINED_EXAMPLES[template_name]
         | 
| 546 | 
             
                            # Enable voice preset and custom reference only for voice-clone template
         | 
| @@ -642,8 +689,8 @@ def main(): | |
| 642 |  | 
| 643 | 
             
                # Create and launch the UI
         | 
| 644 | 
             
                demo = create_ui()
         | 
| 645 | 
            -
                demo.launch(server_name=args.host, server_port=args.port)
         | 
| 646 |  | 
| 647 |  | 
| 648 | 
             
            if __name__ == "__main__":
         | 
| 649 | 
            -
                main()
         | 
|  | |
| 212 |  | 
| 213 | 
             
            @spaces.GPU
         | 
| 214 | 
             
            def initialize_engine(model_path, audio_tokenizer_path) -> bool:
         | 
| 215 | 
            +
                """
         | 
| 216 | 
            +
                Initialize the HiggsAudioServeEngine with the specified model and tokenizer.
         | 
| 217 | 
            +
                
         | 
| 218 | 
            +
                Args:
         | 
| 219 | 
            +
                    model_path: Path to the model to load
         | 
| 220 | 
            +
                    audio_tokenizer_path: Path to the audio tokenizer to load
         | 
| 221 | 
            +
                    
         | 
| 222 | 
            +
                Returns:
         | 
| 223 | 
            +
                    True if initialization was successful, False otherwise
         | 
| 224 | 
            +
                """
         | 
| 225 | 
             
                global engine
         | 
| 226 | 
             
                try:
         | 
| 227 | 
             
                    logger.info(f"Initializing engine with model: {model_path} and audio tokenizer: {audio_tokenizer_path}")
         | 
|  | |
| 310 | 
             
                ras_win_len=7,
         | 
| 311 | 
             
                ras_win_max_num_repeat=2,
         | 
| 312 | 
             
            ):
         | 
| 313 | 
            +
                """
         | 
| 314 | 
            +
                Convert text to speech using HiggsAudioServeEngine.
         | 
| 315 | 
            +
                
         | 
| 316 | 
            +
                Args:
         | 
| 317 | 
            +
                    text: The text to convert to speech
         | 
| 318 | 
            +
                    voice_preset: The voice preset to use (or "EMPTY" for no preset)
         | 
| 319 | 
            +
                    reference_audio: Optional path to reference audio file
         | 
| 320 | 
            +
                    reference_text: Optional transcript of the reference audio
         | 
| 321 | 
            +
                    max_completion_tokens: Maximum number of tokens to generate
         | 
| 322 | 
            +
                    temperature: Sampling temperature for generation
         | 
| 323 | 
            +
                    top_p: Top-p sampling parameter
         | 
| 324 | 
            +
                    top_k: Top-k sampling parameter
         | 
| 325 | 
            +
                    system_prompt: System prompt to guide the model
         | 
| 326 | 
            +
                    stop_strings: Dataframe containing stop strings
         | 
| 327 | 
            +
                    ras_win_len: Window length for repetition avoidance sampling
         | 
| 328 | 
            +
                    ras_win_max_num_repeat: Maximum number of repetitions allowed in the window
         | 
| 329 | 
            +
                    
         | 
| 330 | 
            +
                Returns:
         | 
| 331 | 
            +
                    Tuple of (generated_text, (sample_rate, audio_data)) where audio_data is int16 numpy array
         | 
| 332 | 
            +
                """
         | 
| 333 | 
             
                global engine
         | 
| 334 |  | 
| 335 | 
             
                if engine is None:
         | 
|  | |
| 546 |  | 
| 547 | 
             
                    # Function to play voice sample when clicking on a row
         | 
| 548 | 
             
                    def play_voice_sample(evt: gr.SelectData):
         | 
| 549 | 
            +
                        """
         | 
| 550 | 
            +
                        Play a voice sample when a row is clicked in the voice samples table.
         | 
| 551 | 
            +
                        
         | 
| 552 | 
            +
                        Args:
         | 
| 553 | 
            +
                            evt: The select event containing the clicked row index
         | 
| 554 | 
            +
                            
         | 
| 555 | 
            +
                        Returns:
         | 
| 556 | 
            +
                            Path to the voice sample audio file, or None if not found
         | 
| 557 | 
            +
                        """
         | 
| 558 | 
             
                        try:
         | 
| 559 | 
             
                            # Get the preset name from the clicked row
         | 
| 560 | 
             
                            preset_names = [preset for preset in VOICE_PRESETS.keys() if preset != "EMPTY"]
         | 
|  | |
| 578 |  | 
| 579 | 
             
                    # Function to handle template selection
         | 
| 580 | 
             
                    def apply_template(template_name):
         | 
| 581 | 
            +
                        """
         | 
| 582 | 
            +
                        Apply a predefined template to the UI components.
         | 
| 583 | 
            +
                        
         | 
| 584 | 
            +
                        Args:
         | 
| 585 | 
            +
                            template_name: Name of the template to apply
         | 
| 586 | 
            +
                            
         | 
| 587 | 
            +
                        Returns:
         | 
| 588 | 
            +
                            Tuple of updated values for system_prompt, input_text, template_description,
         | 
| 589 | 
            +
                            voice_preset, custom_reference_accordion, voice_samples_section, and ras_win_len
         | 
| 590 | 
            +
                        """
         | 
| 591 | 
             
                        if template_name in PREDEFINED_EXAMPLES:
         | 
| 592 | 
             
                            template = PREDEFINED_EXAMPLES[template_name]
         | 
| 593 | 
             
                            # Enable voice preset and custom reference only for voice-clone template
         | 
|  | |
| 689 |  | 
| 690 | 
             
                # Create and launch the UI
         | 
| 691 | 
             
                demo = create_ui()
         | 
| 692 | 
            +
                demo.launch(server_name=args.host, server_port=args.port, mcp_server=True)
         | 
| 693 |  | 
| 694 |  | 
| 695 | 
             
            if __name__ == "__main__":
         | 
| 696 | 
            +
                main()
         | 
