Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Commit 
							
							·
						
						38d85c1
	
1
								Parent(s):
							
							9f8c873
								
Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -3,7 +3,7 @@ import os | |
| 3 |  | 
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
             
            import pytube as pt
         | 
| 6 | 
            -
            from  | 
| 7 |  | 
| 8 | 
             
            MODEL_NAME = "openai/whisper-tiny"
         | 
| 9 |  | 
| @@ -16,22 +16,21 @@ pipe = ASRDiarizationPipeline.from_pretrained( | |
| 16 | 
             
                use_auth_token=HF_TOKEN,
         | 
| 17 | 
             
            )
         | 
| 18 |  | 
| 19 | 
            -
            def  | 
| 20 | 
            -
                 | 
| 21 | 
            -
                if (microphone is not None) and (file_upload is not None):
         | 
| 22 | 
            -
                    warn_output = (
         | 
| 23 | 
            -
                        "WARNING: You've uploaded an audio file and used the microphone. "
         | 
| 24 | 
            -
                        "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
         | 
| 25 | 
            -
                    )
         | 
| 26 |  | 
| 27 | 
            -
                elif (microphone is None) and (file_upload is None):
         | 
| 28 | 
            -
                    return "ERROR: You have to either use the microphone or upload an audio file"
         | 
| 29 |  | 
| 30 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 31 |  | 
| 32 | 
            -
                text = pipe(file)
         | 
| 33 |  | 
| 34 | 
            -
             | 
|  | |
|  | |
|  | |
| 35 |  | 
| 36 |  | 
| 37 | 
             
            def _return_yt_html_embed(yt_url):
         | 
| @@ -43,7 +42,7 @@ def _return_yt_html_embed(yt_url): | |
| 43 | 
             
                return HTML_str
         | 
| 44 |  | 
| 45 |  | 
| 46 | 
            -
            def yt_transcribe(yt_url):
         | 
| 47 | 
             
                yt = pt.YouTube(yt_url)
         | 
| 48 | 
             
                html_embed_str = _return_yt_html_embed(yt_url)
         | 
| 49 | 
             
                stream = yt.streams.filter(only_audio=True)[0]
         | 
| @@ -51,7 +50,7 @@ def yt_transcribe(yt_url): | |
| 51 |  | 
| 52 | 
             
                text = pipe("audio.mp3")
         | 
| 53 |  | 
| 54 | 
            -
                return html_embed_str,  | 
| 55 |  | 
| 56 |  | 
| 57 | 
             
            demo = gr.Blocks()
         | 
| @@ -59,37 +58,43 @@ demo = gr.Blocks() | |
| 59 | 
             
            mf_transcribe = gr.Interface(
         | 
| 60 | 
             
                fn=transcribe,
         | 
| 61 | 
             
                inputs=[
         | 
| 62 | 
            -
                    gr.inputs.Audio(source="microphone", type="filepath", optional=True),
         | 
| 63 | 
             
                    gr.inputs.Audio(source="upload", type="filepath", optional=True),
         | 
|  | |
| 64 | 
             
                ],
         | 
| 65 | 
             
                outputs="text",
         | 
| 66 | 
             
                layout="horizontal",
         | 
| 67 | 
             
                theme="huggingface",
         | 
| 68 | 
             
                title="Whisper Demo: Transcribe Audio",
         | 
| 69 | 
             
                description=(
         | 
| 70 | 
            -
                    "Transcribe  | 
| 71 | 
            -
                    f"  | 
| 72 | 
            -
                    "  | 
| 73 | 
            -
                ) | 
| 74 | 
             
                allow_flagging="never",
         | 
| 75 | 
             
            )
         | 
| 76 |  | 
| 77 | 
             
            yt_transcribe = gr.Interface(
         | 
| 78 | 
             
                fn=yt_transcribe,
         | 
| 79 | 
            -
                inputs=[ | 
|  | |
|  | |
|  | |
| 80 | 
             
                outputs=["html", "text"],
         | 
| 81 | 
             
                layout="horizontal",
         | 
| 82 | 
             
                theme="huggingface",
         | 
| 83 | 
            -
                title="Whisper Demo: Transcribe YouTube",
         | 
| 84 | 
             
                description=(
         | 
| 85 | 
            -
                    "Transcribe  | 
| 86 | 
            -
                    f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME})  | 
| 87 | 
            -
                    "  | 
| 88 | 
            -
                ) | 
|  | |
|  | |
|  | |
| 89 | 
             
                allow_flagging="never",
         | 
| 90 | 
             
            )
         | 
| 91 |  | 
| 92 | 
             
            with demo:
         | 
| 93 | 
             
                gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
         | 
| 94 |  | 
| 95 | 
            -
            demo.launch(enable_queue=True)
         | 
|  | |
| 3 |  | 
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
             
            import pytube as pt
         | 
| 6 | 
            +
            from asr_diarize import ASRDiarizationPipeline  # TODO: speechbox import
         | 
| 7 |  | 
| 8 | 
             
            MODEL_NAME = "openai/whisper-tiny"
         | 
| 9 |  | 
|  | |
| 16 | 
             
                use_auth_token=HF_TOKEN,
         | 
| 17 | 
             
            )
         | 
| 18 |  | 
| 19 | 
            +
            def tuple_to_string(start_end_tuple, ndigits=1):
         | 
| 20 | 
            +
                return str((round(start_end_tuple[0], ndigits), round(start_end_tuple[1], ndigits)))
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 21 |  | 
|  | |
|  | |
| 22 |  | 
| 23 | 
            +
            def format_as_transcription(raw_segments, with_timestamps=False):
         | 
| 24 | 
            +
                if with_timestamps:
         | 
| 25 | 
            +
                    return "\n\n".join([chunk["speaker"] + " " + tuple_to_string(chunk["timestamp"]) +  chunk["text"] for chunk in raw_segments])
         | 
| 26 | 
            +
                else:
         | 
| 27 | 
            +
                    return "\n\n".join([chunk["speaker"] + chunk["text"] for chunk in raw_segments])
         | 
| 28 |  | 
|  | |
| 29 |  | 
| 30 | 
            +
            def transcribe(file_upload, with_timestamps):
         | 
| 31 | 
            +
                raw_segments = pipe(file_upload)
         | 
| 32 | 
            +
                transcription = format_as_transcription(raw_segments, with_timestamps=with_timestamps)
         | 
| 33 | 
            +
                return transcription
         | 
| 34 |  | 
| 35 |  | 
| 36 | 
             
            def _return_yt_html_embed(yt_url):
         | 
|  | |
| 42 | 
             
                return HTML_str
         | 
| 43 |  | 
| 44 |  | 
| 45 | 
            +
            def yt_transcribe(yt_url, with_timestamps):
         | 
| 46 | 
             
                yt = pt.YouTube(yt_url)
         | 
| 47 | 
             
                html_embed_str = _return_yt_html_embed(yt_url)
         | 
| 48 | 
             
                stream = yt.streams.filter(only_audio=True)[0]
         | 
|  | |
| 50 |  | 
| 51 | 
             
                text = pipe("audio.mp3")
         | 
| 52 |  | 
| 53 | 
            +
                return html_embed_str, format_as_transcription(text, with_timestamps=with_timestamps)
         | 
| 54 |  | 
| 55 |  | 
| 56 | 
             
            demo = gr.Blocks()
         | 
|  | |
| 58 | 
             
            mf_transcribe = gr.Interface(
         | 
| 59 | 
             
                fn=transcribe,
         | 
| 60 | 
             
                inputs=[
         | 
|  | |
| 61 | 
             
                    gr.inputs.Audio(source="upload", type="filepath", optional=True),
         | 
| 62 | 
            +
                    gr.Checkbox(label="With timestamps?", value=True),
         | 
| 63 | 
             
                ],
         | 
| 64 | 
             
                outputs="text",
         | 
| 65 | 
             
                layout="horizontal",
         | 
| 66 | 
             
                theme="huggingface",
         | 
| 67 | 
             
                title="Whisper Demo: Transcribe Audio",
         | 
| 68 | 
             
                description=(
         | 
| 69 | 
            +
                    "Transcribe audio files with speaker diarization using 🤗 Speechbox. Demo uses the pre-trained checkpoint"
         | 
| 70 | 
            +
                    f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) for ASR transcriptions and [PyAnnote Audio](https://huggingface.co/pyannote/speaker-diarization)"
         | 
| 71 | 
            +
                    " to label the speakers."
         | 
| 72 | 
            +
                )
         | 
| 73 | 
             
                allow_flagging="never",
         | 
| 74 | 
             
            )
         | 
| 75 |  | 
| 76 | 
             
            yt_transcribe = gr.Interface(
         | 
| 77 | 
             
                fn=yt_transcribe,
         | 
| 78 | 
            +
                inputs=[
         | 
| 79 | 
            +
                    gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
         | 
| 80 | 
            +
                    gr.Checkbox(label="With timestamps?", value=True),
         | 
| 81 | 
            +
                ],
         | 
| 82 | 
             
                outputs=["html", "text"],
         | 
| 83 | 
             
                layout="horizontal",
         | 
| 84 | 
             
                theme="huggingface",
         | 
| 85 | 
            +
                title="Whisper Speaker Diarization Demo: Transcribe YouTube",
         | 
| 86 | 
             
                description=(
         | 
| 87 | 
            +
                    "Transcribe YouTube videos with speaker diarization using 🤗 Speechbox. Demo uses the pre-trained checkpoint"
         | 
| 88 | 
            +
                    f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) for ASR transcriptions and [PyAnnote Audio](https://huggingface.co/pyannote/speaker-diarization)"
         | 
| 89 | 
            +
                    " to label the speakers."
         | 
| 90 | 
            +
                )
         | 
| 91 | 
            +
                examples=[
         | 
| 92 | 
            +
                    ["https://www.youtube.com/watch?v=9dAWIPixYxc", True],
         | 
| 93 | 
            +
                ],
         | 
| 94 | 
             
                allow_flagging="never",
         | 
| 95 | 
             
            )
         | 
| 96 |  | 
| 97 | 
             
            with demo:
         | 
| 98 | 
             
                gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
         | 
| 99 |  | 
| 100 | 
            +
            demo.launch(enable_queue=True, share=True)
         | 
 
			

