In [1]:
# @title 0. Installations
# Install necessary packages (run this cell once if needed)

!pip install gradio torch torchvision torchaudio openai-whisper soundfile parler-tts transformers google-generativeai numpy librosa flash-attn

Collecting gradio
  Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting parler-tts
  Downloading parler_tts-0.2.3.tar.gz (80 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.2/80.2 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting flash-attn
  Downloading flash_attn-2.7.4.post1.tar.gz (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m83.5 MB/s[0m eta 

In [2]:
# @title 1. Setup and Imports

import asyncio
import base64
import io
import logging
import math
import os
import queue # For streamer's queue.Empty exception
import time
from threading import Thread, Event

import google.generativeai as genai
import gradio as gr
import numpy as np
import soundfile as sf
import torch
import whisper
from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
from scipy.signal import resample_poly
from transformers import AutoTokenizer, GenerationConfig as HFGeLE
# from google.colab import userdata
# from flash_attn_triton import FlashAttention

# --- Runtime configuration ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
WHISPER_MODEL_SIZE = "tiny"
TTS_MODEL_NAME = "ai4bharat/indic-parler-tts"
attention_implementation = "sdpa" # Also try with flash_attention_2
GEMINI_MODEL_NAME_NOTEBOOK = "gemini-1.5-flash-latest"

# SECURITY: the API key is read from the environment and must never be written
# into the notebook. Any key that was previously committed in this file should
# be considered leaked and revoked in the Google Cloud console.
# On Colab, populate it from the secrets store before running this cell, e.g.
#   os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "").strip()

# TTS prefers bfloat16 on GPUs that support it, then float16, and float32 on CPU.
torch_dtype_tts = torch.bfloat16 if DEVICE == "cuda" and torch.cuda.is_bf16_supported() else (torch.float16 if DEVICE == "cuda" else torch.float32)
torch_dtype_whisper = torch.float16 if DEVICE == "cuda" else torch.float32

# Sampling options forwarded to the TTS model's generate() call.
TTS_STREAMING_PARAMS_NOTEBOOK = {
    "do_sample": True,
    "temperature": 1.0,
    "min_new_tokens": 5,
}

# --- Logging ---
logging.basicConfig(level=logging.INFO)
logger_nb = logging.getLogger("notebook_ai_pipeline") # Use a specific logger for the notebook
logger_nb.setLevel(logging.INFO)


# --- Global Model Variables for Notebook ---
# Populated lazily by the model-loading function; None means "not loaded yet".
whisper_model_nb = None
gemini_model_instance_nb = None
tts_model_nb = None
tts_tokenizer_nb = None

In [3]:
# @title 2. Model Loading Functions
def load_all_resources_notebook():
    """Load Whisper, IndicParler-TTS and the Gemini client into the notebook globals.

    Each resource is loaded only if its global is still ``None``, so the
    function is safe to call repeatedly (e.g. when a cell is re-run).

    Side effects:
        * Assigns ``whisper_model_nb``, ``tts_model_nb``, ``tts_tokenizer_nb``
          and ``gemini_model_instance_nb``.
        * Adds ``pad_token_id`` to ``TTS_STREAMING_PARAMS_NOTEBOOK`` when the
          TTS tokenizer defines one.

    Gemini is configured only when ``GOOGLE_API_KEY`` is non-empty; a missing
    key or a configuration failure is logged and leaves
    ``gemini_model_instance_nb`` as ``None`` instead of raising.
    """
    global whisper_model_nb, tts_model_nb, tts_tokenizer_nb, gemini_model_instance_nb
    logger_nb.info(f"Notebook: Loading models. Whisper on {DEVICE} with {torch_dtype_whisper}, TTS on {DEVICE} with {torch_dtype_tts}")

    if whisper_model_nb is None:
        logger_nb.info(f"Notebook: Loading Whisper model: {WHISPER_MODEL_SIZE}")
        whisper_model_nb = whisper.load_model(WHISPER_MODEL_SIZE, device=DEVICE)
        logger_nb.info("Notebook: Whisper model loaded successfully.")

    # Check both globals: if a previous call loaded the model but failed on the
    # tokenizer, the tokenizer must still be retried here.
    if tts_model_nb is None or tts_tokenizer_nb is None:
        logger_nb.info(f"Notebook: Loading IndicParler-TTS model: {TTS_MODEL_NAME}")
        if tts_model_nb is None:
            tts_model_nb = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL_NAME).to(DEVICE, dtype=torch_dtype_tts)
        if tts_tokenizer_nb is None:
            tts_tokenizer_nb = AutoTokenizer.from_pretrained(TTS_MODEL_NAME)
        if tts_tokenizer_nb is not None and tts_tokenizer_nb.pad_token_id is not None:
            TTS_STREAMING_PARAMS_NOTEBOOK["pad_token_id"] = tts_tokenizer_nb.pad_token_id
        logger_nb.info(f"Notebook: IndicParler-TTS model loaded. Streaming params: {TTS_STREAMING_PARAMS_NOTEBOOK}")

    if gemini_model_instance_nb is None:
        # Only test for presence. Comparing against a specific key literal would
        # reject every other valid key and embed a credential in the source.
        if not GOOGLE_API_KEY:
            logger_nb.warning("Notebook: GOOGLE_API_KEY not found or not replaced. LLM functionality will be limited.")
        else:
            try:
                genai.configure(api_key=GOOGLE_API_KEY)
                gemini_model_instance_nb = genai.GenerativeModel(GEMINI_MODEL_NAME_NOTEBOOK)
                logger_nb.info(f"Notebook: Gemini API configured with model: {GEMINI_MODEL_NAME_NOTEBOOK}")
            except Exception as e:
                logger_nb.error(f"Notebook: Failed to configure Gemini API: {e}", exc_info=True)
                gemini_model_instance_nb = None
    logger_nb.info("Notebook: All resources loaded (or attempted).")

In [4]:
# @title 3. Helper Functions for AI Pipeline
_WHISPER_SAMPLE_RATE = 16000  # Whisper treats a raw waveform array as 16 kHz mono audio.


def _prepare_waveform_for_whisper(sample_rate, audio_numpy):
    """Convert a recorded waveform into a 16 kHz mono float32 1-D array.

    Args:
        sample_rate: Sampling rate of ``audio_numpy`` in Hz.
        audio_numpy: 1-D array, or 2-D array with one channel axis.

    Returns:
        A contiguous float32 1-D numpy array sampled at 16 kHz.

    Raises:
        ValueError: If the array has more than two dimensions or the sample
            rate is not positive.
    """
    waveform = np.asarray(audio_numpy)

    # Scale integer PCM to [-1, 1] *before* mixing channels: averaging promotes
    # the array to float64, which would hide the integer dtype and skip scaling.
    if np.issubdtype(waveform.dtype, np.integer):
        waveform = waveform.astype(np.float32) / np.iinfo(waveform.dtype).max
    else:
        waveform = waveform.astype(np.float32)

    if waveform.ndim == 2:
        # The channel axis is assumed to be the shorter one, which covers both
        # (samples, channels) and (channels, samples) layouts.
        waveform = waveform.mean(axis=int(np.argmin(waveform.shape)))
    elif waveform.ndim != 1:
        raise ValueError(f"Unsupported audio shape {waveform.shape}; expected 1-D or 2-D audio.")

    source_rate = int(sample_rate)
    if source_rate <= 0:
        raise ValueError(f"Invalid sample rate: {sample_rate}")
    if source_rate != _WHISPER_SAMPLE_RATE:
        common = math.gcd(source_rate, _WHISPER_SAMPLE_RATE)
        waveform = resample_poly(waveform, _WHISPER_SAMPLE_RATE // common, source_rate // common)

    return np.ascontiguousarray(waveform, dtype=np.float32)


async def transcribe_audio_notebook(audio_input_tuple):
    """Transcribe a ``(sample_rate, numpy_array)`` recording with Whisper.

    Args:
        audio_input_tuple: ``(sample_rate, audio)`` as produced by a Gradio
            ``Audio`` input with ``type="numpy"``, or ``None``.

    Returns:
        The transcription, or one of the human-readable status strings
        ("Error: Whisper model not loaded.", "No audio provided.",
        "Empty audio received.", "Transcription resulted in empty text.",
        "Error during transcription: ...") - this function never raises.
    """
    if whisper_model_nb is None:
        logger_nb.error("Notebook STT: Whisper model not loaded.")
        return "Error: Whisper model not loaded."

    if audio_input_tuple is None:
        logger_nb.warning("Notebook STT: No audio provided.")
        return "No audio provided."

    sample_rate, audio_numpy = audio_input_tuple

    if audio_numpy is None or audio_numpy.size == 0:
        logger_nb.warning("Notebook STT: Audio numpy array is empty.")
        return "Empty audio received."

    try:
        logger_nb.info(f"Notebook STT: Transcribing audio of shape {audio_numpy.shape} with original sample rate {sample_rate}")
        # transcribe() has no sample-rate option, so the waveform is resampled here.
        waveform = _prepare_waveform_for_whisper(sample_rate, audio_numpy)
        use_fp16 = DEVICE == "cuda" and torch_dtype_whisper == torch.float16

        # Inference is blocking; run it in the default executor so the event
        # loop stays responsive.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None, lambda: whisper_model_nb.transcribe(waveform, fp16=use_fp16)
        )
        transcribed_text = result["text"].strip()
        logger_nb.info(f"Notebook STT: Transcription: {transcribed_text}")
        return transcribed_text if transcribed_text else "Transcription resulted in empty text."
    except Exception as e:
        logger_nb.error(f"Notebook STT: Error during transcription: {e}", exc_info=True)
        return f"Error during transcription: {str(e)}"

def _extract_gemini_text(response):
    """Return the text carried by a Gemini response, or ``""`` if there is none.

    The ``response.text`` accessor is tried first. It is wrapped in
    ``try/except`` because it can raise ``ValueError`` when the response has no
    usable part; ``hasattr`` would not catch that. If it yields nothing, the
    parts of the first candidate that contains text are joined instead.
    """
    try:
        quick_text = response.text
    except (ValueError, AttributeError):
        quick_text = None
    if quick_text:
        return quick_text.strip()

    for candidate in getattr(response, "candidates", None) or []:
        content = getattr(candidate, "content", None)
        parts = getattr(content, "parts", None) or []
        joined = "".join(getattr(part, "text", "") or "" for part in parts).strip()
        if joined:
            return joined
    return ""


async def generate_gemini_response_notebook(text: str):
    """Ask the configured Gemini model to answer ``text``.

    Args:
        text: The user's utterance. Status strings from the speech-to-text step
            (errors, "No audio provided", "Empty audio") are rejected.

    Returns:
        The model's reply, or a human-readable fallback sentence when Gemini is
        not configured, the input is unusable, the response is empty/blocked,
        or the request fails. This function never raises.
    """
    if not gemini_model_instance_nb:
        logger_nb.error("Notebook LLM: Gemini model instance not available.")
        return "Sorry, the language model is currently unavailable (Gemini not configured)."
    if not isinstance(text, str) or not text.strip() or text.startswith("Error:") or "No audio provided" in text or "Empty audio" in text:
        logger_nb.warning(f"Notebook LLM: Invalid input for Gemini: '{text}'. Skipping.")
        return "LLM (Gemini) skipped due to prior error or no input."
    try:
        full_prompt = f"User: {text}\nAssistant:"
        logger_nb.info(f"Notebook LLM: Sending prompt to Gemini: \"{full_prompt[:100]}...\"")

        # generate_content() is blocking network I/O; keep it off the event loop.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, gemini_model_instance_nb.generate_content, full_prompt)

        response_text = _extract_gemini_text(response)
        if not response_text:
            response_text = "I'm sorry, I couldn't generate a response for that (Gemini)."
            safety_feedback = ""
            prompt_feedback = getattr(response, "prompt_feedback", None)
            candidates = getattr(response, "candidates", None) or []
            if prompt_feedback:
                safety_feedback = f" Safety Feedback: {prompt_feedback}"
            elif candidates and hasattr(candidates[0], "finish_reason"):
                finish_reason = candidates[0].finish_reason
                # finish_reason may be an enum rather than a string, so compare
                # by name instead of against the literal "STOP".
                if getattr(finish_reason, "name", str(finish_reason)) != "STOP":
                    safety_feedback = f" Finish Reason: {finish_reason}"
            logger_nb.warning(f"Notebook LLM: Gemini response might be empty or blocked.{safety_feedback}")
        logger_nb.info(f"Notebook LLM: Gemini Response: {response_text}")
        return response_text
    except Exception as e:
        logger_nb.error(f"Notebook LLM: Error during Gemini generation: {e}", exc_info=True)
        return f"Sorry, I encountered an error trying to respond with Gemini: {str(e)}"

async def synthesize_speech_streaming_notebook(text: str, description: str = "A clear, female voice speaking in English.", play_steps_in_s: float = 0.4):
    """Stream synthesized speech for ``text`` as an async generator.

    Generation runs in a background daemon thread that feeds a
    ``ParlerTTSStreamer``; this coroutine drains the streamer's queue without
    blocking the event loop.

    Args:
        text: Sentence(s) to speak. Empty text and upstream status strings
            ("Error:...", "LLM skipped", "unavailable") produce no audio.
        description: Natural-language description of the desired voice.
        play_steps_in_s: Approximate duration of audio per streamed chunk.

    Yields:
        ``(sampling_rate, None)`` once before any audio, then ``(None, chunk)``
        for every non-empty 1-D float32 numpy chunk, and finally
        ``(None, None)``. When the models are not loaded or ``text`` is
        unusable, the single item ``(None, None)`` is yielded.
    """
    if tts_model_nb is None or tts_tokenizer_nb is None:
        logger_nb.error("Notebook TTS: Model or tokenizer not loaded.")
        yield None, None # Yield None for sample_rate, None for chunk
        return

    if not isinstance(text, str) or not text.strip() or text.startswith("Error:") or "LLM skipped" in text or "unavailable" in text:
        logger_nb.warning(f"Notebook TTS: Invalid input text for TTS: '{text}'. Yielding no audio.")
        yield None, None
        return

    streamer = None
    thread = None
    streamer_iter_count = 0

    try:
        logger_nb.info(f"Notebook TTS Streamer: Starting for text: \"{text[:50]}...\"")

        audio_encoder_config = getattr(tts_model_nb.config, "audio_encoder", None)
        sampling_rate = getattr(audio_encoder_config, "sampling_rate", None)
        if sampling_rate is None:
            logger_nb.warning("Notebook TTS Streamer: Could not find sampling_rate, defaulting to 24000")
            sampling_rate = 24000

        frame_rate = getattr(audio_encoder_config, "frame_rate", None)
        if frame_rate is None:
            logger_nb.warning("Notebook TTS Streamer: frame_rate not found, using default of 100 Hz.")
            frame_rate = 100

        # Number of decoder steps per emitted chunk; never less than one.
        play_steps = max(1, int(frame_rate * play_steps_in_s))

        logger_nb.info(f"Notebook TTS Streamer: params: sampling_rate={sampling_rate}, frame_rate={frame_rate}, play_steps={play_steps}")

        streamer = ParlerTTSStreamer(tts_model_nb, device=DEVICE, play_steps=play_steps)
        # NOTE(review): one tokenizer encodes both the description and the
        # prompt. The indic-parler-tts model card appears to use a separate
        # description tokenizer (from the text encoder's checkpoint) - confirm
        # which one is intended.
        description_inputs = tts_tokenizer_nb(description, return_tensors="pt")
        prompt_inputs = tts_tokenizer_nb(text, return_tensors="pt")

        thread_generation_kwargs = {
            "input_ids": description_inputs.input_ids.to(DEVICE),
            "prompt_input_ids": prompt_inputs.input_ids.to(DEVICE),
            "streamer": streamer,
            **TTS_STREAMING_PARAMS_NOTEBOOK,
        }
        if tts_tokenizer_nb.pad_token_id is not None: # Ensure pad_token_id is set if model expects it
            thread_generation_kwargs["pad_token_id"] = tts_tokenizer_nb.pad_token_id
        description_mask = getattr(description_inputs, "attention_mask", None)
        if description_mask is not None:
            thread_generation_kwargs["attention_mask"] = description_mask.to(DEVICE)
        prompt_mask = getattr(prompt_inputs, "attention_mask", None)
        if prompt_mask is not None:
            thread_generation_kwargs["prompt_attention_mask"] = prompt_mask.to(DEVICE)

        def _generate_in_thread_notebook():
            """Run model.generate() and always leave an end-of-stream sentinel."""
            try:
                logger_nb.info(f"Notebook TTS generation thread: Started for text \"{text[:30]}...\"")
                with torch.no_grad():
                    tts_model_nb.generate(**thread_generation_kwargs)
                logger_nb.info(f"Notebook TTS generation thread: Finished model.generate() for text \"{text[:30]}...\"")
            except Exception as e_thread:
                logger_nb.error(f"Notebook TTS generation thread: Error: {e_thread}", exc_info=True)
            finally:
                # Push a sentinel directly instead of calling streamer.end()
                # again: it is cheap, cannot fail while decoding, and the
                # consumer stops at the first None, so an extra one after a
                # successful run is harmless.
                streamer.audio_queue.put(None)

        thread = Thread(target=_generate_in_thread_notebook)
        thread.daemon = True
        thread.start()

        loop = asyncio.get_running_loop()

        # Yield sample rate once at the beginning
        yield sampling_rate, None # Signal sample rate, no audio chunk yet

        # Drain until the sentinel arrives. Do not stop just because the
        # producer thread has finished: chunks may still be queued.
        while True:
            try:
                # Use run_in_executor for the blocking queue get
                audio_chunk = await loop.run_in_executor(None, lambda: streamer.audio_queue.get(timeout=0.1))
            except queue.Empty:
                if not thread.is_alive() and streamer.audio_queue.empty():
                    logger_nb.warning("Notebook TTS Streamer: Generation thread died unexpectedly. Ending stream.")
                    break
                continue

            if audio_chunk is None:
                logger_nb.info("Notebook TTS Streamer: Yielded None, assuming end from producer.")
                break

            # Accept both numpy arrays and torch tensors from the queue.
            if torch.is_tensor(audio_chunk):
                audio_chunk = audio_chunk.detach().cpu().to(torch.float32).numpy()
            audio_chunk_np = np.atleast_1d(np.asarray(audio_chunk, dtype=np.float32).squeeze())
            if audio_chunk_np.size == 0:
                logger_nb.debug("Notebook TTS Streamer: Yielded empty or non-tensor chunk.")
                continue

            yield None, audio_chunk_np # No sample rate on subsequent chunks, only audio
            streamer_iter_count += 1

        logger_nb.info(f"Notebook TTS Streamer: Finished iteration. Yielded {streamer_iter_count} chunks.")

    except Exception as e:
        logger_nb.error(f"Notebook TTS Streamer: Error in main function: {e}", exc_info=True)
    finally:
        # No `yield` in here: if the consumer closes the generator early, a
        # yield inside `finally` raises "async generator ignored GeneratorExit".
        logger_nb.info(f"Notebook TTS Streamer: Exiting for text \"{text[:50]}...\".")
        if thread and thread.is_alive():
            logger_nb.info("Notebook TTS Streamer: Waiting for thread in finally...")
            thread.join(timeout=2.0)
            if thread.is_alive():
                logger_nb.warning("Notebook TTS Streamer: Thread still alive after join timeout.")

    yield None, None # Signal end of stream

In [5]:
# @title 4. Gradio Interface Definition and Pipeline

# Trigger model loading when this cell runs. The loader is only invoked while at
# least one of the model globals is still unset, so re-running the cell does not
# reload what is already in memory.
_required_models_nb = (whisper_model_nb, tts_model_nb, gemini_model_instance_nb)
if any(model is None for model in _required_models_nb):
    load_all_resources_notebook()

async def full_ai_pipeline_notebook(audio_input_microphone):
    """
    Gradio function that processes audio input through STT, LLM (Gemini),
    and yields streaming TTS audio chunks for Gradio's streaming audio output.

    Args:
        audio_input_microphone: ``(sample_rate, numpy_array)`` from the
            microphone component, or ``None`` when nothing was recorded.

    Yields:
        ``(transcription, llm_text, audio)`` triples, one per UI update.
        ``audio`` is ``None`` when there is no new audio, otherwise a
        ``(sample_rate, float32_chunk)`` tuple.
    """
    logger_nb.info("Gradio Pipeline: Started.")

    # 1. STT
    if audio_input_microphone is None:
        yield "Please provide audio input.", "Recording not provided.", None
        return

    transcribed_text = await transcribe_audio_notebook(audio_input_microphone)
    logger_nb.info(f"Gradio Pipeline: Transcription: {transcribed_text}")
    yield transcribed_text, "Processing with LLM...", None # Update UI

    # 2. LLM (Gemini)
    # These markers are the status strings the STT helper returns instead of a
    # transcription; none of them should be sent to the LLM as if the user said it.
    stt_failure_prefixes = ("Error:", "Error during transcription")
    stt_failure_markers = ("No audio provided", "Empty audio", "Transcription resulted in empty text")
    stt_failed = transcribed_text.startswith(stt_failure_prefixes) or any(
        marker in transcribed_text for marker in stt_failure_markers
    )
    if stt_failed:
        llm_response_text = "Cannot proceed with LLM due to STT error."
    else:
        llm_response_text = await generate_gemini_response_notebook(transcribed_text)
    logger_nb.info(f"Gradio Pipeline: LLM Response: {llm_response_text}")
    yield transcribed_text, llm_response_text, None # Update UI

    # 3. TTS Streaming
    # Status/apology strings from the previous steps are shown but not spoken.
    llm_failure_prefixes = ("Error:", "Sorry, I encountered an error trying to respond with Gemini")
    llm_failure_markers = ("LLM skipped", "LLM (Gemini) skipped", "unavailable")
    llm_failed = stt_failed or llm_response_text.startswith(llm_failure_prefixes) or any(
        marker in llm_response_text for marker in llm_failure_markers
    )
    if llm_failed:
        logger_nb.warning("Gradio Pipeline: Skipping TTS due to LLM error.")
        final_llm_text_with_tts_status = f"{llm_response_text} (TTS Skipped)"
        yield transcribed_text, final_llm_text_with_tts_status, None
        return

    tts_description = "A clear, female voice speaking in English."

    # The TTS generator announces the rate as (sample_rate, None), then sends
    # (None, chunk) items and ends with (None, None). The audio output needs a
    # complete (sample_rate, ndarray) pair for every chunk, so the rate is
    # remembered and re-attached here.
    stream_sample_rate = None
    async for sr, audio_chunk_np in synthesize_speech_streaming_notebook(llm_response_text, tts_description):
        if sr is not None:
            stream_sample_rate = sr
            logger_nb.info(f"Gradio Pipeline: TTS Stream - Yielding sample rate {sr}")
        if audio_chunk_np is not None:
            if stream_sample_rate is None:
                logger_nb.warning("Gradio Pipeline: TTS Stream - Dropping audio chunk received before the sample rate.")
                continue
            logger_nb.debug(f"Gradio Pipeline: TTS Stream - Yielding audio chunk of shape {audio_chunk_np.shape}")
            yield transcribed_text, llm_response_text, (stream_sample_rate, audio_chunk_np)
        elif sr is None:
            # (None, None): end-of-stream marker. Keep iterating rather than
            # `break` so the generator runs to completion and cleans up itself.
            logger_nb.info("Gradio Pipeline: TTS Stream - Signalling end of stream.")

    logger_nb.info("Gradio Pipeline: Finished.")


# Gradio UI definition.
# Run this cell only after the pipeline functions above are defined and the
# models have been loaded.
with gr.Blocks(title="Notebook Conversational AI") as demo_notebook:
    gr.Markdown("# Conversational AI in Jupyter Notebook")
    gr.Markdown("Uses AI4Bharat IndicParler-TTS (Streaming), Gemini LLM, and Whisper STT.")

    with gr.Row():
        # The microphone is not streamed: the recording is processed as a whole
        # once the user stops and presses the button.
        mic_input = gr.Audio(
            label="Speak Here",
            sources=["microphone"],
            type="numpy",
            streaming=False,
        )

    submit_button = gr.Button("Process Speech")

    with gr.Accordion("Conversation Log", open=True):
        stt_output = gr.Textbox(
            label="You Said (Transcription)",
            lines=2,
            interactive=False,
        )
        llm_output = gr.Textbox(
            label="Assistant's Response (Text)",
            lines=4,
            interactive=False,
        )
        # Streaming output: the component receives audio chunk by chunk.
        tts_audio_output = gr.Audio(
            label="Assistant's Speech (Streaming)",
            streaming=True,
            autoplay=False,
        )

    # One click runs the whole STT -> LLM -> TTS pipeline; each yielded triple
    # updates the three outputs in this order.
    submit_button.click(fn=full_ai_pipeline_notebook, inputs=[mic_input], outputs=[stt_output, llm_output, tts_audio_output])

INFO:notebook_ai_pipeline:Notebook: Loading models. Whisper on cuda with torch.float16, TTS on cuda with torch.bfloat16
INFO:notebook_ai_pipeline:Notebook: Loading Whisper model: tiny
100%|█████████████████████████████████████| 72.1M/72.1M [00:01<00:00, 50.2MiB/s]
INFO:notebook_ai_pipeline:Notebook: Whisper model loaded successfully.
INFO:notebook_ai_pipeline:Notebook: Loading IndicParler-TTS model: ai4bharat/indic-parler-tts
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.75G [00:00<?, ?B/s]

  "_name_or_path": "google/flan-t5-large",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 2816,
  "d_kv": 64,
  "d_model": 1024,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 24,
  "num_heads": 16,
  "num_layers": 24,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "transformers_version": "4.46.1",
  "use_cache": true,
  "vocab_size": 32128
}

  "_name_or_path": "ylacombe/dac_44khz",
  "architectures": [
    "DacModel"
  ],
  "codebook_dim": 8,
  "codebook_loss_weight": 1.0,
  "codebook_size": 1024,
  "commitment_loss_weight": 0.25,
  "decoder_hidden_si

generation_config.json:   0%|          | 0.00/223 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/990 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/10.3M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

INFO:notebook_ai_pipeline:Notebook: IndicParler-TTS model loaded. Streaming params: {'do_sample': True, 'temperature': 1.0, 'min_new_tokens': 5, 'pad_token_id': 0}
INFO:notebook_ai_pipeline:Notebook: Gemini API configured with model: gemini-1.5-flash-latest
INFO:notebook_ai_pipeline:Notebook: All resources loaded (or attempted).


In [6]:
# @title 5. Launch the Gradio App (Inline)
# Make sure models are loaded before launching
if whisper_model_nb is None or tts_model_nb is None or gemini_model_instance_nb is None:
    # Point at the cell that actually *calls* the loader ("Model Loading
    # Functions" only defines it). The Gemini instance also stays None when
    # GOOGLE_API_KEY is missing or its configuration failed.
    print(
        "Models not loaded. Please run the '4. Gradio Interface Definition and Pipeline' cell first "
        "(it calls load_all_resources_notebook()) and make sure GOOGLE_API_KEY is configured."
    )
else:
    print("Launching Gradio interface inline...")
    # When running in a notebook, launch() will typically try to display inline.
    # `share=True` creates a PUBLIC link: anyone with the URL can use this app,
    # and therefore your Gemini quota, until the link expires. Set share=False
    # unless you really need external access.
    # `inline=True` is often default in notebooks but can be explicit.
    # `debug=True` can show more Gradio errors in output.
    demo_notebook.launch(inline=True, debug=False, share=True)

Launching Gradio interface inline...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cb1cebc1fc444a2178.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
