import os

import gradio as gr
from dotenv import load_dotenv
from groq import Groq
from openai import OpenAI
from youtube_transcript_api import YouTubeTranscriptApi

import utils

load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# import importlib
# importlib.reload(utils)


def get_llm_client_and_model(llm_model):
    """Map a UI model choice to an instantiated API client and a concrete model name."""
    if llm_model == "llama3-8b":
        llm_client = Groq(api_key=GROQ_API_KEY)
        llm_model = "llama3-8b-8192"
    elif llm_model == "gpt-4o-mini":
        llm_client = OpenAI(api_key=OPENAI_API_KEY)
        llm_model = "gpt-4o-mini-2024-07-18"
    else:
        raise ValueError(f"Unknown model choice: {llm_model}")
    return llm_client, llm_model


def gradio_process_video(video_id, model_format_transcript, model_toc,
                         chunk_size_format_transcript, chunk_size_toc,
                         progress=gr.Progress()):
    # Serve precomputed chapters for the already-preprocessed example videos.
    if video_id in ["ErnWZxJovaM", "EuC1GWhQdKE"]:
        chapters = utils.load_json_chapters(video_id)
    else:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

        # Step 1: rewrite the raw transcript into paragraphs, chunk by chunk.
        chunk_size_format_transcript = int(chunk_size_format_transcript)
        llm_client_format_transcript, llm_model_format_transcript = \
            get_llm_client_and_model(model_format_transcript)
        paragraphs, nb_input_tokens, nb_output_tokens, price = utils.transcript_to_paragraphs(
            transcript, llm_client_format_transcript, llm_model_format_transcript,
            chunk_size=chunk_size_format_transcript, progress=progress)

        # Step 2: attach a start timestamp to each paragraph.
        paragraphs = utils.add_timestamps_to_paragraphs(transcript, paragraphs, num_words=50)

        # Step 3: derive a table of contents from the paragraphs.
        chunk_size_toc = int(chunk_size_toc)
        llm_client_get_toc, llm_model_get_toc = get_llm_client_and_model(model_toc)
        json_toc, nb_input_tokens, nb_output_tokens, price = utils.paragraphs_to_toc(
            paragraphs, llm_client_get_toc, llm_model_get_toc, chunk_size=chunk_size_toc)

        # Step 4: group the paragraphs under their chapter headings.
        chapters = utils.get_chapters(paragraphs, json_toc)

    output_html = utils.get_result_as_html(chapters, video_id)
    return {output_processing: str(output_html), gv_output: output_html}
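
# For orientation, a sketch of the data flowing through gradio_process_video.
# The transcript entries are the documented youtube_transcript_api format; the
# paragraph and chapter field names are assumptions about utils' schema, not a
# confirmed API.
#
#   transcript: [{"text": "Hi everyone.", "start": 0.0, "duration": 1.4}, ...]
#   paragraphs: [{"paragraph_number": 0, "paragraph_text": "...", "start_time": 0.0}, ...]
#   chapters:   [{"chapter_title": "...", "paragraphs": [...]}, ...]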

# %%
css = """
.content {
    padding: 20px;
    max-width: 800px;
    margin: 0 auto;
    background-color: #ffffff;
    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
    border-radius: 8px;
}
"""

example_video_id = "ErnWZxJovaM"
example_chapters = utils.load_json_chapters(example_video_id)
example_output_html = utils.get_result_as_html(example_chapters, example_video_id)

with gr.Blocks(css=css) as app:
    gr.HTML("<h1>Demo: Automatic video chaptering with LLMs and TF-IDF</h1>")

") gr.HTML("

From raw transcript to structured document

") gr.HTML("
") gr.Markdown("""This demo relies on - Groq's Llama 3 8B for transcript preprocessing - OpenAI's GPT-4o-mini for chaptering. Note: Using GPT-4o-mini for transcript preprocessing will improve results, but takes longer (around 2/3 minutes for a one-hour video) The following YouTube video ID are already preprocessed (copy and paste ID in box below): - `ErnWZxJovaM`: [MIT course](https://www.youtube.com/watch?v=ErnWZxJovaM) - `EuC1GWhQdKE`: [Anthropic](https://www.youtube.com/watch?v=EuC1GWhQdKE) Check the [Medium article]() for more details""" ) gv_transcript = gr.State() video_id_input = gr.Textbox(label="Enter YouTube Video ID", value="EuC1GWhQdKE") with gr.Accordion("Set parameters", open=False): with gr.Row(): with gr.Column(scale=1): model_format_transcript = gr.Dropdown( [("LLama 3 8B (Groq)", "llama3-8b"), ("GPT-4o-mini (OpenAI)", "gpt-4o-mini")], label="Transcript preprocessing", value="llama3-8b", interactive=True) chunk_size_format_transcript = gr.Textbox(label="Preprocessing chunk size", value=2000) with gr.Column(scale=1): model_toc = gr.Dropdown([("LLama 3 8B (Groq)", "llama3-8b"), ("GPT-4o-mini (OpenAI)", "gpt-4o-mini")], label="Chaptering", value="gpt-4o-mini", interactive=True) chunk_size_toc = gr.Textbox(label="Chaptering chunk size", value=30) with gr.Column(scale=1): api_key_openai = gr.Textbox(label="OpenAI API Key", value="xxx") api_key_groq = gr.Textbox(label="Groq API Key", value="xxx") processing_button = gr.Button("Process transcript") gv_output = gr.State() gr.HTML("
") output_processing = gr.HTML(label="Output processing", value=example_output_html) processing_button.click(gradio_process_video, inputs=[video_id_input, model_format_transcript, model_toc, chunk_size_format_transcript, chunk_size_toc], outputs=[output_processing, gv_output]) # gr.HTML(result_as_html) app.launch(debug=True, width="100%")