import gradio as gr
import os
from youtube_transcript_api import YouTubeTranscriptApi
import utils
from openai import OpenAI
from groq import Groq
from dotenv import load_dotenv

load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
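
# For reference, the .env file loaded above is expected to define the two
# keys read by os.getenv; a minimal sketch (values are placeholders):
#
#   GROQ_API_KEY=gsk_...
#   OPENAI_API_KEY=sk-...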

def get_llm_client_and_model(llm_model):
    # Map the model name chosen in the UI to the matching SDK client
    # and the full model identifier expected by the provider.
    if llm_model == "llama3-8b":
        llm_client = Groq(api_key=GROQ_API_KEY)
        llm_model = 'llama3-8b-8192'
    elif llm_model == "gpt-4o-mini":
        llm_client = OpenAI(api_key=OPENAI_API_KEY)
        llm_model = 'gpt-4o-mini-2024-07-18'
    else:
        # Avoid returning an unbound llm_client on an unexpected choice.
        raise ValueError(f"Unknown model choice: {llm_model}")
    return llm_client, llm_model
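
# A minimal usage sketch (not executed here): the Groq and OpenAI Python SDKs
# share the same chat-completions interface, so callers can use the returned
# client without branching on the provider. The prompt below is illustrative.
#
#   client, model = get_llm_client_and_model("llama3-8b")
#   response = client.chat.completions.create(
#       model=model,
#       messages=[{"role": "user", "content": "Summarize this transcript chunk."}],
#   )
#   print(response.choices[0].message.content)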

def gradio_process_video(video_id,
                         model_format_transcript, model_toc,
                         chunk_size_format_transcript, chunk_size_toc,
                         progress=gr.Progress()):
    # Serve cached chapters for the already-preprocessed videos listed in the UI text.
    if video_id in ["ErnWZxJovaM", "EuC1GWhQdKE"]:
        chapters = utils.load_json_chapters(video_id)
    else:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

        # Step 1: reformat the raw transcript into paragraphs.
        chunk_size_format_transcript = int(chunk_size_format_transcript)
        llm_client_format_transcript, llm_model_format_transcript = \
            get_llm_client_and_model(model_format_transcript)
        paragraphs, nb_input_tokens, nb_output_tokens, price = utils.transcript_to_paragraphs(
            transcript, llm_client_format_transcript, llm_model_format_transcript,
            chunk_size=chunk_size_format_transcript, progress=progress)
        paragraphs = utils.add_timestamps_to_paragraphs(transcript, paragraphs, num_words=50)

        # Step 2: derive a table of contents, then assemble chapters.
        chunk_size_toc = int(chunk_size_toc)
        llm_client_get_toc, llm_model_get_toc = get_llm_client_and_model(model_toc)
        json_toc, nb_input_tokens, nb_output_tokens, price = utils.paragraphs_to_toc(
            paragraphs, llm_client_get_toc, llm_model_get_toc,
            chunk_size=chunk_size_toc)
        chapters = utils.get_chapters(paragraphs, json_toc)

    output_html = utils.get_result_as_html(chapters, video_id)
    return {output_processing: str(output_html),
            gv_output: output_html}
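
# For context: YouTubeTranscriptApi.get_transcript returns one dict per
# caption segment, e.g.
#
#   [{"text": "hello everyone", "start": 0.0, "duration": 3.2}, ...]
#
# The utils helpers above are assumed to merge these segments into paragraphs
# and map each paragraph back to a start timestamp (the names suggest as much;
# see utils.py for the actual implementations).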
# %%
css = """
.content {
    padding: 20px;
    max-width: 800px;
    margin: 0 auto;
    background-color: #ffffff;
    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
    border-radius: 8px;
}
"""
# Pre-render an example result so the page shows output on load.
example_video_id = "ErnWZxJovaM"
example_chapters = utils.load_json_chapters(example_video_id)
example_output_html = utils.get_result_as_html(example_chapters, example_video_id)

with gr.Blocks(css=css) as app:
    gr.HTML("<div align='center'><h1 class='header'>Demo: Automatic video chaptering with LLMs and TF-IDF</h1></div>")
    gr.HTML("<div align='center'><h3 class='header'>From raw transcript to structured document</h3></div>")
    gr.HTML("<hr>")
    gr.Markdown("""This demo relies on:

- Groq's Llama 3 8B for transcript preprocessing
- OpenAI's GPT-4o-mini for chaptering. Note: using GPT-4o-mini for preprocessing as well improves results, but takes longer (around 2-3 minutes for a one-hour video)

The following YouTube video IDs are already preprocessed (copy and paste an ID into the box below):

- `ErnWZxJovaM`: [MIT course](https://www.youtube.com/watch?v=ErnWZxJovaM)
- `EuC1GWhQdKE`: [Anthropic](https://www.youtube.com/watch?v=EuC1GWhQdKE)

Check the [Medium article]() for more details""")

    gv_transcript = gr.State()

    video_id_input = gr.Textbox(label="Enter YouTube Video ID", value="EuC1GWhQdKE")

    with gr.Accordion("Set parameters", open=False):
        with gr.Row():
            with gr.Column(scale=1):
                model_format_transcript = gr.Dropdown(
                    [("Llama 3 8B (Groq)", "llama3-8b"), ("GPT-4o-mini (OpenAI)", "gpt-4o-mini")],
                    label="Transcript preprocessing", value="llama3-8b", interactive=True)
                chunk_size_format_transcript = gr.Textbox(label="Preprocessing chunk size", value=2000)
            with gr.Column(scale=1):
                model_toc = gr.Dropdown(
                    [("Llama 3 8B (Groq)", "llama3-8b"), ("GPT-4o-mini (OpenAI)", "gpt-4o-mini")],
                    label="Chaptering", value="gpt-4o-mini", interactive=True)
                chunk_size_toc = gr.Textbox(label="Chaptering chunk size", value=30)
            with gr.Column(scale=1):
                # Note: these fields are not yet wired into gradio_process_video,
                # which reads its keys from the environment instead.
                api_key_openai = gr.Textbox(label="OpenAI API Key", value="xxx")
                api_key_groq = gr.Textbox(label="Groq API Key", value="xxx")

    processing_button = gr.Button("Process transcript")

    gv_output = gr.State()

    gr.HTML("<hr>")

    output_processing = gr.HTML(label="Output processing", value=example_output_html)

    processing_button.click(gradio_process_video,
                            inputs=[video_id_input,
                                    model_format_transcript, model_toc,
                                    chunk_size_format_transcript, chunk_size_toc],
                            outputs=[output_processing, gv_output])

app.launch(debug=True, width="100%")
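
# To run this Space locally (assuming this file is app.py and the dependencies
# gradio, openai, groq, youtube-transcript-api and python-dotenv are installed):
#
#   python app.py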