import gradio as gr
import os
from youtube_transcript_api import YouTubeTranscriptApi
import utils
from openai import OpenAI
from groq import Groq
from dotenv import load_dotenv
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
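
# The keys are expected in the environment, e.g. via a local .env file
# (values below are placeholders):
#   GROQ_API_KEY=gsk_...
#   OPENAI_API_KEY=sk-...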

def get_llm_client_and_model(llm_model):
    # Map the model name selected in the UI to an API client and the full
    # model identifier expected by that provider.
    if llm_model == "llama3-8b":
        llm_client = Groq(api_key=GROQ_API_KEY)
        llm_model = 'llama3-8b-8192'
    elif llm_model == "gpt-4o-mini":
        llm_client = OpenAI(api_key=OPENAI_API_KEY)
        llm_model = 'gpt-4o-mini-2024-07-18'
    else:
        raise ValueError(f"Unknown model: {llm_model}")
    return llm_client, llm_model
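
# End-to-end pipeline: fetch the English transcript, rewrite it into paragraphs,
# infer a table of contents, and render the chapters as HTML.
# YouTubeTranscriptApi.get_transcript returns the transcript as a list of
# snippet dicts, for example:
#   [{"text": "hello and welcome", "start": 0.0, "duration": 3.2}, ...]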
def gradio_process_video(video_id,
                         model_format_transcript, model_toc,
                         chunk_size_format_transcript, chunk_size_toc,
                         progress=gr.Progress()):
    if video_id in ["ErnWZxJovaM"]:
        # Serve precomputed chapters for the example video, avoiding API calls.
        chapters = utils.load_json_chapters(video_id)
    else:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

        # Step 1: rewrite the raw transcript into paragraphs.
        chunk_size_format_transcript = int(chunk_size_format_transcript)
        llm_client_format_transcript, llm_model_format_transcript = \
            get_llm_client_and_model(model_format_transcript)
        paragraphs, nb_input_tokens, nb_output_tokens, price = \
            utils.transcript_to_paragraphs(transcript,
                                           llm_client_format_transcript, llm_model_format_transcript,
                                           chunk_size=chunk_size_format_transcript, progress=progress)
        paragraphs = utils.add_timestamps_to_paragraphs(transcript, paragraphs, num_words=50)

        # Step 2: derive a table of contents from the paragraphs.
        chunk_size_toc = int(chunk_size_toc)
        llm_client_get_toc, llm_model_get_toc = get_llm_client_and_model(model_toc)
        json_toc, nb_input_tokens, nb_output_tokens, price = \
            utils.paragraphs_to_toc(paragraphs,
                                    llm_client_get_toc, llm_model_get_toc,
                                    chunk_size=chunk_size_toc)

        # Step 3: group the paragraphs into chapters following the table of contents.
        chapters = utils.get_chapters(paragraphs, json_toc)

    output_html = utils.get_result_as_html(chapters, video_id)

    # Return a dict keyed by the output components defined below in the Blocks UI.
    return {output_processing: str(output_html),
            gv_output: output_html}
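
# Minimal styling for the rendered chapter output.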
css = """
.content {
padding: 20px;
max-width: 800px;
margin: 0 auto;
background-color: #ffffff;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
border-radius: 8px;
}
"""
example_video_id = "ErnWZxJovaM"
example_chapters = utils.load_json_chapters(example_video_id)
example_output_html = utils.get_result_as_html(example_chapters, example_video_id)
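
# Build the Gradio interface: header, description, parameter accordion, and output panel.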
with gr.Blocks(css=css) as app:

    gr.HTML("<div align='center'><h1 class='header'>Demo: Automatic video chaptering with LLMs and TF-IDF</h1></div>")
    gr.HTML("<div align='center'><h3 class='header'>From raw transcript to structured document</h3></div>")
    gr.HTML("<hr>")

    gr.Markdown("""This demo relies on:

- Groq's Llama 3 8B for transcript preprocessing
- OpenAI's GPT-4o-mini for chaptering. Note: using GPT-4o-mini for transcript preprocessing improves results, but takes longer (around 2-3 minutes for a one-hour video)

The following YouTube video IDs are already preprocessed (copy and paste an ID in the box below):

- `ErnWZxJovaM`: [MIT course](https://www.youtube.com/watch?v=ErnWZxJovaM)
- `EuC1GWhQdKE`: [Anthropic](https://www.youtube.com/watch?v=EuC1GWhQdKE)

Check the [Medium article]() for more details.""")
    gv_transcript = gr.State()  # currently unused

    video_id_input = gr.Textbox(label="Enter YouTube Video ID", value="EuC1GWhQdKE")

    with gr.Accordion("Set parameters", open=False):
        with gr.Row():
            with gr.Column(scale=1):
                model_format_transcript = gr.Dropdown(
                    [("Llama 3 8B (Groq)", "llama3-8b"), ("GPT-4o-mini (OpenAI)", "gpt-4o-mini")],
                    label="Transcript preprocessing", value="llama3-8b", interactive=True)
                chunk_size_format_transcript = gr.Textbox(label="Preprocessing chunk size", value=2000)
            with gr.Column(scale=1):
                model_toc = gr.Dropdown(
                    [("Llama 3 8B (Groq)", "llama3-8b"), ("GPT-4o-mini (OpenAI)", "gpt-4o-mini")],
                    label="Chaptering", value="gpt-4o-mini", interactive=True)
                chunk_size_toc = gr.Textbox(label="Chaptering chunk size", value=30)
            with gr.Column(scale=1):
                # Note: these key fields are not yet wired into the processing call.
                api_key_openai = gr.Textbox(label="OpenAI API Key", value="xxx")
                api_key_groq = gr.Textbox(label="Groq API Key", value="xxx")

    processing_button = gr.Button("Process transcript")
    gv_output = gr.State()

    gr.HTML("<hr>")

    output_processing = gr.HTML(label="Output processing", value=example_output_html)

    processing_button.click(gradio_process_video,
                            inputs=[video_id_input,
                                    model_format_transcript, model_toc,
                                    chunk_size_format_transcript, chunk_size_toc],
                            outputs=[output_processing, gv_output])
app.launch(debug=True, width="100%")
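
# To run locally (a minimal sketch, assuming this file is saved as app.py and
# a .env file provides the API keys):
#   python app.py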