import gradio as gr import os from youtube_transcript_api import YouTubeTranscriptApi import utils from openai import OpenAI from groq import Groq from dotenv import load_dotenv load_dotenv() GROQ_API_KEY = os.getenv("GROQ_API_KEY") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") #import importlib #importlib.reload(utils) def get_llm_client_and_model(llm_model): if llm_model == "llama3-8b": llm_client = Groq(api_key=GROQ_API_KEY) llm_model = 'llama3-8b-8192' elif llm_model == "gpt-4o-mini": llm_client = OpenAI(api_key=OPENAI_API_KEY) llm_model = 'gpt-4o-mini-2024-07-18' return llm_client, llm_model def gradio_process_video(video_id, model_format_transcript, model_toc, chunk_size_format_transcript, chunk_size_toc, progress=gr.Progress()): if video_id in ["ErnWZxJovaM"]: chapters = utils.load_json_chapters(video_id) else: transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"]) chunk_size_format_transcript = int(chunk_size_format_transcript) llm_client_format_transcript, llm_model_format_transcript = \ get_llm_client_and_model(model_format_transcript) paragraphs, nb_input_tokens, nb_output_tokens, price = \ utils.transcript_to_paragraphs(transcript, \ llm_client_format_transcript, llm_model_format_transcript, \ chunk_size=chunk_size_format_transcript, progress=progress) paragraphs = utils.add_timestamps_to_paragraphs(transcript, paragraphs, num_words=50) chunk_size_toc = int(chunk_size_toc) llm_client_get_toc, llm_model_get_toc = \ get_llm_client_and_model(model_toc) json_toc, nb_input_tokens, nb_output_tokens, price = \ utils.paragraphs_to_toc(paragraphs, \ llm_client_get_toc, llm_model_get_toc, \ chunk_size=chunk_size_toc) chapters = utils.get_chapters(paragraphs, json_toc) output_html = utils.get_result_as_html(chapters, video_id) return {output_processing: str(output_html), gv_output: output_html} def gradio_process_video(video_id, model_format_transcript, model_toc, chunk_size_format_transcript, chunk_size_toc, progress=gr.Progress()): if video_id in ["ErnWZxJovaM"]: chapters = utils.load_json_chapters(video_id) else: transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"]) chunk_size_format_transcript = int(chunk_size_format_transcript) llm_client_format_transcript, llm_model_format_transcript = \ get_llm_client_and_model(model_format_transcript) paragraphs, nb_input_tokens, nb_output_tokens, price = \ utils.transcript_to_paragraphs(transcript, \ llm_client_format_transcript, llm_model_format_transcript, \ chunk_size=chunk_size_format_transcript, progress=progress) paragraphs = utils.add_timestamps_to_paragraphs(transcript, paragraphs, num_words=50) chunk_size_toc = int(chunk_size_toc) llm_client_get_toc, llm_model_get_toc = \ get_llm_client_and_model(model_toc) json_toc, nb_input_tokens, nb_output_tokens, price = \ utils.paragraphs_to_toc(paragraphs, \ llm_client_get_toc, llm_model_get_toc, \ chunk_size=chunk_size_toc) chapters = utils.get_chapters(paragraphs, json_toc) output_html = utils.get_result_as_html(chapters, video_id) return {output_processing: str(output_html), gv_output: output_html} # %% css = """ .content { padding: 20px; max-width: 800px; margin: 0 auto; background-color: #ffffff; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); border-radius: 8px; } """ example_video_id = "ErnWZxJovaM" example_chapters = utils.load_json_chapters(example_video_id) example_output_html = utils.get_result_as_html(example_chapters, example_video_id) with (gr.Blocks(css=css) as app): gr.HTML("