88hours committed on
Commit
913d475
·
1 Parent(s): 5939cd9

improved limited UI

Browse files
Files changed (1) hide show
  1. app.py +31 -70
app.py CHANGED
@@ -128,7 +128,7 @@ def get_metadata_of_yt_video_with_captions(vid_url, from_gen=False):
128
 
129
  def return_top_k_most_similar_docs(vid_table_name, query, use_llm=False):
130
  if not video_processed:
131
- gr.Error("Please process the video first in Step 1")
132
  # Initialize results variable outside the if condition
133
  max_docs = 2
134
  print("Querying ", vid_table_name)
@@ -178,11 +178,12 @@ def return_top_k_most_similar_docs(vid_table_name, query, use_llm=False):
178
 
179
 
180
  def process_url_and_init(youtube_url, from_gen=False):
 
181
  video_processed = True
182
  url_input = gr.update(visible=False)
183
  submit_btn = gr.update(visible=True)
184
- chatbox = gr.update(visible=True)
185
- submit_btn2 = gr.update(visible=True)
186
  frame1 = gr.update(visible=True)
187
  frame2 = gr.update(visible=False)
188
  chatbox_llm, submit_btn_chat = gr.update(
@@ -190,7 +191,7 @@ def process_url_and_init(youtube_url, from_gen=False):
190
  vid_filepath, vid_table_name = get_metadata_of_yt_video_with_captions(
191
  youtube_url, from_gen)
192
  video = gr.Video(vid_filepath, render=True)
193
- return url_input, submit_btn, video, vid_table_name, chatbox, submit_btn2, frame1, frame2, chatbox_llm, submit_btn_chat
194
 
195
 
196
  def test_btn():
@@ -200,65 +201,8 @@ def test_btn():
200
  return response
201
 
202
 
203
- def init_ui():
204
- with gr.Blocks() as demo:
205
-
206
- gr.Markdown("Welcome to video chat demo - Initial processing can take up to 2 minutes, and responses may be slow. Please be patient and avoid clicking repeatedly.")
207
- url_input = gr.Textbox(label="Enter YouTube URL", visible=False, elem_id='url-inp',
208
- value="https://www.youtube.com/watch?v=kOEDG3j1bjs", interactive=True)
209
- vid_table_name = gr.Textbox(
210
- label="Enter Table Name", visible=False, interactive=False)
211
- video = gr.Video()
212
- with gr.Row():
213
- submit_btn = gr.Button("Process Video By Download Subtitles")
214
- submit_btn_gen = gr.Button("Process Video By Generating Subtitles")
215
-
216
- with gr.Row():
217
- chatbox = gr.Textbox(label="Enter the keyword/s and AI will get related captions and images",
218
- visible=False, value="event horizan", scale=4)
219
- submit_btn_whisper = gr.Button(
220
- "Submit", elem_id='chat-submit', visible=False, scale=1)
221
- with gr.Row():
222
- chatbox_llm = gr.Textbox(
223
- label="Ask a Question", visible=False, value="what this video is about?", scale=4)
224
- submit_btn_chat = gr.Button("Ask", visible=False, scale=1)
225
-
226
- response = gr.Textbox(
227
- label="Response", elem_id='chat-response', visible=False, interactive=False)
228
-
229
- with gr.Row():
230
- frame1 = gr.Image(visible=False, interactive=False, scale=2)
231
- frame2 = gr.Image(visible=False, interactive=False, scale=2)
232
- submit_btn.click(fn=process_url_and_init, inputs=[url_input], outputs=[
233
- url_input, submit_btn, video, vid_table_name, chatbox, submit_btn_whisper, frame1, frame2, chatbox_llm, submit_btn_chat])
234
- submit_btn_gen.click(fn=lambda x: process_url_and_init(x, from_gen=True), inputs=[url_input], outputs=[
235
- url_input, submit_btn, video, vid_table_name, chatbox, submit_btn_whisper, frame1, frame2, chatbox_llm, submit_btn_chat])
236
- submit_btn_whisper.click(fn=return_top_k_most_similar_docs, inputs=[
237
- vid_table_name, chatbox], outputs=[response, frame1, frame2])
238
-
239
- submit_btn_chat.click(
240
- fn=lambda table_name, query: return_top_k_most_similar_docs(
241
- vid_table_name=table_name,
242
- query=query,
243
- use_llm=True
244
- ),
245
- inputs=[vid_table_name, chatbox_llm],
246
- outputs=[response, frame1, frame2]
247
- )
248
- reset_btn = gr.Button("Reload Page")
249
- reset_btn.click(None, js="() => { location.reload(); }")
250
-
251
- test_llama = gr.Button("Test Llama")
252
- test_llama.click(test_btn, None, outputs=[response])
253
- return demo
254
-
255
-
256
  def init_improved_ui():
257
-
258
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
259
- # Header Section with Introduction
260
- with gr.Accordion(label=" # 🎬 Video Analysis Assistant", open=True):
261
- gr.Markdown("""
262
  ## How it Works:
263
  1. 📥 Provide a YouTube URL.
264
  2. 🔄 Choose a processing method:
@@ -273,7 +217,25 @@ def init_improved_ui():
273
  4. 📊 Results will be displayed in the response section with related images.
274
 
275
  > **Note**: Initial processing takes several minutes. Please be patient and monitor the logs for progress updates.
276
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
  # Video Input Section
279
  with gr.Group():
@@ -288,13 +250,12 @@ def init_improved_ui():
288
 
289
  with gr.Row():
290
  submit_btn = gr.Button(
291
- "πŸ“₯ Step 1: Process with Existing Subtitles", variant="primary", size='md')
292
  submit_btn_gen = gr.Button(
293
  "🎯 Generate New Subtitles", variant="secondary", visible=False)
294
 
295
  # Analysis Tools Section
296
  with gr.Group():
297
- gr.Markdown("### πŸ” Step 2: Chat AI about the video")
298
 
299
  with gr.Row():
300
  chatbox = gr.Textbox(
@@ -310,19 +271,19 @@ def init_improved_ui():
310
 
311
  with gr.Row():
312
  chatbox_llm = gr.Textbox(
313
- label="",
314
  value="What is this video about?",
315
  visible=True
316
  )
 
317
  submit_btn_chat = gr.Button(
318
- "πŸ€– Ask",
319
  visible=True,
320
- scale=1
321
  )
322
 
323
  # Results Display Section
324
  with gr.Group():
325
- gr.Markdown("### πŸ“Š AI Response")
326
  response = gr.Textbox(
327
  label="AI Response",
328
  visible=True,
@@ -337,7 +298,7 @@ def init_improved_ui():
337
 
338
  # Control Buttons
339
  with gr.Row():
340
- reset_btn = gr.Button("πŸ”„ Start Over", variant="secondary")
341
  test_llama = gr.Button("πŸ§ͺ Say Hi to Llama",
342
  visible=False, variant="secondary")
343
 
 
128
 
129
  def return_top_k_most_similar_docs(vid_table_name, query, use_llm=False):
130
  if not video_processed:
131
+ raise gr.Error("Please process the video first in Step 1")
132
  # Initialize results variable outside the if condition
133
  max_docs = 2
134
  print("Querying ", vid_table_name)
 
178
 
179
 
180
  def process_url_and_init(youtube_url, from_gen=False):
181
+ global video_processed
182
  video_processed = True
183
  url_input = gr.update(visible=False)
184
  submit_btn = gr.update(visible=True)
185
+ chatbox = gr.update(visible=False)
186
+ submit_btn_whisper = gr.update(visible=False)
187
  frame1 = gr.update(visible=True)
188
  frame2 = gr.update(visible=False)
189
  chatbox_llm, submit_btn_chat = gr.update(
 
191
  vid_filepath, vid_table_name = get_metadata_of_yt_video_with_captions(
192
  youtube_url, from_gen)
193
  video = gr.Video(vid_filepath, render=True)
194
+ return url_input, submit_btn, video, vid_table_name, chatbox, submit_btn_whisper, frame1, frame2, chatbox_llm, submit_btn_chat
195
 
196
 
197
  def test_btn():
 
201
  return response
202
 
203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  def init_improved_ui():
205
+ full_intro = """
 
 
 
 
206
  ## How it Works:
207
  1. 📥 Provide a YouTube URL.
208
  2. 🔄 Choose a processing method:
 
217
  4. 📊 Results will be displayed in the response section with related images.
218
 
219
  > **Note**: Initial processing takes several minutes. Please be patient and monitor the logs for progress updates.
220
+ """
221
+ intro = """
222
+ ## How it Works:
223
+ Step 1. 📥 A video URL.
224
+ Step 2. 🔄 Process Video:
225
+ Download the video and its captions/subtitles from YouTube OR generate captions using Whisper AI.
226
+ The system will load the video in video player for preview and process the video and extract frames from it.
227
+ It will then pass the captions and images to the RAG model to store them in the database.
228
+ The RAG (Lance DB) uses a pre-trained BridgeTower model to generate embeddings that provide pairs of captions and related images.
229
+ Step 3. 🤖 Analyze video content through:
230
+ - AI-powered Q&A - Use this functionality to ask questions about the video content. Our system will use the Meta/LLaMA model to analyze the captions and images and provide detailed answers.
231
+ Step 4. 📊 Results will be displayed in the response section with related images.
232
+
233
+ > **Note**: Initial processing takes several minutes. Please be patient and monitor the logs for progress updates.
234
+ """
235
+ with gr.Blocks(theme=gr.themes.Ocean()) as demo:
236
+ # Header Section with Introduction
237
+ with gr.Accordion(label=" # 🎬 Video Analysis Assistant ", open=False):
238
+ gr.Markdown(intro)
239
 
240
  # Video Input Section
241
  with gr.Group():
 
250
 
251
  with gr.Row():
252
  submit_btn = gr.Button(
253
+ "πŸ“₯ Step 1: Process with Existing Subtitles", variant="primary")
254
  submit_btn_gen = gr.Button(
255
  "🎯 Generate New Subtitles", variant="secondary", visible=False)
256
 
257
  # Analysis Tools Section
258
  with gr.Group():
 
259
 
260
  with gr.Row():
261
  chatbox = gr.Textbox(
 
271
 
272
  with gr.Row():
273
  chatbox_llm = gr.Textbox(
274
+ label="πŸ” Chat AI about the video",
275
  value="What is this video about?",
276
  visible=True
277
  )
278
+ with gr.Row():
279
  submit_btn_chat = gr.Button(
280
+ "πŸ€– Step 2: Ask",
281
  visible=True,
282
+ scale=1, variant="primary"
283
  )
284
 
285
  # Results Display Section
286
  with gr.Group():
 
287
  response = gr.Textbox(
288
  label="AI Response",
289
  visible=True,
 
298
 
299
  # Control Buttons
300
  with gr.Row():
301
+ reset_btn = gr.Button("πŸ”„ Step 3: Start Over", variant="primary")
302
  test_llama = gr.Button("πŸ§ͺ Say Hi to Llama",
303
  visible=False, variant="secondary")
304