JaganathC committed
Commit b28774a · verified · 1 Parent(s): 6bf8290

Update app.py

Files changed (1)
  1. app.py +23 -7
app.py CHANGED
@@ -11,32 +11,45 @@ import time
 import langdetect
 import uuid
 
+# Hugging Face Token
 HF_TOKEN = os.environ.get("HF_TOKEN")
 print("Starting the program...")
 
+# Load Qwen Model on CPU
 model_path = "Qwen/Qwen2.5-7B-Instruct"
-print(f"Loading model {model_path}...")
+print(f"Loading model {model_path} on CPU...")
+
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    torch_dtype=torch.bfloat16,  # Uses less memory than float32
+    trust_remote_code=True,
+    low_cpu_mem_usage=True,
+    device_map="auto"  # Automatically optimizes model parts for CPU
+).to("cpu")
 model = model.eval()
 print("Model successfully loaded.")
 
+# Generate unique filenames
 def generate_unique_filename(extension):
     return f"{uuid.uuid4()}{extension}"
 
+# Cleanup temporary files
 def cleanup_files(*files):
     for file in files:
         if file and os.path.exists(file):
             os.remove(file)
             print(f"Removed file: {file}")
 
+# Extract audio using FFmpeg
 def extract_audio_ffmpeg(video_path):
     print("Extracting audio using ffmpeg...")
     audio_path = generate_unique_filename(".wav")
     command = ["ffmpeg", "-i", video_path, "-q:a", "0", "-map", "a", audio_path, "-y"]
-    subprocess.run(command, check=True)
+    subprocess.Popen(command).wait()  # Use Popen to reduce memory usage
     return audio_path
 
+# Transcribe audio
 def transcribe_audio(file_path):
     print(f"Starting transcription of file: {file_path}")
     temp_audio = None
@@ -48,11 +61,11 @@ def transcribe_audio(file_path):
     output_file = generate_unique_filename(".json")
     command = [
         "insanely-fast-whisper", "--file-name", file_path,
-        "--device-id", "0", "--model-name", "openai/whisper-large-v3",
+        "--device-id", "cpu", "--model-name", "openai/whisper-large-v3",
         "--task", "transcribe", "--timestamp", "chunk",
         "--transcript-path", output_file
     ]
-    subprocess.run(command, check=True)
+    subprocess.Popen(command).wait()
 
     with open(output_file, "r") as f:
         transcription = json.load(f)
@@ -64,15 +77,17 @@ def transcribe_audio(file_path):
 
     return result
 
+# Generate summary using Qwen Model
 def generate_summary_stream(transcription):
     detected_language = langdetect.detect(transcription)
     prompt = f"""Summarize the following video transcription in 150-300 words.
     The summary should be in the same language as the transcription, which is detected as {detected_language}.
-    {transcription[:300000]}..."""
+    {transcription[:100000]}..."""  # Limiting input size to avoid memory overflow
 
     response, history = model.chat(tokenizer, prompt, history=[])
     return response
 
+# Process video upload
 def process_uploaded_video(video_path):
     try:
         transcription = transcribe_audio(video_path)
@@ -80,6 +95,7 @@ def process_uploaded_video(video_path):
     except Exception as e:
         return f"Processing error: {str(e)}", None
 
+# Gradio UI
 demo = gr.Blocks()
 with demo:
     gr.Markdown("""
@@ -99,4 +115,4 @@ with demo:
     video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
     summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
 
-demo.launch()
+demo.launch()
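The extraction step is a thin wrapper around ffmpeg. Below is a standalone sketch of the same call with each flag annotated (it assumes the `ffmpeg` binary is on PATH). Note that `subprocess.run(command, check=True)` raises `CalledProcessError` on a nonzero exit code, whereas a bare `Popen(command).wait()` only returns it, so a failed extraction can pass silently unless the return value is checked:

```python
# A sketch of the ffmpeg audio extraction with annotated flags
# (assumes the ffmpeg binary is available on PATH).
import subprocess
import uuid

def extract_audio(video_path: str) -> str:
    audio_path = f"{uuid.uuid4()}.wav"
    command = [
        "ffmpeg",
        "-i", video_path,  # input video file
        "-q:a", "0",       # best variable-bitrate audio quality
        "-map", "a",       # select only the audio stream
        "-y",              # overwrite the output file without prompting
        audio_path,
    ]
    # check=True raises CalledProcessError on a nonzero exit code,
    # whereas Popen(command).wait() merely returns it.
    subprocess.run(command, check=True)
    return audio_path
```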
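The transcription step shells out to the `insanely-fast-whisper` CLI and reads back the JSON transcript it writes. A sketch of that round trip follows; the CLI must be installed separately, and both the `"cpu"` device id (carried over from the commit) and the top-level `"text"`/`"chunks"` keys in the output JSON are assumptions to verify against the CLI's `--help` and actual output:

```python
# A sketch of the transcription round trip. "--device-id cpu" and the
# "text"/"chunks" keys in the JSON output are assumptions to verify.
import json
import subprocess
import uuid

def transcribe(audio_path: str) -> str:
    output_file = f"{uuid.uuid4()}.json"
    command = [
        "insanely-fast-whisper",
        "--file-name", audio_path,
        "--device-id", "cpu",
        "--model-name", "openai/whisper-large-v3",
        "--task", "transcribe",
        "--timestamp", "chunk",           # chunk-level timestamps
        "--transcript-path", output_file,
    ]
    subprocess.run(command, check=True)

    with open(output_file, "r") as f:
        transcription = json.load(f)
    # Prefer the full text; otherwise join the timestamped chunks.
    return transcription.get("text") or " ".join(
        chunk["text"].strip() for chunk in transcription.get("chunks", [])
    )
```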
 
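`model.chat(...)` is the chat helper exposed by the older Qwen remote-code models; the Qwen2.5 model card documents the standard transformers chat-template flow instead. Here is a sketch of the same summarization step on that API (`max_new_tokens=512` is an assumed budget; `model` and `tokenizer` are the globals loaded at the top of app.py):

```python
# A sketch of the summary step on the standard chat-template API;
# max_new_tokens=512 is an assumed budget, and `model`/`tokenizer`
# are the globals loaded at the top of app.py.
import langdetect

def summarize(transcription: str) -> str:
    detected_language = langdetect.detect(transcription)
    prompt = (
        "Summarize the following video transcription in 150-300 words. "
        "The summary should be in the same language as the transcription, "
        f"which is detected as {detected_language}.\n\n"
        f"{transcription[:100000]}"  # cap the input to bound memory use
    )
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    output_ids = model.generate(input_ids, max_new_tokens=512)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    return tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
```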
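The component definitions referenced by the two `.click(...)` calls fall outside the visible hunks. A hypothetical reconstruction of the Blocks layout they imply: the variable names come from the diff, but the component types, labels, and Markdown text are guesses:

```python
# Hypothetical reconstruction of the Blocks UI: the variable names come
# from the diff, but the component types, labels, and Markdown text are
# guesses. process_uploaded_video and generate_summary_stream are the
# functions defined earlier in app.py.
import gradio as gr

demo = gr.Blocks()
with demo:
    gr.Markdown("""# Video transcription and summarization""")
    video_input = gr.Video(label="Upload video")
    video_button = gr.Button("Transcribe video")
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    summary_button = gr.Button("Summarize transcription")
    summary_output = gr.Textbox(label="Summary", lines=10)

    video_button.click(
        process_uploaded_video,
        inputs=[video_input],
        outputs=[transcription_output, summary_output],
    )
    summary_button.click(
        generate_summary_stream,
        inputs=[transcription_output],
        outputs=[summary_output],
    )

demo.launch()
```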