chinmayc3 committed on
Commit
48732e0
·
1 Parent(s): acc5e81

changed task creation to server

Browse files
Files changed (4) hide show
  1. app.py +26 -17
  2. enums.py +2 -2
  3. random_audios.csv +0 -0
  4. requirements.txt +2 -1
app.py CHANGED
@@ -14,12 +14,15 @@ import pandas as pd
14
  import requests
15
  import streamlit as st
16
  from audio_recorder_streamlit import audio_recorder
 
17
 
18
  from logger import logger
19
  from utils import fs
20
- from enums import SAVE_PATH, ELO_JSON_PATH, ELO_CSV_PATH, EMAIL_PATH, TEMP_DIR, CREATE_TASK_URL,TRANSCRIBE_URL
21
 
22
  result_queue = Queue()
 
 
23
 
24
  def result_writer_thread():
25
  result_writer = ResultWriter(SAVE_PATH)
@@ -137,9 +140,9 @@ def send_task(payload):
137
  "Authorization": f"Bearer {os.getenv('CREATE_TASK_API_KEY')}"
138
  }
139
  if payload["task"] in ["fetch_audio","write_result"]:
140
- response = requests.post(CREATE_TASK_URL,json=payload,headers=header,timeout=300)
141
  else:
142
- response = requests.post(TRANSCRIBE_URL,json=payload,headers=header,timeout=300,stream=True)
143
  try:
144
  response = response.json()
145
  except Exception as e:
@@ -152,12 +155,12 @@ def send_task(payload):
152
  if payload["task"] == "transcribe_with_fastapi":
153
  return response["text"]
154
 
155
- elif payload["task"] == "fetch_audio":
156
- array = response["array"]
157
- array = decode_audio_array(array)
158
- sampling_rate = response["sample_rate"]
159
- filepath = response["filepath"]
160
- return array,sampling_rate,filepath
161
 
162
  def encode_audio_array(audio_array):
163
  buffer = io.BytesIO()
@@ -184,7 +187,7 @@ def call_function(model_name):
184
  sr = st.session_state.audio['sample_rate']
185
  array = st.session_state.audio['data']
186
  if sr != 22050:
187
- array = librosa.resample(array,sr,22050)
188
  encoded_array = encode_audio_array(array)
189
  payload = {
190
  "task":"transcribe_with_fastapi",
@@ -251,6 +254,7 @@ def on_option_1_click():
251
  }
252
  )
253
  st.session_state.option_selected = True
 
254
 
255
  def on_option_2_click():
256
  if st.session_state.transcribed and not st.session_state.option_selected:
@@ -269,6 +273,7 @@ def on_option_2_click():
269
  }
270
  )
271
  st.session_state.option_selected = True
 
272
 
273
  def on_option_both_click():
274
  if st.session_state.transcribed and not st.session_state.option_selected:
@@ -288,6 +293,7 @@ def on_option_both_click():
288
  }
289
  )
290
  st.session_state.option_selected = True
 
291
 
292
  def on_option_none_click():
293
  if st.session_state.transcribed and not st.session_state.option_selected:
@@ -306,6 +312,7 @@ def on_option_none_click():
306
  }
307
  )
308
  st.session_state.option_selected = True
 
309
 
310
  def on_click_transcribe():
311
  if st.session_state.has_audio:
@@ -318,12 +325,13 @@ def on_click_transcribe():
318
  st.session_state.option_1_model_name_state = ""
319
  st.session_state.option_2_model_name_state = ""
320
  st.session_state.option_selected = None
 
 
321
 
322
  def on_random_click():
323
  reset_state()
324
  with st.spinner("Fetching random audio... please wait"):
325
- fetch_audio_payload = {"task": "fetch_audio"}
326
- array, sampling_rate, filepath = send_task(fetch_audio_payload)
327
  st.session_state.audio = {"data":array,"sample_rate":sampling_rate,"format":"audio/wav"}
328
  st.session_state.has_audio = True
329
  st.session_state.current_audio_type = "random"
@@ -364,7 +372,8 @@ def main():
364
  st.session_state.user_email = ""
365
  if "recording" not in st.session_state:
366
  st.session_state.recording = True
367
-
 
368
  col1, col2 = st.columns([1, 1])
369
 
370
  with col1:
@@ -424,16 +433,16 @@ def main():
424
  c1, c2, c3, c4 = st.columns(4)
425
 
426
  with c1:
427
- st.button("Prefer Option 1",on_click=on_option_1_click,key="option1_btn")
428
 
429
  with c2:
430
- st.button("Prefer Option 2",on_click=on_option_2_click,key="option2_btn")
431
 
432
  with c3:
433
- st.button("Prefer Both",on_click=on_option_both_click,key="both_btn")
434
 
435
  with c4:
436
- st.button("Prefer None",on_click=on_option_none_click,key="none_btn")
437
 
438
  with st.container():
439
  st.button("New Match",on_click=on_reset_click,key="reset_btn",use_container_width=True)
 
14
  import requests
15
  import streamlit as st
16
  from audio_recorder_streamlit import audio_recorder
17
+ import torchaudio
18
 
19
  from logger import logger
20
  from utils import fs
21
+ from enums import SAVE_PATH, ELO_JSON_PATH, ELO_CSV_PATH, EMAIL_PATH, TEMP_DIR, NEW_TASK_URL,ARENA_PATH
22
 
23
  result_queue = Queue()
24
+ random_df = pd.read_csv("random_audios.csv")
25
+ random_paths = random_df["path"].tolist()
26
 
27
  def result_writer_thread():
28
  result_writer = ResultWriter(SAVE_PATH)
 
140
  "Authorization": f"Bearer {os.getenv('CREATE_TASK_API_KEY')}"
141
  }
142
  if payload["task"] in ["fetch_audio","write_result"]:
143
+ response = requests.post(NEW_TASK_URL,json=payload,headers=header,timeout=300)
144
  else:
145
+ response = requests.post(NEW_TASK_URL,json=payload,headers=header,timeout=300,stream=True)
146
  try:
147
  response = response.json()
148
  except Exception as e:
 
155
  if payload["task"] == "transcribe_with_fastapi":
156
  return response["text"]
157
 
158
+ def fetch_audio():
159
+ filepath = random.choice(random_paths)
160
+ with fs.open(f"{ARENA_PATH}/{filepath}", 'rb') as f:
161
+ audio,sr = torchaudio.load(f)
162
+ audio = audio.numpy()
163
+ return audio,sr,filepath
164
 
165
  def encode_audio_array(audio_array):
166
  buffer = io.BytesIO()
 
187
  sr = st.session_state.audio['sample_rate']
188
  array = st.session_state.audio['data']
189
  if sr != 22050:
190
+ array = librosa.resample(y=array,orig_sr=sr,target_sr=22050)
191
  encoded_array = encode_audio_array(array)
192
  payload = {
193
  "task":"transcribe_with_fastapi",
 
254
  }
255
  )
256
  st.session_state.option_selected = True
257
+ st.session_state.disable_voting=True
258
 
259
  def on_option_2_click():
260
  if st.session_state.transcribed and not st.session_state.option_selected:
 
273
  }
274
  )
275
  st.session_state.option_selected = True
276
+ st.session_state.disable_voting=True
277
 
278
  def on_option_both_click():
279
  if st.session_state.transcribed and not st.session_state.option_selected:
 
293
  }
294
  )
295
  st.session_state.option_selected = True
296
+ st.session_state.disable_voting=True
297
 
298
  def on_option_none_click():
299
  if st.session_state.transcribed and not st.session_state.option_selected:
 
312
  }
313
  )
314
  st.session_state.option_selected = True
315
+ st.session_state.disable_voting=True
316
 
317
  def on_click_transcribe():
318
  if st.session_state.has_audio:
 
325
  st.session_state.option_1_model_name_state = ""
326
  st.session_state.option_2_model_name_state = ""
327
  st.session_state.option_selected = None
328
+ st.session_state.recording=True
329
+ st.session_state.disable_voting=False
330
 
331
  def on_random_click():
332
  reset_state()
333
  with st.spinner("Fetching random audio... please wait"):
334
+ array, sampling_rate, filepath = fetch_audio()
 
335
  st.session_state.audio = {"data":array,"sample_rate":sampling_rate,"format":"audio/wav"}
336
  st.session_state.has_audio = True
337
  st.session_state.current_audio_type = "random"
 
372
  st.session_state.user_email = ""
373
  if "recording" not in st.session_state:
374
  st.session_state.recording = True
375
+ if "disable_voting" not in st.session_state:
376
+ st.session_state.disable_voting = True
377
  col1, col2 = st.columns([1, 1])
378
 
379
  with col1:
 
433
  c1, c2, c3, c4 = st.columns(4)
434
 
435
  with c1:
436
+ st.button("Prefer Option 1",on_click=on_option_1_click,key="option1_btn",disabled=st.session_state.disable_voting)
437
 
438
  with c2:
439
+ st.button("Prefer Option 2",on_click=on_option_2_click,key="option2_btn",disabled=st.session_state.disable_voting)
440
 
441
  with c3:
442
+ st.button("Prefer Both",on_click=on_option_both_click,key="both_btn",disabled=st.session_state.disable_voting)
443
 
444
  with c4:
445
+ st.button("Prefer None",on_click=on_option_none_click,key="none_btn",disabled=st.session_state.disable_voting)
446
 
447
  with st.container():
448
  st.button("New Match",on_click=on_reset_click,key="reset_btn",use_container_width=True)
enums.py CHANGED
@@ -5,5 +5,5 @@ ELO_JSON_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('ELO_JSON_PATH'
5
  ELO_CSV_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('ELO_CSV_KEY')}"
6
  EMAIL_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('EMAILS_KEY')}"
7
  TEMP_DIR = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('AUDIOS_KEY')}"
8
- CREATE_TASK_URL = os.getenv("CREATE_TASK_URL")
9
- TRANSCRIBE_URL = os.getenv("TRANSCRIPTION_TASK_URL")
 
5
  ELO_CSV_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('ELO_CSV_KEY')}"
6
  EMAIL_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('EMAILS_KEY')}"
7
  TEMP_DIR = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('AUDIOS_KEY')}"
8
+ NEW_TASK_URL=os.getenv("NEW_TASK_URL")
9
+ ARENA_PATH=os.getenv('ARENA_PATH')
random_audios.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -8,4 +8,5 @@ scipy
8
  streamlit==1.40.2
9
  fsspec==2024.10.0
10
  boto3
11
- s3fs
 
 
8
  streamlit==1.40.2
9
  fsspec==2024.10.0
10
  boto3
11
+ s3fs
12
+ torchaudio