Carsten Høyer committed on
Commit
5dfce18
1 Parent(s): 2e80e92
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -28,11 +28,16 @@ def greet_json():
28
  return {"Hello": "World!"}
29
 
30
  # Function to generate audio from text using ParlerTTS
31
- def generate_audio(text, description="Neutral voice"):
 
32
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
 
33
  prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
 
34
  generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
 
35
  audio_arr = generation.cpu().numpy().squeeze()
 
36
  return audio_arr, model.config.sampling_rate
37
 
38
  # A POST endpoint to receive and parse an array of JSON objects
@@ -40,20 +45,23 @@ def generate_audio(text, description="Neutral voice"):
40
  async def create_items(items: List[Item]):
41
  processed_items = []
42
  for item in items:
 
43
  # Generate audio
 
44
  audio_arr, sample_rate = generate_audio(item.text)
 
45
 
46
- # Create in-memory bytes buffer for audio
47
- audio_bytes = io.BytesIO()
48
- sf.write(audio_bytes, audio_arr, sample_rate, format="WAV")
49
- audio_bytes.seek(0) # Reset buffer position
50
 
51
  processed_item = {
52
  "text": item.text,
53
  "name": item.name,
54
  "section": item.section,
55
  "processed": True,
56
- "audio": StreamingResponse(audio_bytes, media_type="audio/wav")
57
  }
58
  processed_items.append(processed_item)
59
 
 
28
  return {"Hello": "World!"}
29
 
30
  # Function to generate audio from text using ParlerTTS
31
+ def generate_audio(text, description="Jon's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise."):
32
+ print("A")
33
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
34
+ print("B")
35
  prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
36
+ print("C")
37
  generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
38
+ print("D")
39
  audio_arr = generation.cpu().numpy().squeeze()
40
+ print("E")
41
  return audio_arr, model.config.sampling_rate
42
 
43
  # A POST endpoint to receive and parse an array of JSON objects
 
45
  async def create_items(items: List[Item]):
46
  processed_items = []
47
  for item in items:
48
+ print(f"Processing item: {item.text}")
49
  # Generate audio
50
+ print("before")
51
  audio_arr, sample_rate = generate_audio(item.text)
52
+ print("after")
53
 
54
+ # # Create in-memory bytes buffer for audio
55
+ # audio_bytes = io.BytesIO()
56
+ # sf.write(audio_bytes, audio_arr, sample_rate, format="WAV")
57
+ # audio_bytes.seek(0) # Reset buffer position
58
 
59
  processed_item = {
60
  "text": item.text,
61
  "name": item.name,
62
  "section": item.section,
63
  "processed": True,
64
+ # "audio": StreamingResponse(audio_bytes, media_type="audio/wav")
65
  }
66
  processed_items.append(processed_item)
67