SebastianSchramm committed on
Commit ae335a4 · unverified · 1 Parent(s): 9bbae43

add endpoints for text gen and full prompt2audio

Files changed (1)
  1. server.py +80 -29
server.py CHANGED
@@ -1,19 +1,19 @@
+import json
 import logging
+import os
+import tempfile
 
-import random
-
-from fastapi import FastAPI
-from pydantic import BaseModel
+import numpy as np
+import requests
+import soundfile as sf
+from fastapi import BackgroundTasks, FastAPI
 from fastapi.responses import FileResponse
-from fastapi import BackgroundTasks
-from starlette.requests import Request
 from kokoro import KPipeline
-import soundfile as sf
-import tempfile
-import numpy as np
-import os
+from pydantic import BaseModel
+from starlette.requests import Request
+
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 
-random.seed(42)
 
 logging.basicConfig()
 logger = logging.getLogger(__name__)
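Note: OPENROUTER_API_KEY is read once at import time via os.getenv, which silently yields None when the variable is unset, so the Authorization header built in generate_text below would go out as "Bearer None". A fail-fast alternative (a sketch, not part of the commit):

import os

# os.environ[...] raises KeyError at startup if the key is missing,
# instead of failing later on the first OpenRouter request.
OPENROUTER_API_KEY = os.environ["OPENROUTER_API_KEY"]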
@@ -29,7 +29,7 @@ def cleanup_temp_file(file_path: str):
     try:
         os.unlink(file_path)
     except OSError:
-        pass
+        pass
 
 
 def text_to_audio_chunks(text, voice="af_heart", language="a"):
@@ -41,14 +41,18 @@ def text_to_audio_chunks(text, voice="af_heart", language="a"):
 
 def concat_chunks(audios, samplerate=24000, silence_dur=0.3):
     # Convert PyTorch tensors to NumPy arrays
-    audio_arrays = [audio.numpy() if hasattr(audio, 'numpy') else audio for audio in audios]
-
+    audio_arrays = [
+        audio.numpy() if hasattr(audio, "numpy") else audio for audio in audios
+    ]
+
     if not audio_arrays:
         return np.array([])  # Return empty array if no audio chunks
-
+
     silence = np.zeros(int(samplerate * silence_dur), dtype=audio_arrays[0].dtype)
     # Insert silence between all but last
-    chunks = sum([[chunk, silence] for chunk in audio_arrays[:-1]], []) + [audio_arrays[-1]]
+    chunks = sum([[chunk, silence] for chunk in audio_arrays[:-1]], []) + [
+        audio_arrays[-1]
+    ]
     return np.concatenate(chunks)
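Note: this hunk only reflows concat_chunks; behavior is unchanged, with int(samplerate * silence_dur) zero samples still inserted between consecutive chunks. A quick sanity check (a sketch with made-up chunk data; assumes server.py is importable as a module):

import numpy as np

from server import concat_chunks  # assumed import path

# Two dummy 0.5 s chunks at 24 kHz (12000 samples each)
a = np.ones(12000, dtype=np.float32)
b = np.ones(12000, dtype=np.float32)

out = concat_chunks([a, b], samplerate=24000, silence_dur=0.3)
# 12000 + int(24000 * 0.3) + 12000 == 31200 samples
assert out.shape == (31200,)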
 
@@ -60,14 +64,60 @@ def get_audio(text: str, language: str):
     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
     sf.write(tmp.name, final_audio, 24000)
     tmp.close()
-    return tmp.name
+    return tmp.name
+
+
+def generate_text(prompt: str):
+    response = requests.post(
+        url="https://openrouter.ai/api/v1/chat/completions",
+        headers={
+            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "Emotions API",
+            "X-Title": "Emotions API",
+        },
+        data=json.dumps(
+            {
+                "model": "google/gemma-3n-e4b-it:free",
+                "temperature": 0.0,
+                "max_tokens": 2048,
+                "top_p": 0.99,
+                "messages": [{"role": "user", "content": prompt}],
+            }
+        ),
+    )
+    response_json = response.json()
+    answer = response_json["choices"][0]["message"]["content"]
+    return answer, response_json
+
+
+def generate_audio(text: str, language: str) -> FileResponse:
+    audio_path = get_audio(text, language)
+
+    background_tasks = BackgroundTasks()
+    background_tasks.add_task(cleanup_temp_file, audio_path)
 
+    return FileResponse(
+        path=audio_path,
+        media_type="audio/wav",
+        filename="generated_audio.wav",
+        background=background_tasks,
+    )
 
-class InputLoad(BaseModel):
+
+class InputLoadT2A(BaseModel):
     text: str
     language: str
 
 
+class InputLoadP2T(BaseModel):
+    text: str
+
+
+class ResponseLoadP2T(BaseModel):
+    text: str
+
+
 app = FastAPI()
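Note: generate_text indexes response_json["choices"][0] unconditionally, so an HTTP error or an OpenRouter error payload (which carries no "choices" key) surfaces as a KeyError, and the request is sent without a timeout. A more defensive variant might look like this (a sketch, not part of the commit; the name generate_text_safe and the timeout value are illustrative):

import json
import os

import requests

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")


def generate_text_safe(prompt: str):
    # Same request as generate_text above, plus basic error handling.
    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        },
        data=json.dumps(
            {
                "model": "google/gemma-3n-e4b-it:free",
                "temperature": 0.0,
                "max_tokens": 2048,
                "messages": [{"role": "user", "content": prompt}],
            }
        ),
        timeout=60,  # illustrative; the commit sets no timeout
    )
    response.raise_for_status()  # surface HTTP-level errors early
    response_json = response.json()
    choices = response_json.get("choices")
    if not choices:
        raise RuntimeError(f"OpenRouter returned no choices: {response_json}")
    return choices[0]["message"]["content"], response_json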
 
@@ -76,17 +126,18 @@ def health_check():
     return {"server": "running"}
 
 
-@app.post("/answer/")
-async def receive(input_load: InputLoad, request: Request) -> FileResponse:
-    audio_path = get_audio(input_load.text, input_load.language)
+@app.post("/genaudio/")
+async def receive(input_load: InputLoadT2A, request: Request) -> FileResponse:
+    return generate_audio(input_load.text, input_load.language)
 
-    background_tasks = BackgroundTasks()
-    background_tasks.add_task(cleanup_temp_file, audio_path)
 
-    return FileResponse(
-        path=audio_path,
-        media_type="audio/wav",
-        filename="generated_audio.wav",
-        background=background_tasks
-    )
+@app.post("/gentext/")
+async def gen_text(input_load: InputLoadP2T, request: Request) -> ResponseLoadP2T:
+    text, _ = generate_text(input_load.text)
+    return ResponseLoadP2T(text=text)
+
 
+@app.post("/genemotion/")
+async def gen_emotion(input_load: InputLoadT2A, request: Request) -> FileResponse:
+    text, _ = generate_text(input_load.text)
+    return generate_audio(text, input_load.language)
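With the server running (assuming the usual uvicorn setup on localhost:8000, which this commit does not show), the three new endpoints can be exercised like this (a client sketch, not part of the commit; the base URL is an assumption):

import requests

BASE = "http://localhost:8000"  # assumed host/port

# /gentext/: prompt -> generated text
r = requests.post(f"{BASE}/gentext/", json={"text": "Tell me a short story."})
print(r.json()["text"])

# /genaudio/: text -> WAV file ("a" is the default Kokoro language code in server.py)
r = requests.post(f"{BASE}/genaudio/", json={"text": "Hello there!", "language": "a"})
with open("speech.wav", "wb") as f:
    f.write(r.content)

# /genemotion/: prompt -> generated text -> WAV file
r = requests.post(
    f"{BASE}/genemotion/",
    json={"text": "Tell me a short story.", "language": "a"},
)
with open("story.wav", "wb") as f:
    f.write(r.content)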