add voice chat function
Files changed:

- .streamlit/config.toml +2 -0
- app.py +2 -41
- pages/playground.py +4 -0
- pages/voice_chat.py +4 -0
- src/content/common.py +367 -0
- src/content/playground.py +208 -0
- src/content/voice_chat.py +153 -0
- src/generation.py +10 -2
- src/pages.py +0 -220
- src/utils.py +0 -64
- style/app_style.css +52 -28
- style/normal_window.css +14 -0
- style/small_window.css +9 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,2 @@
+[client]
+showSidebarNavigation = false
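Setting `showSidebarNavigation = false` turns off Streamlit's automatic multipage menu; the app renders its own links instead (see `sidebar_fragment()` in src/content/common.py below). A minimal sketch of that manual-navigation pattern, using the page paths from this repo:

    import streamlit as st

    # With [client] showSidebarNavigation = false in .streamlit/config.toml,
    # the built-in page list is hidden, so navigation links are drawn explicitly.
    with st.sidebar:
        st.page_link("pages/playground.py", label="Playground")
        st.page_link("pages/voice_chat.py", label="Voice Chat (experimental)")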
app.py
CHANGED
@@ -1,42 +1,3 @@
-import copy
-
-import streamlit as st
-
-from src.tunnel import start_server
-from src.generation import FIXED_GENERATION_CONFIG, load_model
-from src.pages import DEFAULT_DIALOGUE_STATES, sidebar_fragment, specify_audio_fragment, conversation_section
-
-
-st.set_page_config(page_title='MERaLiON-AudioLLM', page_icon = "🔥", layout='wide')
-
-st.markdown('<style>' + open('./style/app_style.css').read() + '</style>', unsafe_allow_html=True)
-
-if "server" not in st.session_state:
-    st.session_state.server = start_server()
-
-if "client" not in st.session_state or 'model_name' not in st.session_state:
-    st.session_state.client, st.session_state.model_name = load_model()
-
-for key, value in FIXED_GENERATION_CONFIG.items():
-    if key not in st.session_state:
-        st.session_state[key]=copy.deepcopy(value)
-
-for key, value in DEFAULT_DIALOGUE_STATES.items():
-    if key not in st.session_state:
-        st.session_state[key]=copy.deepcopy(value)
-
-with st.sidebar:
-    sidebar_fragment()
-
-if st.sidebar.button('Clear History'):
-    st.session_state.update(copy.deepcopy(DEFAULT_DIALOGUE_STATES))
-
-st.markdown("<h1 style='text-align: center;'>MERaLiON-AudioLLM Demo 🤖</h1>", unsafe_allow_html=True)
-st.markdown(
-    """This demo is based on [MERaLiON-AudioLLM](https://huggingface.co/MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION),
-    developed by I2R, A*STAR, in collaboration with AISG, Singapore.
-    It is tailored for Singapore’s multilingual and multicultural landscape."""
-)
-
-specify_audio_fragment()
-conversation_section()
+from src.content.playground import playground_page
+
+playground_page()
pages/playground.py
ADDED
@@ -0,0 +1,4 @@
+from src.content.playground import playground_page
+
+
+playground_page()
pages/voice_chat.py
ADDED
@@ -0,0 +1,4 @@
+from src.content.voice_chat import voice_chat_page
+
+
+voice_chat_page()
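Both files under pages/ are deliberately thin: routing stays in two-line stubs while all page logic lives under src/content/. A hypothetical third page would follow the same shape (the `about` module below does not exist in this commit; it only illustrates the pattern):

    # pages/about.py (hypothetical, for illustration only)
    from src.content.about import about_page

    about_page()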
src/content/common.py
ADDED
@@ -0,0 +1,367 @@
+import copy
+
+import numpy as np
+import streamlit as st
+
+from src.tunnel import start_server
+from src.generation import FIXED_GENERATION_CONFIG, load_model
+
+
+DEFAULT_DIALOGUE_STATES = dict(
+    audio_base64='',
+    audio_array=np.array([]),
+    disprompt = False,
+    new_prompt = "",
+    messages=[],
+    on_select=False,
+    on_upload=False,
+    on_record=False,
+    on_select_quick_action=False
+)
+
+
+DEFAULT_VOICE_CHAT_STATES = dict(
+    audio_base64='',
+    audio_array=np.array([]),
+    disprompt = False,
+    new_prompt = "",
+    messages=[],
+    on_select=False,
+    on_upload=False,
+    on_record=False,
+    on_select_quick_action=False
+)
+
+
+AUDIO_SAMPLES_W_INSTRUCT = {
+    "7_ASR_IMDA_PART3_30_ASR_v2_2269": {
+        "apperance": "7. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Need this talk written down, please."
+        ]
+    },
+    "11_ASR_IMDA_PART4_30_ASR_v2_3771": {
+        "apperance": "11. Automatic Speech Recognation task: conversation with Singlish code-switch",
+        "instructions": [
+            "Write out the dialogue as text."
+        ]
+    },
+    "12_ASR_IMDA_PART4_30_ASR_v2_103": {
+        "apperance": "12. Automatic Speech Recognation task: conversation with Singlish code-switch",
+        "instructions": [
+            "Write out the dialogue as text."
+        ]
+    },
+    "17_ASR_IMDA_PART6_30_ASR_v2_1413": {
+        "apperance": "17. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Record the spoken word in text form."
+        ]
+    },
+    "32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572": {
+        "apperance": "32. Spoken Question Answering task: general speech",
+        "instructions": [
+            "What does the man think the woman should do at 4:00."
+        ]
+    },
+    "33_SQA_IMDA_PART3_30_SQA_V2_2310": {
+        "apperance": "33. Spoken Question Answering task: conversation in Singapore accent",
+        "instructions": [
+            "Does Speaker2's wife cook for Speaker2 when they are at home."
+        ]
+    },
+    "34_SQA_IMDA_PART3_30_SQA_V2_3621": {
+        "apperance": "34. Spoken Question Answering task: conversation in Singapore accent",
+        "instructions": [
+            "Does the phrase \"#gai-gai#\" have a meaning in Chinese or Hokkien language."
+        ]
+    },
+    "35_SQA_IMDA_PART3_30_SQA_V2_4062": {
+        "apperance": "35. Spoken Question Answering task: conversation in Singapore accent",
+        "instructions": [
+            "What is the color of the vase mentioned in the dialogue."
+        ]
+    },
+    "36_DS_IMDA_PART4_30_DS_V2_849": {
+        "apperance": "36. Spoken Dialogue Summarization task: conversation with Singlish code-switch",
+        "instructions": [
+            "Condense the dialogue into a concise summary highlighting major topics and conclusions."
+        ]
+    },
+    "39_Paralingual_IEMOCAP_ER_V2_91": {
+        "apperance": "39. Paralinguistics task: general speech",
+        "instructions": [
+            "Based on the speaker's speech patterns, what do you think they are feeling."
+        ]
+    },
+    "40_Paralingual_IEMOCAP_ER_V2_567": {
+        "apperance": "40. Paralinguistics task: general speech",
+        "instructions": [
+            "Based on the speaker's speech patterns, what do you think they are feeling."
+        ]
+    },
+    "42_Paralingual_IEMOCAP_GR_V2_320": {
+        "apperance": "42. Paralinguistics task: general speech",
+        "instructions": [
+            "Is it possible for you to identify whether the speaker in this recording is male or female."
+        ]
+    },
+    "47_Paralingual_IMDA_PART3_30_NR_V2_10479": {
+        "apperance": "47. Paralinguistics task: conversation in Singapore accent",
+        "instructions": [
+            "Can you guess which ethnic group this person is from based on their accent."
+        ]
+    },
+    "49_Paralingual_MELD_ER_V2_676": {
+        "apperance": "49. Paralinguistics task: general speech",
+        "instructions": [
+            "What emotions do you think the speaker is expressing."
+        ]
+    },
+    "50_Paralingual_MELD_ER_V2_692": {
+        "apperance": "50. Paralinguistics task: general speech",
+        "instructions": [
+            "Based on the speaker's speech patterns, what do you think they are feeling."
+        ]
+    },
+    "51_Paralingual_VOXCELEB1_GR_V2_2148": {
+        "apperance": "51. Paralinguistics task: general speech",
+        "instructions": [
+            "May I know the gender of the speaker."
+        ]
+    },
+    "53_Paralingual_VOXCELEB1_NR_V2_2286": {
+        "apperance": "53. Paralinguistics task: general speech",
+        "instructions": [
+            "What's the nationality identity of the speaker."
+        ]
+    },
+    "55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2": {
+        "apperance": "55. Spoken Question Answering task: general speech",
+        "instructions": [
+            "What impact would the growth of the healthcare sector have on the country's economy in terms of employment and growth."
+        ]
+    },
+    "56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415": {
+        "apperance": "56. Spoken Question Answering task: general speech",
+        "instructions": [
+            "Based on the statement, can you summarize the speaker's position on the recent controversial issues in Singapore."
+        ]
+    },
+    "57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460": {
+        "apperance": "57. Spoken Question Answering task: general speech",
+        "instructions": [
+            "How does the author respond to parents' worries about masks in schools."
+        ]
+    },
+    "1_ASR_IMDA_PART1_ASR_v2_141": {
+        "apperance": "1. Automatic Speech Recognation task: phonetically balanced reading",
+        "instructions": [
+            "Turn the spoken language into a text format.",
+            "Please translate the content into Chinese."
+        ]
+    },
+    "2_ASR_IMDA_PART1_ASR_v2_2258": {
+        "apperance": "2. Automatic Speech Recognation task: phonetically balanced reading",
+        "instructions": [
+            "Turn the spoken language into a text format.",
+            "Please translate the content into Chinese."
+        ]
+    },
+    "3_ASR_IMDA_PART1_ASR_v2_2265": {
+        "apperance": "3. Automatic Speech Recognation task: phonetically balanced reading",
+        "instructions": [
+            "Turn the spoken language into a text format."
+        ]
+    },
+    "4_ASR_IMDA_PART2_ASR_v2_999": {
+        "apperance": "4. Automatic Speech Recognation task: reading in Singapore context",
+        "instructions": [
+            "Translate the spoken words into text format."
+        ]
+    },
+    "5_ASR_IMDA_PART2_ASR_v2_2241": {
+        "apperance": "5. Automatic Speech Recognation task: reading in Singapore context",
+        "instructions": [
+            "Translate the spoken words into text format."
+        ]
+    },
+    "6_ASR_IMDA_PART2_ASR_v2_3409": {
+        "apperance": "6. Automatic Speech Recognation task: reading in Singapore context",
+        "instructions": [
+            "Translate the spoken words into text format."
+        ]
+    },
+    "8_ASR_IMDA_PART3_30_ASR_v2_1698": {
+        "apperance": "8. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Need this talk written down, please."
+        ]
+    },
+    "9_ASR_IMDA_PART3_30_ASR_v2_2474": {
+        "apperance": "9. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Need this talk written down, please."
+        ]
+    },
+    "10_ASR_IMDA_PART4_30_ASR_v2_1527": {
+        "apperance": "10. Automatic Speech Recognation task: conversation with Singlish code-switch",
+        "instructions": [
+            "Write out the dialogue as text."
+        ]
+    },
+    "13_ASR_IMDA_PART5_30_ASR_v2_1446": {
+        "apperance": "13. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Translate this vocal recording into a textual format."
+        ]
+    },
+    "14_ASR_IMDA_PART5_30_ASR_v2_2281": {
+        "apperance": "14. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Translate this vocal recording into a textual format."
+        ]
+    },
+    "15_ASR_IMDA_PART5_30_ASR_v2_4388": {
+        "apperance": "15. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Translate this vocal recording into a textual format."
+        ]
+    },
+    "16_ASR_IMDA_PART6_30_ASR_v2_576": {
+        "apperance": "16. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Record the spoken word in text form."
+        ]
+    },
+    "18_ASR_IMDA_PART6_30_ASR_v2_2834": {
+        "apperance": "18. Automatic Speech Recognation task: conversation in Singapore accent",
+        "instructions": [
+            "Record the spoken word in text form."
+        ]
+    },
+    "19_ASR_AIShell_zh_ASR_v2_5044": {
+        "apperance": "19. Automatic Speech Recognation task: speech in Chinese ",
+        "instructions": [
+            "Transform the oral presentation into a text document."
+        ]
+    },
+    "20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833": {
+        "apperance": "20. Automatic Speech Recognation task: general speech",
+        "instructions": [
+            "Please provide a written transcription of the speech."
+        ]
+    },
+    "25_ST_COVOST2_ZH-CN_EN_ST_V2_4567": {
+        "apperance": "25. Speech Translation task: Chinese to English",
+        "instructions": [
+            "Please translate the given speech to English."
+        ]
+    },
+    "26_ST_COVOST2_EN_ZH-CN_ST_V2_5422": {
+        "apperance": "26. Speech Translation task: English to Chinese",
+        "instructions": [
+            "Please translate the given speech to Chinese."
+        ]
+    },
+    "27_ST_COVOST2_EN_ZH-CN_ST_V2_6697": {
+        "apperance": "27. Speech Translation task: English to Chinese",
+        "instructions": [
+            "Please translate the given speech to Chinese."
+        ]
+    },
+    "28_SI_ALPACA-GPT4-AUDIO_SI_V2_299": {
+        "apperance": "28. Speech Instruction task: general speech",
+        "instructions": [
+            "Please follow the instruction in the speech."
+        ]
+    },
+    "29_SI_ALPACA-GPT4-AUDIO_SI_V2_750": {
+        "apperance": "29. Speech Instruction task: general speech",
+        "instructions": [
+            "Please follow the instruction in the speech."
+        ]
+    },
+    "30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454": {
+        "apperance": "30. Speech Instruction task: general speech",
+        "instructions": [
+            "Please follow the instruction in the speech."
+        ]
+    }
+}
+
+
+def init_state_section():
+    st.set_page_config(page_title='MERaLiON-AudioLLM', page_icon = "🔥", layout='wide')
+
+    st.markdown(
+        (
+            '<style>' + \
+            open('./style/app_style.css').read() + \
+            open('./style/normal_window.css').read() + \
+            open('./style/small_window.css').read() + \
+            '</style>'
+        ),
+        unsafe_allow_html=True
+    )
+
+    if "server" not in st.session_state:
+        st.session_state.server = start_server()
+
+    if "client" not in st.session_state or 'model_name' not in st.session_state:
+        st.session_state.client, st.session_state.model_name = load_model()
+
+    for key, value in FIXED_GENERATION_CONFIG.items():
+        if key not in st.session_state:
+            st.session_state[key]=copy.deepcopy(value)
+
+    for key, value in DEFAULT_DIALOGUE_STATES.items():
+        if key not in st.session_state:
+            st.session_state[key]=copy.deepcopy(value)
+
+
+def header_section(component_name="Playground", icon="🤖"):
+    st.markdown(
+        f"<h1 style='text-align: center;'>MERaLiON-AudioLLM {component_name} {icon}</h1>",
+        unsafe_allow_html=True
+    )
+
+    st.markdown(
+        f"""<div class="main-intro-normal-window">
+        <p>This {component_name.lower()} is based on
+        <a href="https://huggingface.co/MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION"
+        target="_blank" rel="noopener noreferrer"> MERaLiON-AudioLLM</a>,
+        developed by I2R, A*STAR, in collaboration with AISG, Singapore.
+        It is tailored for Singapore’s multilingual and multicultural landscape.
+        MERaLiON-AudioLLM supports <strong>Automatic Speech Recognation</strong>,
+        <strong>Speech Translation</strong>,
+        <strong>Spoken Question Answering</strong>,
+        <strong>Spoken Dialogue Summarization</strong>,
+        <strong>Speech Instruction</strong>, and
+        <strong>Paralinguistics</strong> tasks.</p></div>""",
+        unsafe_allow_html=True
+    )
+
+    st.markdown(
+        f"""<div class="main-intro-small-window">
+        <p>This {component_name.lower()} is based on
+        <a href="https://huggingface.co/MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION"
+        target="_blank" rel="noopener noreferrer"> MERaLiON-AudioLLM</a>.</p></div>""",
+        unsafe_allow_html=True
+    )
+
+
+@st.fragment
+def sidebar_fragment():
+    with st.container(height=300, border=False):
+        st.page_link("pages/playground.py", label="Playground")
+        st.page_link("pages/voice_chat.py", label="Voice Chat (experimental)")
+
+
+    st.divider()
+
+    st.slider(label='Temperature', min_value=0.0, max_value=2.0, value=0.1, key='temperature')
+
+    st.slider(label='Top P', min_value=0.0, max_value=1.0, value=0.9, key='top_p')
+
+    st.slider(label="Repetition Penalty", min_value=1.0, max_value=1.2, value=1.1, key="repetition_penalty")
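Note the `copy.deepcopy` in `init_state_section()`: several defaults (the `messages` list, the NumPy arrays) are mutable, and writing them into `st.session_state` uncopied would let a session mutate the shared template. A small plain-Python sketch of the hazard this avoids:

    import copy

    # Stand-in for DEFAULT_DIALOGUE_STATES: a defaults template with a mutable value.
    DEFAULTS = dict(messages=[], new_prompt="")

    session_state = {}
    for key, value in DEFAULTS.items():
        if key not in session_state:
            session_state[key] = copy.deepcopy(value)  # as init_state_section() does

    session_state["messages"].append({"role": "user", "content": "hi"})
    assert DEFAULTS["messages"] == []  # the template stays pristine across sessions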
src/content/playground.py
ADDED
@@ -0,0 +1,208 @@
+import copy
+import base64
+
+import streamlit as st
+
+from src.generation import retrive_response
+from src.utils import bytes_to_array, array_to_bytes
+from src.content.common import (
+    AUDIO_SAMPLES_W_INSTRUCT,
+    DEFAULT_DIALOGUE_STATES,
+    init_state_section,
+    header_section,
+    sidebar_fragment
+)
+
+
+QUICK_ACTIONS = [
+    {
+        "name": "**Summary**",
+        "instruction": "Please summarise this speech.",
+        "width": 10,
+    },
+    {
+        "name": "**Transcript**",
+        "instruction": "Please transcribe this speech.",
+        "width": 9.5,
+    }
+]
+
+
+MAX_AUDIO_LENGTH = 120
+
+
+def _update_audio(audio_bytes):
+    origin_audio_array = bytes_to_array(audio_bytes)
+    truncated_audio_array = origin_audio_array[: MAX_AUDIO_LENGTH*16000]
+    truncated_audio_bytes = array_to_bytes(truncated_audio_array)
+
+    st.session_state.audio_array = origin_audio_array
+    st.session_state.audio_base64 = base64.b64encode(truncated_audio_bytes).decode('utf-8')
+
+
+@st.fragment
+def successful_example_section():
+    audio_sample_names = [audio_sample_name for audio_sample_name in AUDIO_SAMPLES_W_INSTRUCT.keys()]
+
+    st.markdown(":fire: **Successful Tasks and Examples**")
+
+    sample_name = st.selectbox(
+        label="**Select Audio:**",
+        label_visibility="collapsed",
+        options=audio_sample_names,
+        format_func=lambda o: AUDIO_SAMPLES_W_INSTRUCT[o]["apperance"],
+        index=None,
+        placeholder="Select an audio sample:",
+        on_change=lambda: st.session_state.update(
+            on_select=True,
+            messages=[],
+            disprompt=True
+        ),
+        key='select')
+
+    if sample_name and st.session_state.on_select:
+        audio_bytes = open(f"audio_samples/{sample_name}.wav", "rb").read()
+        st.session_state.update(
+            on_select=False,
+            new_prompt=AUDIO_SAMPLES_W_INSTRUCT[sample_name]["instructions"][0]
+        )
+        _update_audio(audio_bytes)
+        st.rerun(scope="app")
+
+
+@st.dialog("Specify Audio")
+def audio_attach_dialogue():
+    st.markdown("**Upload**")
+
+    uploaded_file = st.file_uploader(
+        label="**Upload Audio:**",
+        label_visibility="collapsed",
+        type=['wav', 'mp3'],
+        on_change=lambda: st.session_state.update(on_upload=True, messages=[]),
+        key='upload'
+    )
+
+    if uploaded_file and st.session_state.on_upload:
+        audio_bytes = uploaded_file.read()
+        _update_audio(audio_bytes)
+        st.session_state.on_upload = False
+        st.rerun()
+
+    st.markdown("**Record**")
+
+    uploaded_file = st.audio_input(
+        label="**Record Audio:**",
+        label_visibility="collapsed",
+        on_change=lambda: st.session_state.update(on_record=True, messages=[]),
+        key='record'
+    )
+
+    if uploaded_file and st.session_state.on_record:
+        audio_bytes = uploaded_file.read()
+        _update_audio(audio_bytes)
+        st.session_state.on_record = False
+        st.rerun()
+
+
+def bottom_input_section():
+    bottom_cols = st.columns([0.03, 0.03, 0.94])
+    with bottom_cols[0]:
+        st.button(
+            'Clear',
+            disabled=st.session_state.disprompt,
+            on_click=lambda: st.session_state.update(copy.deepcopy(DEFAULT_DIALOGUE_STATES))
+        )
+
+    with bottom_cols[1]:
+        if st.button("\+ Audio", disabled=st.session_state.disprompt):
+            audio_attach_dialogue()
+
+    with bottom_cols[2]:
+        if chat_input := st.chat_input(
+            placeholder="Instruction...",
+            disabled=st.session_state.disprompt,
+            on_submit=lambda: st.session_state.update(disprompt=True, messages=[])
+        ):
+            st.session_state.new_prompt = chat_input
+
+
+@st.fragment
+def quick_actions_fragment():
+    action_cols_spec = [_["width"] for _ in QUICK_ACTIONS]
+    action_cols = st.columns(action_cols_spec)
+
+    for idx, action in enumerate(QUICK_ACTIONS):
+        action_cols[idx].button(
+            action["name"],
+            args=(action["instruction"],),
+            disabled=st.session_state.disprompt,
+            on_click=lambda p: st.session_state.update(
+                disprompt=True,
+                messages=[],
+                new_prompt=p,
+                on_select_quick_action=True
+            )
+        )
+
+    if st.session_state.on_select_quick_action:
+        st.session_state.on_select_quick_action = False
+        st.rerun(scope="app")
+
+
+def conversation_section():
+    if st.session_state.audio_array.size:
+        with st.chat_message("user"):
+            st.audio(st.session_state.audio_array, format="audio/wav", sample_rate=16000)
+            quick_actions_fragment()
+
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            if message.get("error"):
+                st.error(message["error"])
+            for warning_msg in message.get("warnings", []):
+                st.warning(warning_msg)
+            if message.get("content"):
+                st.write(message["content"])
+
+    with st._bottom:
+        bottom_input_section()
+
+    if one_time_prompt := st.session_state.new_prompt:
+        st.session_state.update(new_prompt="", messages=[])
+
+        with st.chat_message("user"):
+            st.write(one_time_prompt)
+            st.session_state.messages.append({"role": "user", "content": one_time_prompt})
+
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                error_msg, warnings, stream = retrive_response(
+                    one_time_prompt, st.session_state.audio_base64, stream=True)
+            response = ""
+
+            if error_msg:
+                st.error(error_msg)
+            for warning_msg in warnings:
+                st.warning(warning_msg)
+            if stream:
+                response = st.write_stream(stream)
+
+            st.session_state.messages.append({
+                "role": "assistant",
+                "error": error_msg,
+                "warnings": warnings,
+                "content": response
+            })
+
+        st.session_state.disprompt=False
+        st.rerun(scope="app")
+
+def playground_page():
+    init_state_section()
+    header_section()
+
+    with st.sidebar:
+        sidebar_fragment()
+
+    successful_example_section()
+    conversation_section()
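`_update_audio()` keeps the full waveform for playback but truncates what is encoded for the model: at the 16 kHz rate used throughout the app, `MAX_AUDIO_LENGTH * 16000` is 120 * 16000 = 1,920,000 samples. A self-contained check of that arithmetic (dummy signal, not app code):

    import numpy as np

    MAX_AUDIO_LENGTH = 120  # seconds, as in src/content/playground.py
    SAMPLE_RATE = 16000     # Hz

    audio = np.zeros(150 * SAMPLE_RATE, dtype=np.float32)  # dummy 150 s clip
    truncated = audio[: MAX_AUDIO_LENGTH * SAMPLE_RATE]    # same slice as _update_audio

    assert truncated.shape[0] == 1_920_000
    print(truncated.shape[0] / SAMPLE_RATE)  # 120.0 seconds retained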
src/content/voice_chat.py
ADDED
@@ -0,0 +1,153 @@
+import copy
+import base64
+
+import numpy as np
+import streamlit as st
+
+from src.generation import retrive_response
+from src.utils import bytes_to_array, array_to_bytes
+from src.content.common import (
+    DEFAULT_DIALOGUE_STATES,
+    init_state_section,
+    header_section,
+    sidebar_fragment
+)
+
+
+# TODO: change this.
+DEFAULT_PROMPT = "Please follow the instruction in the speech."
+
+
+MAX_AUDIO_LENGTH = 120
+
+
+def _update_audio(audio_bytes):
+    origin_audio_array = bytes_to_array(audio_bytes)
+    truncated_audio_array = origin_audio_array[: MAX_AUDIO_LENGTH*16000]
+    truncated_audio_bytes = array_to_bytes(truncated_audio_array)
+
+    st.session_state.audio_array = origin_audio_array
+    st.session_state.audio_base64 = base64.b64encode(truncated_audio_bytes).decode('utf-8')
+
+
+@st.dialog("Specify Audio")
+def audio_attach_dialogue():
+    st.markdown("**Upload**")
+
+    uploaded_file = st.file_uploader(
+        label="**Upload Audio:**",
+        label_visibility="collapsed",
+        type=['wav', 'mp3'],
+        on_change=lambda: st.session_state.update(
+            on_upload=True,
+            messages=[],
+            disprompt=True
+        ),
+        key='upload'
+    )
+
+    if uploaded_file and st.session_state.on_upload:
+        audio_bytes = uploaded_file.read()
+        _update_audio(audio_bytes)
+        st.session_state.update(
+            on_upload=False,
+            new_prompt=DEFAULT_PROMPT
+        )
+        st.rerun()
+
+
+def bottom_input_section():
+    bottom_cols = st.columns([0.03, 0.03, 0.94])
+    with bottom_cols[0]:
+        st.button(
+            'Clear',
+            disabled=st.session_state.disprompt,
+            on_click=lambda: st.session_state.update(copy.deepcopy(DEFAULT_DIALOGUE_STATES))
+        )
+
+    with bottom_cols[1]:
+        if st.button("\+ Audio", disabled=st.session_state.disprompt):
+            audio_attach_dialogue()
+
+    with bottom_cols[2]:
+        uploaded_file = st.audio_input(
+            label="record audio",
+            label_visibility="collapsed",
+            on_change=lambda: st.session_state.update(
+                on_record=True,
+                messages=[],
+                disprompt=True
+            ),
+            key='record'
+        )
+
+        if uploaded_file and st.session_state.on_record:
+            audio_bytes = uploaded_file.read()
+            _update_audio(audio_bytes)
+            st.session_state.update(
+                on_record=False,
+                new_prompt=DEFAULT_PROMPT
+            )
+
+
+def conversation_section():
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            if message.get("error"):
+                st.error(message["error"])
+            for warning_msg in message.get("warnings", []):
+                st.warning(warning_msg)
+            if message.get("audio", np.array([])).shape[0]:
+                st.audio(message["audio"], format="audio/wav", sample_rate=16000)
+            if message.get("content"):
+                st.write(message["content"])
+
+    with st._bottom:
+        bottom_input_section()
+
+    if one_time_prompt := st.session_state.new_prompt:
+        one_time_array = st.session_state.audio_array
+        one_time_base64 = st.session_state.audio_base64
+        st.session_state.update(
+            new_prompt="",
+            one_time_array=np.array([]),
+            one_time_base64="",
+            messages=[]
+        )
+
+        with st.chat_message("user"):
+            st.audio(one_time_array, format="audio/wav", sample_rate=16000)
+
+        st.session_state.messages.append({"role": "user", "audio": one_time_array})
+
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                error_msg, warnings, stream = retrive_response(
+                    one_time_prompt, one_time_base64, stream=True)
+            response = ""
+
+            if error_msg:
+                st.error(error_msg)
+            for warning_msg in warnings:
+                st.warning(warning_msg)
+            if stream:
+                response = st.write_stream(stream)
+
+            st.session_state.messages.append({
+                "role": "assistant",
+                "error": error_msg,
+                "warnings": warnings,
+                "content": response
+            })
+
+        st.session_state.disprompt=False
+        st.rerun(scope="app")
+
+def voice_chat_page():
+    init_state_section()
+    header_section(component_name="Voice Chat")
+
+    with st.sidebar:
+        sidebar_fragment()
+
+    conversation_section()
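`conversation_section()` here uses the same consume-once pattern as the playground page: the pending prompt is read and cleared in one step, so the full-app rerun at the end cannot resubmit the same request. Reduced to plain Python for illustration:

    state = {"new_prompt": "Please follow the instruction in the speech."}

    def handle_pending(state):
        # Read and clear together, mirroring the walrus check plus update().
        if one_time_prompt := state["new_prompt"]:
            state["new_prompt"] = ""
            return f"processed: {one_time_prompt}"
        return None

    print(handle_pending(state))  # processed: Please follow the instruction ...
    print(handle_pending(state))  # None; a second pass does nothing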
src/generation.py
CHANGED
@@ -19,6 +19,8 @@ FIXED_GENERATION_CONFIG = dict(
     seed=42
 )
 
+MAX_AUDIO_LENGTH = 120
+
 
 def load_model():
     """
@@ -100,7 +102,7 @@ def _retry_retrive_response_throws_exception(text_input, base64_audio_input, str
     return response_object
 
 
-def _validate_text_input(text_input) -> List[str]:
+def _validate_input(text_input) -> List[str]:
     """
     TODO: improve the input validation regex.
     """
@@ -111,11 +113,17 @@ def _validate_text_input(text_input) -> List[str]:
     if re.search(r'[\u4e00-\u9fff]+', text_input):
         warnings.append("NOTE: Please try to prompt in English for the best performance.")
 
+    if st.session_state.audio_array.shape[0] / 16000 > 30.0:
+        warnings.append((
+            "MERaLiON-AudioLLM is trained to process audio up to **30 seconds**."
+            f" Audio longer than **{MAX_AUDIO_LENGTH} seconds** will be truncated."
+        ))
+
     return warnings
 
 
 def retrive_response(text_input, base64_audio_input, stream=False):
-    warnings = _validate_text_input(text_input)
+    warnings = _validate_input(text_input)
 
     response_object, error_msg = None, ""
     try:
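The check added to `_validate_input` keeps two thresholds distinct: 30 seconds is the model's training horizon and only produces a warning, while the hard cut happens at `MAX_AUDIO_LENGTH` (120 seconds) inside `_update_audio`. The duration check in isolation, fed an assumed sample count:

    MAX_AUDIO_LENGTH = 120
    SAMPLE_RATE = 16000

    def length_warnings(num_samples):
        # Mirrors the duration branch added to _validate_input: warn past 30 s;
        # truncation itself only happens at MAX_AUDIO_LENGTH.
        warnings = []
        if num_samples / SAMPLE_RATE > 30.0:
            warnings.append(
                "MERaLiON-AudioLLM is trained to process audio up to **30 seconds**."
                f" Audio longer than **{MAX_AUDIO_LENGTH} seconds** will be truncated."
            )
        return warnings

    print(length_warnings(45 * SAMPLE_RATE))  # one warning
    print(length_warnings(10 * SAMPLE_RATE))  # []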
src/pages.py
DELETED
@@ -1,220 +0,0 @@
-import base64
-
-import numpy as np
-import streamlit as st
-
-from src.generation import retrive_response, postprocess_voice_transcription
-from src.utils import (
-    GENERAL_INSTRUCTIONS,
-    AUDIO_SAMPLES_W_INSTRUCT,
-    bytes_to_array,
-    array_to_bytes,
-)
-
-
-DEFAULT_DIALOGUE_STATES = dict(
-    default_instruction=[],
-    audio_base64='',
-    audio_array=np.array([]),
-    disprompt = False,
-    new_prompt = "",
-    messages=[],
-    voice_instruction="",
-    on_select=False,
-    on_upload=False,
-    on_record=False,
-    on_click_button=False,
-    on_record_voice=False
-)
-
-
-MAX_AUDIO_LENGTH = 120
-
-
-def _update_audio(audio_bytes):
-    origin_audio_array = bytes_to_array(audio_bytes)
-    truncated_audio_array = origin_audio_array[: MAX_AUDIO_LENGTH*16000]
-    truncated_audio_bytes = array_to_bytes(truncated_audio_array)
-
-    st.session_state.audio_array = origin_audio_array
-    st.session_state.audio_base64 = base64.b64encode(truncated_audio_bytes).decode('utf-8')
-
-
-@st.fragment
-def sidebar_fragment():
-    st.markdown("""<div class="sidebar-intro">
-        <p><strong>📌 Supported Tasks</strong>
-        <p>Automatic Speech Recognation</p>
-        <p>Speech Translation</p>
-        <p>Spoken Question Answering</p>
-        <p>Spoken Dialogue Summarization</p>
-        <p>Speech Instruction</p>
-        <p>Paralinguistics</p>
-        <br>
-        <p><strong>📎 Generation Config</strong>
-        </div>""", unsafe_allow_html=True)
-
-    st.slider(label='Temperature', min_value=0.0, max_value=2.0, value=0.1, key='temperature')
-
-    st.slider(label='Top P', min_value=0.0, max_value=1.0, value=0.9, key='top_p')
-
-    st.slider(label="Repetition Penalty", min_value=1.0, max_value=1.2, value=1.1, key="repetition_penalty")
-
-@st.fragment
-def specify_audio_fragment():
-    col1, col2, col3 = st.columns([4, 2, 2])
-
-    with col1:
-        audio_sample_names = [audio_sample_name for audio_sample_name in AUDIO_SAMPLES_W_INSTRUCT.keys()]
-
-        st.markdown("**Select Audio From Examples:**")
-
-        sample_name = st.selectbox(
-            label="**Select Audio:**",
-            label_visibility="collapsed",
-            options=audio_sample_names,
-            index=None,
-            placeholder="Select an audio sample:",
-            on_change=lambda: st.session_state.update(on_select=True),
-            key='select')
-
-        if sample_name and st.session_state.on_select:
-            audio_bytes = open(f"audio_samples/{sample_name}.wav", "rb").read()
-            st.session_state.default_instruction = AUDIO_SAMPLES_W_INSTRUCT[sample_name] + GENERAL_INSTRUCTIONS
-            _update_audio(audio_bytes)
-
-
-    with col2:
-        st.markdown("or **Upload Audio:**")
-
-        uploaded_file = st.file_uploader(
-            label="**Upload Audio:**",
-            label_visibility="collapsed",
-            type=['wav', 'mp3'],
-            on_change=lambda: st.session_state.update(on_upload=True),
-            key='upload'
-        )
-
-        if uploaded_file and st.session_state.on_upload:
-            audio_bytes = uploaded_file.read()
-            st.session_state.default_instruction = GENERAL_INSTRUCTIONS
-            _update_audio(audio_bytes)
-
-
-    with col3:
-        st.markdown("or **Record Audio:**")
-
-        uploaded_file = st.audio_input(
-            label="**Record Audio:**",
-            label_visibility="collapsed",
-            on_change=lambda: st.session_state.update(on_record=True),
-            key='record'
-        )
-
-        if uploaded_file and st.session_state.on_record:
-            audio_bytes = uploaded_file.read()
-            st.session_state.default_instruction = GENERAL_INSTRUCTIONS
-            _update_audio(audio_bytes)
-
-    st.session_state.update(on_upload=False, on_record=False, on_select=False)
-
-    if st.session_state.audio_array.size:
-        with st.chat_message("user"):
-            if st.session_state.audio_array.shape[0] / 16000 > 30.0:
-                st.warning((
-                    "MERaLiON-AudioLLM is trained to process audio up to **30 seconds**."
-                    f" Audio longer than **{MAX_AUDIO_LENGTH} seconds** will be truncated."
-                ))
-
-            st.audio(st.session_state.audio_array, format="audio/wav", sample_rate=16000)
-
-            for i, inst in enumerate(st.session_state.default_instruction):
-                st.button(
-                    f"**Example Instruction {i+1}**: {inst}",
-                    args=(inst,),
-                    disabled=st.session_state.disprompt,
-                    on_click=lambda p: st.session_state.update(disprompt=True, new_prompt=p, on_click_button=True, messages=[])
-                )
-
-    if st.session_state.on_click_button:
-        st.session_state.on_click_button = False
-        st.rerun(scope="app")
-
-
-def bottom_input_section():
-    bottom_cols = st.columns([0.02, 0.98])
-
-    uploaded_file = bottom_cols[0].audio_input(
-        label="voice",
-        label_visibility="collapsed",
-        disabled=st.session_state.disprompt,
-        on_change=lambda: st.session_state.update(on_record_voice=True),
-        key='voice'
-    )
-
-    if uploaded_file and st.session_state.on_record_voice:
-        audio_bytes = uploaded_file.read()
-        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
-        error_msg, warnings, completion = retrive_response(
-            "Write out the dialogue as text.", audio_base64, stream=False)
-
-        if error_msg:
-            st.toast(error_msg, icon="🚨")
-        for warning_msg in warnings:
-            st.toast(warning_msg, icon="❗")
-
-        st.session_state.update(
-            new_prompt = postprocess_voice_transcription(
-                completion.choices[0].message.content),
-            on_record_voice = False
-        )
-
-    if chat_input := bottom_cols[1].chat_input(
-        placeholder="Type Your Instruction Here",
-        disabled=st.session_state.disprompt,
-        on_submit=lambda: st.session_state.update(disprompt=True, messages=[])
-    ):
-        st.session_state.new_prompt = chat_input
-
-def conversation_section():
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            if message.get("error"):
-                st.error(message["error"])
-            for warning_msg in message.get("warnings", []):
-                st.warning(warning_msg)
-            if message.get("content"):
-                st.write(message["content"])
-
-    with st._bottom:
-        bottom_input_section()
-
-    if one_time_prompt := st.session_state.new_prompt:
-        st.session_state.update(new_prompt="", messages=[])
-
-        with st.chat_message("user"):
-            st.write(one_time_prompt)
-            st.session_state.messages.append({"role": "user", "content": one_time_prompt})
-
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking..."):
-                error_msg, warnings, stream = retrive_response(
-                    one_time_prompt, st.session_state.audio_base64, stream=True)
-            response = ""
-
-            if error_msg:
-                st.error(error_msg)
-            for warning_msg in warnings:
-                st.warning(warning_msg)
-            if stream:
-                response = st.write_stream(stream)
-
-            st.session_state.messages.append({
-                "role": "assistant",
-                "error": error_msg,
-                "warnings": warnings,
-                "content": response
-            })
-
-        st.session_state.disprompt=False
-        st.rerun(scope="app")
src/utils.py
CHANGED
@@ -4,70 +4,6 @@ from scipy.io.wavfile import write
 import librosa
 
 
-GENERAL_INSTRUCTIONS = [
-    "Please transcribe this speech.",
-    "Please summarise this speech."
-]
-
-
-AUDIO_SAMPLES_W_INSTRUCT = {
-    '7_ASR_IMDA_PART3_30_ASR_v2_2269': ["Need this talk written down, please."],
-    '11_ASR_IMDA_PART4_30_ASR_v2_3771': ["Write out the dialogue as text."],
-    '12_ASR_IMDA_PART4_30_ASR_v2_103' : ["Write out the dialogue as text."],
-    '17_ASR_IMDA_PART6_30_ASR_v2_1413': ["Record the spoken word in text form."],
-
-    '32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572': ["What does the man think the woman should do at 4:00."],
-    '33_SQA_IMDA_PART3_30_SQA_V2_2310': ["Does Speaker2's wife cook for Speaker2 when they are at home."],
-    '34_SQA_IMDA_PART3_30_SQA_V2_3621': ["Does the phrase \"#gai-gai#\" have a meaning in Chinese or Hokkien language."],
-    '35_SQA_IMDA_PART3_30_SQA_V2_4062': ["What is the color of the vase mentioned in the dialogue."],
-    '36_DS_IMDA_PART4_30_DS_V2_849': ["Condense the dialogue into a concise summary highlighting major topics and conclusions."],
-
-    '39_Paralingual_IEMOCAP_ER_V2_91': ["Based on the speaker's speech patterns, what do you think they are feeling."],
-    '40_Paralingual_IEMOCAP_ER_V2_567': ["Based on the speaker's speech patterns, what do you think they are feeling."],
-    '42_Paralingual_IEMOCAP_GR_V2_320': ["Is it possible for you to identify whether the speaker in this recording is male or female."],
-    '47_Paralingual_IMDA_PART3_30_NR_V2_10479': ["Can you guess which ethnic group this person is from based on their accent."],
-    '49_Paralingual_MELD_ER_V2_676': ["What emotions do you think the speaker is expressing."],
-    '50_Paralingual_MELD_ER_V2_692': ["Based on the speaker's speech patterns, what do you think they are feeling."],
-    '51_Paralingual_VOXCELEB1_GR_V2_2148': ["May I know the gender of the speaker."],
-    '53_Paralingual_VOXCELEB1_NR_V2_2286': ["What's the nationality identity of the speaker."],
-
-    '55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2': ["What impact would the growth of the healthcare sector have on the country's economy in terms of employment and growth."],
-    '56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415': ["Based on the statement, can you summarize the speaker's position on the recent controversial issues in Singapore."],
-    '57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460': ["How does the author respond to parents' worries about masks in schools."],
-
-    '1_ASR_IMDA_PART1_ASR_v2_141' : ["Turn the spoken language into a text format.", "Please translate the content into Chinese."],
-    '2_ASR_IMDA_PART1_ASR_v2_2258': ["Turn the spoken language into a text format.", "Please translate the content into Chinese."],
-    '3_ASR_IMDA_PART1_ASR_v2_2265': ["Turn the spoken language into a text format."],
-
-    '4_ASR_IMDA_PART2_ASR_v2_999' : ["Translate the spoken words into text format."],
-    '5_ASR_IMDA_PART2_ASR_v2_2241': ["Translate the spoken words into text format."],
-    '6_ASR_IMDA_PART2_ASR_v2_3409': ["Translate the spoken words into text format."],
-
-    '8_ASR_IMDA_PART3_30_ASR_v2_1698': ["Need this talk written down, please."],
-    '9_ASR_IMDA_PART3_30_ASR_v2_2474': ["Need this talk written down, please."],
-
-    '10_ASR_IMDA_PART4_30_ASR_v2_1527': ["Write out the dialogue as text."],
-
-    '13_ASR_IMDA_PART5_30_ASR_v2_1446': ["Translate this vocal recording into a textual format."],
-    '14_ASR_IMDA_PART5_30_ASR_v2_2281': ["Translate this vocal recording into a textual format."],
-    '15_ASR_IMDA_PART5_30_ASR_v2_4388': ["Translate this vocal recording into a textual format."],
-
-    '16_ASR_IMDA_PART6_30_ASR_v2_576': ["Record the spoken word in text form."],
-    '18_ASR_IMDA_PART6_30_ASR_v2_2834': ["Record the spoken word in text form."],
-
-    '19_ASR_AIShell_zh_ASR_v2_5044': ["Transform the oral presentation into a text document."],
-    '20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833': ["Please provide a written transcription of the speech."],
-
-    '25_ST_COVOST2_ZH-CN_EN_ST_V2_4567': ["Please translate the given speech to English."],
-    '26_ST_COVOST2_EN_ZH-CN_ST_V2_5422': ["Please translate the given speech to Chinese."],
-
-    '27_ST_COVOST2_EN_ZH-CN_ST_V2_6697': ["Please translate the given speech to Chinese."],
-    '28_SI_ALPACA-GPT4-AUDIO_SI_V2_299': ["Please follow the instruction in the speech."],
-    '29_SI_ALPACA-GPT4-AUDIO_SI_V2_750': ["Please follow the instruction in the speech."],
-    '30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454': ["Please follow the instruction in the speech."],
-}
-
-
 def bytes_to_array(audio_bytes):
     audio_array, _ = librosa.load(
         io.BytesIO(audio_bytes),
style/app_style.css
CHANGED
@@ -1,16 +1,35 @@
+div[data-testid="stMainBlockContainer"] div[data-testid="stAudioInput"]>div {
+    max-height: 3rem;
+}
+
+div[class="sidebar-intro"] p {
+    margin-bottom: 0.75rem;
+}
+
+[class='stAudio'] {
+    max-width: 500px !important;
+    margin: auto !important;
+}
+
 div[data-testid="stChatMessage"]:has(> div[data-testid="stChatMessageAvatarUser"]) {
     flex-direction: row-reverse;
     text-align: right;
 }
 
-@media(min-width: 576px) {
-    .stMainBlockContainer {
-        padding: 2rem 5rem 1rem;
-    }
+div[data-testid="stChatMessage"] div[data-testid="stHorizontalBlock"]:has(> div[data-testid="stColumn"]) {
+    flex-direction: row-reverse;
 }
 
+div[data-testid="stChatMessage"] div[data-testid="stHorizontalBlock"]>div[data-testid="stColumn"]:has( div[data-testid="stButton"]) {
+    width: 6rem;
+    min-width: 6rem;
+    flex: 0 0 6rem;
+}
+
+/* File uploader */
+
 section[data-testid='stFileUploaderDropzone'] {
-    padding:
+    padding:6px 2rem;
 }
 
 section[data-testid='stFileUploaderDropzone']>button {
@@ -21,40 +40,45 @@ div[data-testid="stFileUploaderDropzoneInstructions"]>div>span {
     display:none;
 }
 
-div[data-testid="
-
+div[data-testid="stBottomBlockContainer"] {
+    padding-bottom: 2rem;
 }
 
-
-    background-color:transparent;
-    /* border:1px solid rgba(49, 51, 63, 0.2); */
-    max-height: 40px;
-    display: block;
-    padding: 0;
-    margin: auto;
-}
+/* Chat input component at the bottom */
 
-div[data-testid="stBottomBlockContainer"] div[data-testid="
-
+div[data-testid="stBottomBlockContainer"] div[data-testid="stHorizontalBlock"]:has(> div[data-testid="stColumn"]) {
+    gap: 4px;
 }
 
-div[data-testid="stBottomBlockContainer"] div[data-testid="
-
+div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stButton"]):first-of-type {
+    width: 61px;
+    min-width: 61px;
+    flex: 0 0 61px;
 }
 
-div[data-testid="stBottomBlockContainer"] div[data-testid="
-
+div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stButton"]):nth-of-type(2) {
+    width: 76px;
+    min-width: 76px;
+    flex: 0 0 76px;
 }
 
-div[data-testid="stBottomBlockContainer"] div[data-testid="
-
+div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"] button[data-testid="stBaseButton-secondary"] {
+    background-color: rgb(240, 242, 246);
+    border-color: rgb(240, 242, 246);
 }
 
-div[data-testid="stBottomBlockContainer"] div[data-testid="
-
+div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stChatInput"]) {
+    width: 10rem;
+    min-width: 10rem;
+    flex: 1 1 10rem;
 }
 
-[class='stAudio'] {
-    max-width: 500px !important;
-    margin: auto !important;
+div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stAudioInput"]) {
+    width: 10rem;
+    min-width: 10rem;
+    flex: 1 1 10rem;
 }
+
+div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div {
+    max-height: 40px;
+}
style/normal_window.css
ADDED
@@ -0,0 +1,14 @@
+@media(min-width: 576px) {
+    .stMainBlockContainer {
+        padding: 2rem 5rem 1rem;
+    }
+
+    div[data-testid="stBottomBlockContainer"] {
+        padding-left: 5rem;
+        padding-right: 5rem;
+    }
+
+    div[class="main-intro-small-window"] {
+        display: none;
+    }
+}
style/small_window.css
ADDED
@@ -0,0 +1,9 @@
+@media(max-width: 576px) {
+    div[data-testid="stMainBlockContainer"] div[data-testid="stVerticalBlock"]>div[data-testid="stElementContainer"]:has( div[data-testid="stHeadingWithActionElements"]) {
+        display: none;
+    }
+
+    div[class="main-intro-normal-window"] {
+        display: none;
+    }
+}