diff --git a/README.md b/README.md index 8a00bb12a279eee5165d19eb79bef0c8c3e02ba9..bc366c8faab51bf7ea525a3ee829ef357225f516 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,11 @@ title: Interactive-Demo / MERaLiON-AudioLLM emoji: 🚀 colorFrom: indigo colorTo: indigo -sdk: streamlit -sdk_version: 1.41.1 -app_file: app.py +sdk: static pinned: true models: +- MERaLiON/MERaLiON-2-10B +- MERaLiON/MERaLiON-2-10B-ASR - MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/app.py b/app.py deleted file mode 100644 index e497863323cffda8c0dcd856a502419068d5c166..0000000000000000000000000000000000000000 --- a/app.py +++ /dev/null @@ -1,3 +0,0 @@ -from src.content.playground import playground_page - -playground_page() \ No newline at end of file diff --git a/audio_samples/10_ASR_IMDA_PART4_30_ASR_v2_1527.wav b/audio_samples/10_ASR_IMDA_PART4_30_ASR_v2_1527.wav deleted file mode 100644 index d1fa45ef18df244757240868794d54063f723d56..0000000000000000000000000000000000000000 Binary files a/audio_samples/10_ASR_IMDA_PART4_30_ASR_v2_1527.wav and /dev/null differ diff --git a/audio_samples/11_ASR_IMDA_PART4_30_ASR_v2_3771.wav b/audio_samples/11_ASR_IMDA_PART4_30_ASR_v2_3771.wav deleted file mode 100644 index f8c81dfdd16b9167a273a3964a6a3dd47de80d98..0000000000000000000000000000000000000000 Binary files a/audio_samples/11_ASR_IMDA_PART4_30_ASR_v2_3771.wav and /dev/null differ diff --git a/audio_samples/12_ASR_IMDA_PART4_30_ASR_v2_103.wav b/audio_samples/12_ASR_IMDA_PART4_30_ASR_v2_103.wav deleted file mode 100644 index 186c8c090c344c63e0ea828cc12a51207df58aff..0000000000000000000000000000000000000000 Binary files a/audio_samples/12_ASR_IMDA_PART4_30_ASR_v2_103.wav and /dev/null differ diff --git a/audio_samples/13_ASR_IMDA_PART5_30_ASR_v2_1446.wav b/audio_samples/13_ASR_IMDA_PART5_30_ASR_v2_1446.wav deleted file mode 100644 index 0e606bfd770bb85abab4957d0380fef221c254c4..0000000000000000000000000000000000000000 Binary files a/audio_samples/13_ASR_IMDA_PART5_30_ASR_v2_1446.wav and /dev/null differ diff --git a/audio_samples/14_ASR_IMDA_PART5_30_ASR_v2_2281.wav b/audio_samples/14_ASR_IMDA_PART5_30_ASR_v2_2281.wav deleted file mode 100644 index cbf2fad1cc780f4a7d691974033dcef7fd0fcef0..0000000000000000000000000000000000000000 Binary files a/audio_samples/14_ASR_IMDA_PART5_30_ASR_v2_2281.wav and /dev/null differ diff --git a/audio_samples/15_ASR_IMDA_PART5_30_ASR_v2_4388.wav b/audio_samples/15_ASR_IMDA_PART5_30_ASR_v2_4388.wav deleted file mode 100644 index 8ea95dd7dbab7762154da8588b7ead701317e956..0000000000000000000000000000000000000000 Binary files a/audio_samples/15_ASR_IMDA_PART5_30_ASR_v2_4388.wav and /dev/null differ diff --git a/audio_samples/16_ASR_IMDA_PART6_30_ASR_v2_576.wav b/audio_samples/16_ASR_IMDA_PART6_30_ASR_v2_576.wav deleted file mode 100644 index df7b46f0a8f4f93b280cd9c91e486956e5d89b11..0000000000000000000000000000000000000000 Binary files a/audio_samples/16_ASR_IMDA_PART6_30_ASR_v2_576.wav and /dev/null differ diff --git a/audio_samples/17_ASR_IMDA_PART6_30_ASR_v2_1413.wav b/audio_samples/17_ASR_IMDA_PART6_30_ASR_v2_1413.wav deleted file mode 100644 index 69c2b3ee39465bc58efe769fde69c95c9d5092fc..0000000000000000000000000000000000000000 Binary files a/audio_samples/17_ASR_IMDA_PART6_30_ASR_v2_1413.wav and /dev/null differ diff --git a/audio_samples/18_ASR_IMDA_PART6_30_ASR_v2_2834.wav b/audio_samples/18_ASR_IMDA_PART6_30_ASR_v2_2834.wav deleted file mode 100644 index 
1d89e648d87d2bc193f728ac86b54ea7a4e07634..0000000000000000000000000000000000000000 Binary files a/audio_samples/18_ASR_IMDA_PART6_30_ASR_v2_2834.wav and /dev/null differ diff --git a/audio_samples/19_ASR_AIShell_zh_ASR_v2_5044.wav b/audio_samples/19_ASR_AIShell_zh_ASR_v2_5044.wav deleted file mode 100644 index b296224725ec5acf74a02304f6beb6a7723d2c89..0000000000000000000000000000000000000000 Binary files a/audio_samples/19_ASR_AIShell_zh_ASR_v2_5044.wav and /dev/null differ diff --git a/audio_samples/1_ASR_IMDA_PART1_ASR_v2_141.wav b/audio_samples/1_ASR_IMDA_PART1_ASR_v2_141.wav deleted file mode 100644 index 17c5fc99647aaa658eabe035b40f97f8ea7638d1..0000000000000000000000000000000000000000 Binary files a/audio_samples/1_ASR_IMDA_PART1_ASR_v2_141.wav and /dev/null differ diff --git a/audio_samples/20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833.wav b/audio_samples/20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833.wav deleted file mode 100644 index 1ec609efd1c3790487c3c0aec77e5e3e5b0c3eda..0000000000000000000000000000000000000000 Binary files a/audio_samples/20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833.wav and /dev/null differ diff --git a/audio_samples/25_ST_COVOST2_ZH-CN_EN_ST_V2_4567.wav b/audio_samples/25_ST_COVOST2_ZH-CN_EN_ST_V2_4567.wav deleted file mode 100644 index 55c435f3b228e136e3c1047a4b43e992b9acfc0f..0000000000000000000000000000000000000000 Binary files a/audio_samples/25_ST_COVOST2_ZH-CN_EN_ST_V2_4567.wav and /dev/null differ diff --git a/audio_samples/26_ST_COVOST2_EN_ZH-CN_ST_V2_5422.wav b/audio_samples/26_ST_COVOST2_EN_ZH-CN_ST_V2_5422.wav deleted file mode 100644 index f2780b3b7da1d553f59f4f29256b4e848049cf52..0000000000000000000000000000000000000000 Binary files a/audio_samples/26_ST_COVOST2_EN_ZH-CN_ST_V2_5422.wav and /dev/null differ diff --git a/audio_samples/27_ST_COVOST2_EN_ZH-CN_ST_V2_6697.wav b/audio_samples/27_ST_COVOST2_EN_ZH-CN_ST_V2_6697.wav deleted file mode 100644 index 234f811d4c60ab67659f06bcd1db481a11648ca9..0000000000000000000000000000000000000000 Binary files a/audio_samples/27_ST_COVOST2_EN_ZH-CN_ST_V2_6697.wav and /dev/null differ diff --git a/audio_samples/28_SI_ALPACA-GPT4-AUDIO_SI_V2_299.wav b/audio_samples/28_SI_ALPACA-GPT4-AUDIO_SI_V2_299.wav deleted file mode 100644 index 239fff4d4cfcf2653e00d97ca842f334bd31ed18..0000000000000000000000000000000000000000 Binary files a/audio_samples/28_SI_ALPACA-GPT4-AUDIO_SI_V2_299.wav and /dev/null differ diff --git a/audio_samples/29_SI_ALPACA-GPT4-AUDIO_SI_V2_750.wav b/audio_samples/29_SI_ALPACA-GPT4-AUDIO_SI_V2_750.wav deleted file mode 100644 index 35d9dfbdc9ca3169a05c50a548cb5836adc65d52..0000000000000000000000000000000000000000 Binary files a/audio_samples/29_SI_ALPACA-GPT4-AUDIO_SI_V2_750.wav and /dev/null differ diff --git a/audio_samples/2_ASR_IMDA_PART1_ASR_v2_2258.wav b/audio_samples/2_ASR_IMDA_PART1_ASR_v2_2258.wav deleted file mode 100644 index 1b3ff08f36d5e02043445bd8c0f37b73cdd9f59c..0000000000000000000000000000000000000000 Binary files a/audio_samples/2_ASR_IMDA_PART1_ASR_v2_2258.wav and /dev/null differ diff --git a/audio_samples/30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454.wav b/audio_samples/30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454.wav deleted file mode 100644 index d84f6abdca95d5bfa3f292f45b370c243bf79f86..0000000000000000000000000000000000000000 Binary files a/audio_samples/30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454.wav and /dev/null differ diff --git a/audio_samples/31_SI_OPENHERMES-AUDIO_SI_V2_673.wav b/audio_samples/31_SI_OPENHERMES-AUDIO_SI_V2_673.wav deleted file mode 100644 index 
e0d9a7f61f0a8b0137bc8c5ddd4d03c02686b49b..0000000000000000000000000000000000000000 Binary files a/audio_samples/31_SI_OPENHERMES-AUDIO_SI_V2_673.wav and /dev/null differ diff --git a/audio_samples/32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572.wav b/audio_samples/32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572.wav deleted file mode 100644 index 4f0aadf1e9ac1e100c052fa9df0760651e2b2c4f..0000000000000000000000000000000000000000 Binary files a/audio_samples/32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572.wav and /dev/null differ diff --git a/audio_samples/33_SQA_IMDA_PART3_30_SQA_V2_2310.wav b/audio_samples/33_SQA_IMDA_PART3_30_SQA_V2_2310.wav deleted file mode 100644 index c2858560478a1b51a6085e0f54a34d4bbca30b8e..0000000000000000000000000000000000000000 Binary files a/audio_samples/33_SQA_IMDA_PART3_30_SQA_V2_2310.wav and /dev/null differ diff --git a/audio_samples/34_SQA_IMDA_PART3_30_SQA_V2_3621.wav b/audio_samples/34_SQA_IMDA_PART3_30_SQA_V2_3621.wav deleted file mode 100644 index e4f53b20b6210ef6bba708ea1bccb9ad787caf22..0000000000000000000000000000000000000000 Binary files a/audio_samples/34_SQA_IMDA_PART3_30_SQA_V2_3621.wav and /dev/null differ diff --git a/audio_samples/35_SQA_IMDA_PART3_30_SQA_V2_4062.wav b/audio_samples/35_SQA_IMDA_PART3_30_SQA_V2_4062.wav deleted file mode 100644 index 8e18d39cdceaa84abc9dff3f002a0c6502c30b69..0000000000000000000000000000000000000000 Binary files a/audio_samples/35_SQA_IMDA_PART3_30_SQA_V2_4062.wav and /dev/null differ diff --git a/audio_samples/36_DS_IMDA_PART4_30_DS_V2_849.wav b/audio_samples/36_DS_IMDA_PART4_30_DS_V2_849.wav deleted file mode 100644 index 6b381a7b04f312f0b317bd3b6a0581155aeaf4c1..0000000000000000000000000000000000000000 Binary files a/audio_samples/36_DS_IMDA_PART4_30_DS_V2_849.wav and /dev/null differ diff --git a/audio_samples/39_Paralingual_IEMOCAP_ER_V2_91.wav b/audio_samples/39_Paralingual_IEMOCAP_ER_V2_91.wav deleted file mode 100644 index 738c14bf9ff890820659be0ad4d27ec5576ea7c4..0000000000000000000000000000000000000000 Binary files a/audio_samples/39_Paralingual_IEMOCAP_ER_V2_91.wav and /dev/null differ diff --git a/audio_samples/3_ASR_IMDA_PART1_ASR_v2_2265.wav b/audio_samples/3_ASR_IMDA_PART1_ASR_v2_2265.wav deleted file mode 100644 index 507bca925cbe5c433d1021c89f8f5c2108fc00d6..0000000000000000000000000000000000000000 Binary files a/audio_samples/3_ASR_IMDA_PART1_ASR_v2_2265.wav and /dev/null differ diff --git a/audio_samples/40_Paralingual_IEMOCAP_ER_V2_567.wav b/audio_samples/40_Paralingual_IEMOCAP_ER_V2_567.wav deleted file mode 100644 index 6709d5a7b3509690d89d222e8a75120b0a9c4d35..0000000000000000000000000000000000000000 Binary files a/audio_samples/40_Paralingual_IEMOCAP_ER_V2_567.wav and /dev/null differ diff --git a/audio_samples/42_Paralingual_IEMOCAP_GR_V2_320.wav b/audio_samples/42_Paralingual_IEMOCAP_GR_V2_320.wav deleted file mode 100644 index 593e18ad1ff04af7877072ba964c323786ba580e..0000000000000000000000000000000000000000 Binary files a/audio_samples/42_Paralingual_IEMOCAP_GR_V2_320.wav and /dev/null differ diff --git a/audio_samples/43_Paralingual_IEMOCAP_GR_V2_129.wav b/audio_samples/43_Paralingual_IEMOCAP_GR_V2_129.wav deleted file mode 100644 index cd143063c19ca28fb3820ded2f1caa2cda0a8861..0000000000000000000000000000000000000000 Binary files a/audio_samples/43_Paralingual_IEMOCAP_GR_V2_129.wav and /dev/null differ diff --git a/audio_samples/45_Paralingual_IMDA_PART3_30_GR_V2_12312.wav b/audio_samples/45_Paralingual_IMDA_PART3_30_GR_V2_12312.wav deleted file mode 100644 index 
af4fe23487085a3047ad1f0f56b824a6a75907f4..0000000000000000000000000000000000000000 Binary files a/audio_samples/45_Paralingual_IMDA_PART3_30_GR_V2_12312.wav and /dev/null differ diff --git a/audio_samples/47_Paralingual_IMDA_PART3_30_NR_V2_10479.wav b/audio_samples/47_Paralingual_IMDA_PART3_30_NR_V2_10479.wav deleted file mode 100644 index 20a685bb51cd1670280e104e1f06987e471657bb..0000000000000000000000000000000000000000 Binary files a/audio_samples/47_Paralingual_IMDA_PART3_30_NR_V2_10479.wav and /dev/null differ diff --git a/audio_samples/49_Paralingual_MELD_ER_V2_676.wav b/audio_samples/49_Paralingual_MELD_ER_V2_676.wav deleted file mode 100644 index a614033adb66d5d8b5a0054530336876c0d61d86..0000000000000000000000000000000000000000 Binary files a/audio_samples/49_Paralingual_MELD_ER_V2_676.wav and /dev/null differ diff --git a/audio_samples/4_ASR_IMDA_PART2_ASR_v2_999.wav b/audio_samples/4_ASR_IMDA_PART2_ASR_v2_999.wav deleted file mode 100644 index 48bfb135fc3eb12814801c49abd0b8250178ad86..0000000000000000000000000000000000000000 Binary files a/audio_samples/4_ASR_IMDA_PART2_ASR_v2_999.wav and /dev/null differ diff --git a/audio_samples/50_Paralingual_MELD_ER_V2_692.wav b/audio_samples/50_Paralingual_MELD_ER_V2_692.wav deleted file mode 100644 index 69f435f7308b5090f2668d22c1f324d30dd8857e..0000000000000000000000000000000000000000 Binary files a/audio_samples/50_Paralingual_MELD_ER_V2_692.wav and /dev/null differ diff --git a/audio_samples/51_Paralingual_VOXCELEB1_GR_V2_2148.wav b/audio_samples/51_Paralingual_VOXCELEB1_GR_V2_2148.wav deleted file mode 100644 index 42d4d89846cfcd0c6bb0de173f584ad2b6d6d131..0000000000000000000000000000000000000000 Binary files a/audio_samples/51_Paralingual_VOXCELEB1_GR_V2_2148.wav and /dev/null differ diff --git a/audio_samples/53_Paralingual_VOXCELEB1_NR_V2_2286.wav b/audio_samples/53_Paralingual_VOXCELEB1_NR_V2_2286.wav deleted file mode 100644 index ce05d92f8004d6054d39fae59f4d3a34c3b80e49..0000000000000000000000000000000000000000 Binary files a/audio_samples/53_Paralingual_VOXCELEB1_NR_V2_2286.wav and /dev/null differ diff --git a/audio_samples/55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2.wav b/audio_samples/55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2.wav deleted file mode 100644 index f8513f46825e7b386b1f00f058d249044dac82d2..0000000000000000000000000000000000000000 Binary files a/audio_samples/55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2.wav and /dev/null differ diff --git a/audio_samples/56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415.wav b/audio_samples/56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415.wav deleted file mode 100644 index f95f167ebe177b0db82f346f9dbd2c51eb828ec1..0000000000000000000000000000000000000000 Binary files a/audio_samples/56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415.wav and /dev/null differ diff --git a/audio_samples/57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460.wav b/audio_samples/57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460.wav deleted file mode 100644 index c2decc6d21300257c3fc74b6718f1898dedbf4e2..0000000000000000000000000000000000000000 Binary files a/audio_samples/57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460.wav and /dev/null differ diff --git a/audio_samples/5_ASR_IMDA_PART2_ASR_v2_2241.wav b/audio_samples/5_ASR_IMDA_PART2_ASR_v2_2241.wav deleted file mode 100644 index 55063388c14bd69df6a8023e5a65e4c9c3a01fb5..0000000000000000000000000000000000000000 Binary files a/audio_samples/5_ASR_IMDA_PART2_ASR_v2_2241.wav and /dev/null differ diff --git a/audio_samples/6_ASR_IMDA_PART2_ASR_v2_3409.wav b/audio_samples/6_ASR_IMDA_PART2_ASR_v2_3409.wav deleted file mode 100644 index 
daf99a1877bae21a5ab72147a7a6359c8953e242..0000000000000000000000000000000000000000 Binary files a/audio_samples/6_ASR_IMDA_PART2_ASR_v2_3409.wav and /dev/null differ diff --git a/audio_samples/7_ASR_IMDA_PART3_30_ASR_v2_2269.wav b/audio_samples/7_ASR_IMDA_PART3_30_ASR_v2_2269.wav deleted file mode 100644 index 5e439cf43817a436692e132e194bcf2b43332126..0000000000000000000000000000000000000000 Binary files a/audio_samples/7_ASR_IMDA_PART3_30_ASR_v2_2269.wav and /dev/null differ diff --git a/audio_samples/8_ASR_IMDA_PART3_30_ASR_v2_1698.wav b/audio_samples/8_ASR_IMDA_PART3_30_ASR_v2_1698.wav deleted file mode 100644 index e0929f09849acb481f80ca007bf257a9d937c035..0000000000000000000000000000000000000000 Binary files a/audio_samples/8_ASR_IMDA_PART3_30_ASR_v2_1698.wav and /dev/null differ diff --git a/audio_samples/9_ASR_IMDA_PART3_30_ASR_v2_2474.wav b/audio_samples/9_ASR_IMDA_PART3_30_ASR_v2_2474.wav deleted file mode 100644 index 11e66f37907da37aa2d90a492e407bc3a7a20bb1..0000000000000000000000000000000000000000 Binary files a/audio_samples/9_ASR_IMDA_PART3_30_ASR_v2_2474.wav and /dev/null differ diff --git a/audio_samples/female_pilot.wav b/audio_samples/female_pilot.wav deleted file mode 100644 index 8492f4dcff3ae93e641d10bdf2cba8a3290bda4c..0000000000000000000000000000000000000000 --- a/audio_samples/female_pilot.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a881f64b78e959491e5a0731e41acdb513fc8a91cf817c3f1da6776d1cfa0245 -size 5664058 diff --git a/audio_samples/song_1.wav b/audio_samples/song_1.wav deleted file mode 100644 index 5a40bbc94858189c9252921e9ee11157e3821bd2..0000000000000000000000000000000000000000 --- a/audio_samples/song_1.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:303fc4a3003e2f49cfcfd1c5f7f79fed09ec50b67a0f96838a20c39d306538f7 -size 11420470 diff --git a/index.html b/index.html new file mode 100644 index 0000000000000000000000000000000000000000..2a7abb44a7abcb287ac148f1a0db1d73841bc4ba --- /dev/null +++ b/index.html @@ -0,0 +1,8 @@ + + +Huggingface Mirror + + + + diff --git a/pages/agent.py b/pages/agent.py deleted file mode 100644 index 36a9a35623385096495fc8d00d77b7302d9f4a5c..0000000000000000000000000000000000000000 --- a/pages/agent.py +++ /dev/null @@ -1,3 +0,0 @@ -from src.content.agent import agent_page - -agent_page() \ No newline at end of file diff --git a/pages/playground.py b/pages/playground.py deleted file mode 100644 index da5d8c7953bac6ca10d1fc69c8e755b0284f616d..0000000000000000000000000000000000000000 --- a/pages/playground.py +++ /dev/null @@ -1,4 +0,0 @@ -from src.content.playground import playground_page - - -playground_page() diff --git a/pages/voice_chat.py b/pages/voice_chat.py deleted file mode 100644 index c0627ef4f40fc78b1fc85ed7b9db34a0ee603a12..0000000000000000000000000000000000000000 --- a/pages/voice_chat.py +++ /dev/null @@ -1,4 +0,0 @@ -from src.content.voice_chat import voice_chat_page - - -voice_chat_page() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 00e88effff70de748ef0d24d97927c0d8cdf0544..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -librosa==0.10.2.post1 -streamlit==1.40.2 -openai==1.57.1 -streamlit_mic_recorder==0.0.8 -sshtunnel \ No newline at end of file diff --git a/src/content/agent.py b/src/content/agent.py deleted file mode 100644 index 680366e8355b86633208c686a8b47a3bb21ceb51..0000000000000000000000000000000000000000 --- 
a/src/content/agent.py +++ /dev/null @@ -1,283 +0,0 @@ -import os -import requests - -import numpy as np -import streamlit as st - -from src.retrieval import STANDARD_QUERIES -from src.content.common import ( - MODEL_NAMES, - AUDIO_SAMPLES_W_INSTRUCT, - AGENT_DIALOGUE_STATES, - reset_states, - update_voice_instruction_state, - init_state_section, - header_section, - sidebar_fragment, - successful_example_section, - audio_attach_dialogue, - retrive_response_with_ui -) - - -API_BASE_URL = os.getenv('API_BASE_URL') - - -LLM_NO_AUDIO_PROMPT_TEMPLATE = """{user_question}""" - - -LLM_PROMPT_TEMPLATE = """User asked a question about the audio clip. - -## User Question -{user_question} - -{audio_information_prompt}Please reply to user's question with a friendly, accurate, and helpful answer.""" - - -AUDIO_INFO_TEMPLATE = """Here are some information about this audio clip. - -## Audio Information -{audio_information} - -However, the audio analysis may or may not contain relevant information to the user question, please only reply the user with the relevant information. - -""" - - -AUDIO_ANALYSIS_STATUS = "MERaLiON-AudioLLM Analysis" - - -AG_CONVERSATION_STATES = dict( - ag_messages=[], - ag_model_messages=[], - ag_visited_query_indices=[], -) - - -def bottom_input_section(): - bottom_cols = st.columns([0.03, 0.03, 0.91, 0.03]) - with bottom_cols[0]: - st.button( - ':material/delete:', - disabled=st.session_state.disprompt, - on_click=lambda: reset_states(AGENT_DIALOGUE_STATES) - ) - - with bottom_cols[1]: - if st.button(":material/add:", disabled=st.session_state.disprompt): - audio_attach_dialogue( - audio_array_state="ag_audio_array", - audio_base64_state="ag_audio_base64", - restore_state=AG_CONVERSATION_STATES - ) - - with bottom_cols[2]: - if chat_input := st.chat_input( - placeholder="Instruction...", - disabled=st.session_state.disprompt, - on_submit=lambda: st.session_state.update(disprompt=True) - ): - st.session_state.new_prompt = chat_input - - with bottom_cols[3]: - uploaded_voice = st.audio_input( - label="voice_instruction", - label_visibility="collapsed", - disabled=st.session_state.disprompt, - on_change=lambda: st.session_state.update( - disprompt=True, - on_record_voice_instruction=True - ), - key='voice_instruction' - ) - - if uploaded_voice and st.session_state.on_record_voice_instruction: - voice_bytes = uploaded_voice.read() - update_voice_instruction_state(voice_bytes) - st.session_state.on_record_voice_instruction = False - - -def _prepare_final_prompt_with_ui(one_time_prompt): - if st.session_state.ag_audio_array.shape[0] == 0: - return LLM_NO_AUDIO_PROMPT_TEMPLATE.format(user_question=one_time_prompt) - - with st.spinner("Searching appropriate querys..."): - response = requests.get( - f"{API_BASE_URL}retrieve_relevant_docs", - params={"user_question": one_time_prompt} - ) - relevant_query_indices = response.json() - - if len(st.session_state.ag_messages) <= 2: - relevant_query_indices.append(0) - - relevant_query_indices = list( - set(relevant_query_indices).difference(st.session_state.ag_visited_query_indices) - ) - - st.session_state.ag_visited_query_indices.extend(relevant_query_indices) - - if not relevant_query_indices: - return LLM_PROMPT_TEMPLATE.format( - user_question=one_time_prompt, - audio_information_prompt="" - ) - - audio_info = [] - with st.status(AUDIO_ANALYSIS_STATUS, expanded=False) as status: - for i, standard_idx in enumerate(relevant_query_indices): - new_label = ( - f"{AUDIO_ANALYSIS_STATUS}: " - f"{STANDARD_QUERIES[standard_idx]['ui_text']} " - 
f"({i+1}/{len(relevant_query_indices)})" - ) - - status.update(label=new_label, state="running") - error_msg, warnings, response = retrive_response_with_ui( - model_name=MODEL_NAMES["audiollm"]["vllm_name"], - text_input=STANDARD_QUERIES[standard_idx]["query_text"], - array_audio_input=st.session_state.ag_audio_array, - base64_audio_input=st.session_state.ag_audio_base64, - prefix=f"**{STANDARD_QUERIES[standard_idx]['ui_text']}**: ", - stream=True, - show_warning=i==0 - ) - audio_info.append(STANDARD_QUERIES[standard_idx]["response_prefix_text"] + response) - - st.session_state.ag_messages[-1]["process"].append({ - "error": error_msg, - "warnings": warnings, - "content": response - }) - - status.update(label=AUDIO_ANALYSIS_STATUS, state="complete") - - audio_information_prompt = AUDIO_INFO_TEMPLATE.format( - audio_information="\n".join(audio_info) - ) - - return LLM_PROMPT_TEMPLATE.format( - user_question=one_time_prompt, - audio_information_prompt=audio_information_prompt - ) - - -def conversation_section(): - chat_message_container = st.container(height=480) - if st.session_state.ag_audio_array.size: - with chat_message_container.chat_message("user"): - st.audio(st.session_state.ag_audio_array, format="audio/wav", sample_rate=16000) - - for message in st.session_state.ag_messages: - with chat_message_container.chat_message(name=message["role"]): - if message.get("error"): - st.error(message["error"]) - for warning_msg in message.get("warnings", []): - st.warning(warning_msg) - if process := message.get("process", []): - with st.status(AUDIO_ANALYSIS_STATUS, expanded=False, state="complete"): - for proc in process: - if proc.get("error"): - st.error(proc["error"]) - for proc_warning_msg in proc.get("warnings", []): - st.warning(proc_warning_msg) - if proc.get("content"): - st.write(proc["content"]) - if message.get("content"): - st.write(message["content"]) - - with st._bottom: - bottom_input_section() - - if (not st.session_state.new_prompt) and (not st.session_state.new_vi_base64): - return - - one_time_prompt = st.session_state.new_prompt - one_time_vi_array = st.session_state.new_vi_array - one_time_vi_base64 = st.session_state.new_vi_base64 - - st.session_state.update( - new_prompt="", - new_vi_array=np.array([]), - new_vi_base64="", - ) - - with chat_message_container.chat_message("user"): - if one_time_vi_base64: - with st.spinner("Transcribing..."): - error_msg, warnings, one_time_prompt = retrive_response_with_ui( - model_name=MODEL_NAMES["audiollm"]["vllm_name"], - text_input="Write out the dialogue as text.", - array_audio_input=one_time_vi_array, - base64_audio_input=one_time_vi_base64, - stream=False, - normalise_response=True - ) - else: - error_msg, warnings = "", [] - st.write(one_time_prompt) - - st.session_state.ag_messages.append({ - "role": "user", - "error": error_msg, - "warnings": warnings, - "content": one_time_prompt - }) - - with chat_message_container.chat_message("assistant"): - assistant_message = {"role": "assistant", "process": []} - st.session_state.ag_messages.append(assistant_message) - - final_prompt = _prepare_final_prompt_with_ui(one_time_prompt) - - llm_response_prefix = f"**{MODEL_NAMES['llm']['ui_name']}**: " - error_msg, warnings, response = retrive_response_with_ui( - model_name=MODEL_NAMES["llm"]["vllm_name"], - text_input=final_prompt, - array_audio_input=st.session_state.ag_audio_array, - base64_audio_input="", - prefix=llm_response_prefix, - stream=True, - history=st.session_state.ag_model_messages, - show_warning=False - ) - - 
assistant_message.update({ - "error": error_msg, - "warnings": warnings, - "content": response - }) - - pure_response = response.replace(llm_response_prefix, "") - st.session_state.ag_model_messages.extend([ - {"role": "user", "content": final_prompt}, - {"role": "assistant", "content": pure_response} - ]) - - st.session_state.disprompt=False - st.rerun(scope="app") - - -def agent_page(): - init_state_section() - header_section( - component_name="Chatbot", - description=""" It is implemented by connecting multiple AI models, - offers more flexibility, and supports multi-round conversation.""", - concise_description=""" It is implemented by connecting multiple AI models and - support multi-round conversation.""", - icon="👥" - ) - - with st.sidebar: - sidebar_fragment() - - audio_sample_names = [name for name in AUDIO_SAMPLES_W_INSTRUCT.keys() if "Paral" in name] - - successful_example_section( - audio_sample_names, - audio_array_state="ag_audio_array", - audio_base64_state="ag_audio_base64", - restore_state=AG_CONVERSATION_STATES - ) - conversation_section() \ No newline at end of file diff --git a/src/content/common.py b/src/content/common.py deleted file mode 100644 index 07849058960285c458b53222a532fe092139c87f..0000000000000000000000000000000000000000 --- a/src/content/common.py +++ /dev/null @@ -1,623 +0,0 @@ -import os -import re -import copy -import base64 -import requests -import itertools -from collections import OrderedDict -from typing import List, Optional - -import numpy as np -import streamlit as st - -from src.logger import load_logger -from src.utils import array_to_bytes, bytes_to_array, postprocess_voice_transcription -from src.generation import FIXED_GENERATION_CONFIG, MAX_AUDIO_LENGTH - -API_BASE_URL = os.getenv('API_BASE_URL') - -PLAYGROUND_DIALOGUE_STATES = dict( - pg_audio_base64='', - pg_audio_array=np.array([]), - pg_messages=[] -) - - -VOICE_CHAT_DIALOGUE_STATES = dict( - vc_audio_base64='', - vc_audio_array=np.array([]), - vc_messages=[], - vc_model_messages=[] -) - - -AGENT_DIALOGUE_STATES = dict( - ag_audio_base64='', - ag_audio_array=np.array([]), - ag_visited_query_indices=[], - ag_messages=[], - ag_model_messages=[] -) - - -COMMON_DIALOGUE_STATES = dict( - disprompt=False, - new_prompt="", - new_vi_array=np.array([]), - new_vi_base64="", - on_select=False, - on_upload=False, - on_record=False, - on_select_quick_action=False, - on_record_voice_instruction=False -) - - -DEFAULT_DIALOGUE_STATE_DICTS = [ - PLAYGROUND_DIALOGUE_STATES, - VOICE_CHAT_DIALOGUE_STATES, - AGENT_DIALOGUE_STATES, - COMMON_DIALOGUE_STATES -] - - -MODEL_NAMES = OrderedDict({ - "llm": { - "vllm_name": "MERaLiON-Gemma", - "model_name": "MERaLiON-Gemma", - "ui_name": "MERaLiON-Gemma" - }, - "audiollm": { - "vllm_name": "MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION", - "model_name": "MERaLiON-AudioLLM-Whisper-SEA-LION", - "ui_name": "MERaLiON-AudioLLM" - }, - "audiollm-it": { - "vllm_name": "MERaLiON/MERaLiON-AudioLLM-Whisper-SEA-LION-it", - "model_name": "MERaLiON-AudioLLM-Whisper-SEA-LION-it", - "ui_name": "MERaLiON-AudioLLM-Instruction-Tuning" - } -}) - - -AUDIO_SAMPLES_W_INSTRUCT = { - "song_1": { - "apperance": "Instruction Following Demo: Music Question Answering", - "instructions": [ - "Please provide a detailed description of the song in both English and Chinese." - ] - }, - "7_ASR_IMDA_PART3_30_ASR_v2_2269": { - "apperance": "7. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Need this talk written down, please." 
- ] - }, - "11_ASR_IMDA_PART4_30_ASR_v2_3771": { - "apperance": "11. Automatic Speech Recognition task: conversation with Singlish code-switch", - "instructions": [ - "Write out the dialogue as text." - ] - }, - "12_ASR_IMDA_PART4_30_ASR_v2_103": { - "apperance": "12. Automatic Speech Recognition task: conversation with Singlish code-switch", - "instructions": [ - "Write out the dialogue as text." - ] - }, - "17_ASR_IMDA_PART6_30_ASR_v2_1413": { - "apperance": "17. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Record the spoken word in text form." - ] - }, - "32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572": { - "apperance": "32. Spoken Question Answering task: general speech", - "instructions": [ - "What does the man think the woman should do at 4:00." - ] - }, - "33_SQA_IMDA_PART3_30_SQA_V2_2310": { - "apperance": "33. Spoken Question Answering task: conversation in Singapore accent", - "instructions": [ - "Does Speaker2's wife cook for Speaker2 when they are at home." - ] - }, - "34_SQA_IMDA_PART3_30_SQA_V2_3621": { - "apperance": "34. Spoken Question Answering task: conversation in Singapore accent", - "instructions": [ - "Does the phrase \"#gai-gai#\" have a meaning in Chinese or Hokkien language." - ] - }, - "35_SQA_IMDA_PART3_30_SQA_V2_4062": { - "apperance": "35. Spoken Question Answering task: conversation in Singapore accent", - "instructions": [ - "What is the color of the vase mentioned in the dialogue." - ] - }, - "36_DS_IMDA_PART4_30_DS_V2_849": { - "apperance": "36. Spoken Dialogue Summarization task: conversation with Singlish code-switch", - "instructions": [ - "Condense the dialogue into a concise summary highlighting major topics and conclusions." - ] - }, - "39_Paralingual_IEMOCAP_ER_V2_91": { - "apperance": "39. Paralinguistics task: general speech", - "instructions": [ - "Based on the speaker's speech patterns, what do you think they are feeling." - ] - }, - "40_Paralingual_IEMOCAP_ER_V2_567": { - "apperance": "40. Paralinguistics task: general speech", - "instructions": [ - "Based on the speaker's speech patterns, what do you think they are feeling." - ] - }, - "42_Paralingual_IEMOCAP_GR_V2_320": { - "apperance": "42. Paralinguistics task: general speech", - "instructions": [ - "Is it possible for you to identify whether the speaker in this recording is male or female." - ] - }, - "47_Paralingual_IMDA_PART3_30_NR_V2_10479": { - "apperance": "47. Paralinguistics task: conversation in Singapore accent", - "instructions": [ - "Can you guess which ethnic group this person is from based on their accent." - ] - }, - "49_Paralingual_MELD_ER_V2_676": { - "apperance": "49. Paralinguistics task: general speech", - "instructions": [ - "What emotions do you think the speaker is expressing." - ] - }, - "50_Paralingual_MELD_ER_V2_692": { - "apperance": "50. Paralinguistics task: general speech", - "instructions": [ - "Based on the speaker's speech patterns, what do you think they are feeling." - ] - }, - "51_Paralingual_VOXCELEB1_GR_V2_2148": { - "apperance": "51. Paralinguistics task: general speech", - "instructions": [ - "May I know the gender of the speaker." - ] - }, - "53_Paralingual_VOXCELEB1_NR_V2_2286": { - "apperance": "53. Paralinguistics task: general speech", - "instructions": [ - "What's the nationality identity of the speaker." - ] - }, - "55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2": { - "apperance": "55. 
Spoken Question Answering task: general speech", - "instructions": [ - "What impact would the growth of the healthcare sector have on the country's economy in terms of employment and growth." - ] - }, - "56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415": { - "apperance": "56. Spoken Question Answering task: general speech", - "instructions": [ - "Based on the statement, can you summarize the speaker's position on the recent controversial issues in Singapore." - ] - }, - "57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460": { - "apperance": "57. Spoken Question Answering task: general speech", - "instructions": [ - "How does the author respond to parents' worries about masks in schools." - ] - }, - "1_ASR_IMDA_PART1_ASR_v2_141": { - "apperance": "1. Automatic Speech Recognition task: phonetically balanced reading", - "instructions": [ - "Turn the spoken language into a text format.", - "Please translate the content into Chinese." - ] - }, - "2_ASR_IMDA_PART1_ASR_v2_2258": { - "apperance": "2. Automatic Speech Recognition task: phonetically balanced reading", - "instructions": [ - "Turn the spoken language into a text format.", - "Please translate the content into Chinese." - ] - }, - "3_ASR_IMDA_PART1_ASR_v2_2265": { - "apperance": "3. Automatic Speech Recognition task: phonetically balanced reading", - "instructions": [ - "Turn the spoken language into a text format." - ] - }, - "4_ASR_IMDA_PART2_ASR_v2_999": { - "apperance": "4. Automatic Speech Recognition task: reading in Singapore context", - "instructions": [ - "Translate the spoken words into text format." - ] - }, - "5_ASR_IMDA_PART2_ASR_v2_2241": { - "apperance": "5. Automatic Speech Recognition task: reading in Singapore context", - "instructions": [ - "Translate the spoken words into text format." - ] - }, - "6_ASR_IMDA_PART2_ASR_v2_3409": { - "apperance": "6. Automatic Speech Recognition task: reading in Singapore context", - "instructions": [ - "Translate the spoken words into text format." - ] - }, - "8_ASR_IMDA_PART3_30_ASR_v2_1698": { - "apperance": "8. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Need this talk written down, please." - ] - }, - "9_ASR_IMDA_PART3_30_ASR_v2_2474": { - "apperance": "9. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Need this talk written down, please." - ] - }, - "10_ASR_IMDA_PART4_30_ASR_v2_1527": { - "apperance": "10. Automatic Speech Recognition task: conversation with Singlish code-switch", - "instructions": [ - "Write out the dialogue as text." - ] - }, - "13_ASR_IMDA_PART5_30_ASR_v2_1446": { - "apperance": "13. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Translate this vocal recording into a textual format." - ] - }, - "14_ASR_IMDA_PART5_30_ASR_v2_2281": { - "apperance": "14. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Translate this vocal recording into a textual format." - ] - }, - "15_ASR_IMDA_PART5_30_ASR_v2_4388": { - "apperance": "15. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Translate this vocal recording into a textual format." - ] - }, - "16_ASR_IMDA_PART6_30_ASR_v2_576": { - "apperance": "16. Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Record the spoken word in text form." - ] - }, - "18_ASR_IMDA_PART6_30_ASR_v2_2834": { - "apperance": "18. 
Automatic Speech Recognition task: conversation in Singapore accent", - "instructions": [ - "Record the spoken word in text form." - ] - }, - "19_ASR_AIShell_zh_ASR_v2_5044": { - "apperance": "19. Automatic Speech Recognition task: speech in Chinese ", - "instructions": [ - "Transform the oral presentation into a text document." - ] - }, - "20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833": { - "apperance": "20. Automatic Speech Recognition task: general speech", - "instructions": [ - "Please provide a written transcription of the speech." - ] - }, - "25_ST_COVOST2_ZH-CN_EN_ST_V2_4567": { - "apperance": "25. Speech Translation task: Chinese to English", - "instructions": [ - "Please translate the given speech to English." - ] - }, - "26_ST_COVOST2_EN_ZH-CN_ST_V2_5422": { - "apperance": "26. Speech Translation task: English to Chinese", - "instructions": [ - "Please translate the given speech to Chinese." - ] - }, - "27_ST_COVOST2_EN_ZH-CN_ST_V2_6697": { - "apperance": "27. Speech Translation task: English to Chinese", - "instructions": [ - "Please translate the given speech to Chinese." - ] - }, - "28_SI_ALPACA-GPT4-AUDIO_SI_V2_299": { - "apperance": "28. Speech Instruction task: general speech", - "instructions": [ - "Please follow the instruction in the speech." - ] - }, - "29_SI_ALPACA-GPT4-AUDIO_SI_V2_750": { - "apperance": "29. Speech Instruction task: general speech", - "instructions": [ - "Please follow the instruction in the speech." - ] - }, - "30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454": { - "apperance": "30. Speech Instruction task: general speech", - "instructions": [ - "Please follow the instruction in the speech." - ] - }, - "female_pilot#1": { - "apperance": "Female Pilot Interview: Transcription", - "instructions": [ - "Please transcribe the speech" - ] - }, - "female_pilot#2": { - "apperance": "Female Pilot Interview: Aircraft name", - "instructions": [ - "What does 大力士 mean in the conversation" - ] - }, - "female_pilot#3": { - "apperance": "Female Pilot Interview: Air Force Personnel Count", - "instructions": [ - "How many air force personnel are there?" - ] - }, - "female_pilot#4": { - "apperance": "Female Pilot Interview: Air Force Personnel Name", - "instructions": [ - "Can you tell me the names of the two pilots?" - ] - }, - "female_pilot#5": { - "apperance": "Female Pilot Interview: Conversation Mood", - "instructions": [ - "What is the mood of the conversation?" 
- ] - } -} - - -def reset_states(*state_dicts): - for states in state_dicts: - st.session_state.update(copy.deepcopy(states)) - st.session_state.update(copy.deepcopy(COMMON_DIALOGUE_STATES)) - - -def process_audio_bytes(audio_bytes): - origin_audio_array = bytes_to_array(audio_bytes) - truncated_audio_array = origin_audio_array[: MAX_AUDIO_LENGTH*16000] - truncated_audio_bytes = array_to_bytes(truncated_audio_array) - audio_base64 = base64.b64encode(truncated_audio_bytes).decode('utf-8') - - return origin_audio_array, audio_base64 - - -def update_voice_instruction_state(voice_bytes): - st.session_state.new_vi_array, st.session_state.new_vi_base64 = \ - process_audio_bytes(voice_bytes) - - -def init_state_section(): - st.set_page_config(page_title='MERaLiON-AudioLLM', page_icon = "🔥", layout='wide') - - st.markdown( - ( - '' - ), - unsafe_allow_html=True - ) - - if "logger" not in st.session_state: - st.session_state.logger = load_logger() - st.session_state.session_id = st.session_state.logger.register_session() - - - for key, value in FIXED_GENERATION_CONFIG.items(): - if key not in st.session_state: - st.session_state[key]=copy.deepcopy(value) - - for states in DEFAULT_DIALOGUE_STATE_DICTS: - for key, value in states.items(): - if key not in st.session_state: - st.session_state[key]=copy.deepcopy(value) - - -def header_section(component_name, description="", concise_description="", icon="🤖"): - st.markdown( - f"
MERaLiON-AudioLLM {component_name} {icon}", - unsafe_allow_html=True - ) - - st.markdown( - f"""This {component_name.lower()} is based on - MERaLiON-AudioLLM, - developed by I2R, A*STAR, in collaboration with AISG, Singapore. - {description}""", - unsafe_allow_html=True - ) - - st.markdown( - f"This {component_name.lower()} is based on MERaLiON-AudioLLM.{concise_description}
""", - unsafe_allow_html=True - ) - - -@st.fragment -def sidebar_fragment(): - with st.container(height=256, border=False): - st.page_link("pages/playground.py", disabled=st.session_state.disprompt, label="🚀 Playground") - st.page_link("pages/agent.py", disabled=st.session_state.disprompt, label="👥 Cascade System") - st.page_link("pages/voice_chat.py", disabled=st.session_state.disprompt, label="🗣️ End-to-End Voice Chat") - - st.divider() - - st.slider(label='Temperature', min_value=0.0, max_value=2.0, value=0.1, key='temperature') - - st.slider(label='Top P', min_value=0.0, max_value=1.0, value=0.9, key='top_p') - - st.slider(label="Repetition Penalty", min_value=1.0, max_value=1.2, value=1.1, key="repetition_penalty") - - -@st.fragment -def successful_example_section(audio_sample_names, audio_array_state, audio_base64_state, restore_state={}): - st.markdown(":fire: **Successful Tasks and Examples**") - - sample_name = st.selectbox( - label="**Select Audio:**", - label_visibility="collapsed", - options=audio_sample_names, - format_func=lambda o: AUDIO_SAMPLES_W_INSTRUCT[o]["apperance"], - index=None, - placeholder="Select an audio sample:", - on_change=lambda: st.session_state.update( - on_select=True, - disprompt=True, - **copy.deepcopy(restore_state) - ), - key='select') - - if sample_name and st.session_state.on_select: - file_name = sample_name.split("#")[0] - audio_bytes = open(f"audio_samples/{file_name}.wav", "rb").read() - st.session_state.update( - on_select=False, - new_prompt=AUDIO_SAMPLES_W_INSTRUCT[sample_name]["instructions"][0] - ) - st.session_state[audio_array_state], st.session_state[audio_base64_state] = \ - process_audio_bytes(audio_bytes) - st.rerun(scope="app") - - -@st.dialog("Specify audio context for analysis") -def audio_attach_dialogue(audio_array_state, audio_base64_state, restore_state={}): - st.markdown("**Upload**") - - uploaded_file = st.file_uploader( - label="**Upload Audio:**", - label_visibility="collapsed", - type=['wav', 'mp3'], - on_change=lambda: st.session_state.update( - on_upload=True, - **copy.deepcopy(restore_state) - ), - key='upload' - ) - - if uploaded_file and st.session_state.on_upload: - audio_bytes = uploaded_file.read() - st.session_state[audio_array_state], st.session_state[audio_base64_state] = \ - process_audio_bytes(audio_bytes) - st.session_state.on_upload = False - st.rerun() - - st.markdown("**Record**") - - uploaded_file = st.audio_input( - label="**Record Audio:**", - label_visibility="collapsed", - on_change=lambda: st.session_state.update( - on_record=True, - **copy.deepcopy(restore_state) - ), - key='record' - ) - - if uploaded_file and st.session_state.on_record: - audio_bytes = uploaded_file.read() - st.session_state[audio_array_state], st.session_state[audio_base64_state] = \ - process_audio_bytes(audio_bytes) - st.session_state.on_record = False - st.rerun() - - -def retrive_response_with_ui( - model_name: str, - text_input: str, - array_audio_input: np.ndarray, - base64_audio_input: str, - prefix: str = "", - stream: bool = True, - normalise_response: bool = False, - history: Optional[List] = None, - show_warning: bool = True, - **kwargs - ): - - if history is None: - history = [] - - # Prepare request data - request_data = { - "text_input": str(text_input), - "model_name": str(model_name), - "array_audio_input": array_audio_input.tolist(), # Convert numpy array to list - "base64_audio_input": str(base64_audio_input) if base64_audio_input else None, - "history": list(history) if history else None, - "stream": 
bool(stream), - "max_completion_tokens": int(st.session_state.max_completion_tokens), - "temperature": float(st.session_state.temperature), - "top_p": float(st.session_state.top_p), - "repetition_penalty": float(st.session_state.repetition_penalty), - "top_k": int(st.session_state.top_k), - "length_penalty": float(st.session_state.length_penalty), - "seed": int(st.session_state.seed), - "extra_params": {} - } - - # print(request_data) - # print(model_name) - - error_msg = "" - warnings = [] - response = "" - - try: - if stream: - # Streaming response - response_stream = requests.post(f"{API_BASE_URL}chat", json=request_data, stream=True) - response_stream.raise_for_status() - - response_obj = itertools.chain([prefix], (chunk.decode() for chunk in response_stream)) - response = st.write_stream(response_obj) - else: - # Non-streaming response - api_response = requests.post(f"{API_BASE_URL}chat", json=request_data) - api_response.raise_for_status() - result = api_response.json() - - if "warnings" in result: - warnings = result["warnings"] - - response = result.get("response", "") - if normalise_response: - response = postprocess_voice_transcription(response) - response = prefix + response - st.write(response) - - except requests.exceptions.RequestException as e: - error_msg = re.sub("[a-zA-Z0-9_\-.]+\.com", "", str(e)) - error_msg = f"API request failed: {error_msg}" - st.error(error_msg) - - if show_warning: - for warning_msg in warnings: - st.warning(warning_msg) - - st.session_state.logger.register_query( - session_id=st.session_state.session_id, - base64_audio=base64_audio_input, - text_input=text_input, - history=history, - params=request_data["extra_params"], - response=response, - warnings=warnings, - error_msg=error_msg - ) - - return error_msg, warnings, response \ No newline at end of file diff --git a/src/content/playground.py b/src/content/playground.py deleted file mode 100644 index d996db3605fd956ec28ffa0da3446ab9c02cb5a3..0000000000000000000000000000000000000000 --- a/src/content/playground.py +++ /dev/null @@ -1,229 +0,0 @@ -import numpy as np -import streamlit as st - -from src.content.common import ( - MODEL_NAMES, - AUDIO_SAMPLES_W_INSTRUCT, - PLAYGROUND_DIALOGUE_STATES, - reset_states, - update_voice_instruction_state, - init_state_section, - header_section, - sidebar_fragment, - successful_example_section, - audio_attach_dialogue, - retrive_response_with_ui -) - - -QUICK_ACTIONS = [ - { - "name": "**Summary**", - "instruction": "Please summarise this speech.", - "width": 10, - }, - { - "name": "**Transcript**", - "instruction": "Please transcribe the speech", - "width": 9.5, - } -] - - -PG_CONVERSATION_STATES = dict( - pg_messages=[], -) - - -@st.fragment -def select_model_variants_fradment(): - display_mapper = { - value["vllm_name"]: value["ui_name"] - for key, value in MODEL_NAMES.items() - if "audiollm" in key - } - - st.selectbox( - label=":fire: Explore more MERaLiON-AudioLLM variants!", - options=list(display_mapper.keys()), - index=0, - format_func=lambda o: display_mapper[o], - key="pg_model_name", - placeholder=":fire: Explore more MERaLiON-AudioLLM variants!", - disabled=st.session_state.disprompt, - ) - - -def bottom_input_section(): - select_model_variants_fradment() - - bottom_cols = st.columns([0.03, 0.03, 0.91, 0.03]) - with bottom_cols[0]: - st.button( - ':material/delete:', - disabled=st.session_state.disprompt, - on_click=lambda: reset_states(PLAYGROUND_DIALOGUE_STATES) - ) - - with bottom_cols[1]: - if st.button(":material/add:", 
disabled=st.session_state.disprompt): - audio_attach_dialogue( - audio_array_state="pg_audio_array", - audio_base64_state="pg_audio_base64", - restore_state=PG_CONVERSATION_STATES - ) - - with bottom_cols[2]: - if chat_input := st.chat_input( - placeholder="Instruction...", - disabled=st.session_state.disprompt, - on_submit=lambda: st.session_state.update( - disprompt=True, - **PG_CONVERSATION_STATES - ) - ): - st.session_state.new_prompt = chat_input - - with bottom_cols[3]: - uploaded_voice = st.audio_input( - label="voice_instruction", - label_visibility="collapsed", - disabled=st.session_state.disprompt, - on_change=lambda: st.session_state.update( - disprompt=True, - on_record_voice_instruction=True, - **PG_CONVERSATION_STATES - ), - key='voice_instruction' - ) - - if uploaded_voice and st.session_state.on_record_voice_instruction: - voice_bytes = uploaded_voice.read() - update_voice_instruction_state(voice_bytes) - st.session_state.on_record_voice_instruction = False - - -@st.fragment -def quick_actions_fragment(): - action_cols_spec = [_["width"] for _ in QUICK_ACTIONS] - action_cols = st.columns(action_cols_spec) - - for idx, action in enumerate(QUICK_ACTIONS): - action_cols[idx].button( - action["name"], - args=(action["instruction"],), - disabled=st.session_state.disprompt, - on_click=lambda p: st.session_state.update( - disprompt=True, - pg_messages=[], - new_prompt=p, - on_select_quick_action=True - ) - ) - - if st.session_state.on_select_quick_action: - st.session_state.on_select_quick_action = False - st.rerun(scope="app") - - -def conversation_section(): - if st.session_state.pg_audio_array.size: - with st.chat_message("user"): - st.audio(st.session_state.pg_audio_array, format="audio/wav", sample_rate=16000) - quick_actions_fragment() - - for message in st.session_state.pg_messages: - with st.chat_message(message["role"]): - if message.get("error"): - st.error(message["error"]) - for warning_msg in message.get("warnings", []): - st.warning(warning_msg) - if message.get("content"): - st.write(message["content"]) - - with st._bottom: - bottom_input_section() - - if (not st.session_state.new_prompt) and (not st.session_state.new_vi_base64): - return - - one_time_prompt = st.session_state.new_prompt - one_time_vi_array = st.session_state.new_vi_array - one_time_vi_base64 = st.session_state.new_vi_base64 - - st.session_state.update( - new_prompt="", - new_vi_array=np.array([]), - new_vi_base64="", - pg_messages=[] - ) - - with st.chat_message("user"): - if one_time_vi_base64: - with st.spinner("Transcribing..."): - error_msg, warnings, one_time_prompt = retrive_response_with_ui( - model_name=MODEL_NAMES["audiollm"]["vllm_name"], - text_input="Write out the dialogue as text.", - array_audio_input=one_time_vi_array, - base64_audio_input=one_time_vi_base64, - stream=False, - normalise_response=True - ) - else: - error_msg, warnings = "", [] - st.write(one_time_prompt) - - st.session_state.pg_messages.append({ - "role": "user", - "error": error_msg, - "warnings": warnings, - "content": one_time_prompt - }) - - with st.chat_message("assistant"): - with st.spinner("Thinking..."): - error_msg, warnings, response = retrive_response_with_ui( - model_name=st.session_state.pg_model_name, - text_input=one_time_prompt, - array_audio_input=st.session_state.pg_audio_array, - base64_audio_input=st.session_state.pg_audio_base64, - stream=True - ) - - st.session_state.pg_messages.append({ - "role": "assistant", - "error": error_msg, - "warnings": warnings, - "content": response - }) - - 
st.session_state.disprompt=False - st.rerun(scope="app") - - -def playground_page(): - init_state_section() - header_section( - component_name="Playground", - description=""" It is tailored for Singapore’s multilingual and multicultural landscape. - MERaLiON-AudioLLM supports - Automatic Speech Recognition, - Speech Translation, - Spoken Question Answering, - Spoken Dialogue Summarization, - Speech Instruction, and - Paralinguistics tasks.""", - concise_description="" - ) - - with st.sidebar: - sidebar_fragment() - - audio_sample_names = [name for name in AUDIO_SAMPLES_W_INSTRUCT.keys()] - successful_example_section( - audio_sample_names, - audio_array_state="pg_audio_array", - audio_base64_state="pg_audio_base64", - restore_state=PG_CONVERSATION_STATES - ) - conversation_section() \ No newline at end of file diff --git a/src/content/voice_chat.py b/src/content/voice_chat.py deleted file mode 100644 index 3d0237f47b61a90e2140ca75461a9575bafcee11..0000000000000000000000000000000000000000 --- a/src/content/voice_chat.py +++ /dev/null @@ -1,154 +0,0 @@ -import numpy as np -import streamlit as st - -from src.generation import ( - prepare_multimodal_content, - change_multimodal_content -) -from src.content.common import ( - MODEL_NAMES, - VOICE_CHAT_DIALOGUE_STATES, - reset_states, - process_audio_bytes, - init_state_section, - header_section, - sidebar_fragment, - retrive_response_with_ui -) - - -# TODO: change this. -DEFAULT_PROMPT = "Based on the information in this user’s voice, please reply to the user in a friendly and helpful way." -MAX_VC_ROUNDS = 5 - - -def bottom_input_section(): - bottom_cols = st.columns([0.03, 0.97]) - with bottom_cols[0]: - st.button( - ':material/delete:', - disabled=st.session_state.disprompt, - on_click=lambda: reset_states(VOICE_CHAT_DIALOGUE_STATES) - ) - - with bottom_cols[1]: - uploaded_file = st.audio_input( - label="record audio", - label_visibility="collapsed", - disabled=st.session_state.disprompt, - on_change=lambda: st.session_state.update( - on_record=True, - disprompt=True - ), - key='record' - ) - - if uploaded_file and st.session_state.on_record: - audio_bytes = uploaded_file.read() - st.session_state.vc_audio_array, st.session_state.vc_audio_base64 = \ - process_audio_bytes(audio_bytes) - st.session_state.update( - on_record=False, - ) - - -@st.fragment -def system_prompt_fragment(): - with st.expander("System Prompt"): - st.text_area( - label="Insert system instructions or background knowledge here.", - label_visibility="collapsed", - disabled=st.session_state.disprompt, - max_chars=5000, - key="system_prompt", - value=DEFAULT_PROMPT, - ) - - -def conversation_section(): - chat_message_container = st.container(height=480) - for message in st.session_state.vc_messages: - with chat_message_container.chat_message(message["role"]): - if message.get("error"): - st.error(message["error"]) - for warning_msg in message.get("warnings", []): - st.warning(warning_msg) - if message.get("audio", np.array([])).shape[0]: - st.audio(message["audio"], format="audio/wav", sample_rate=16000) - if message.get("content"): - st.write(message["content"]) - - with st._bottom: - bottom_input_section() - - if not st.session_state.vc_audio_base64: - return - - if len(st.session_state.vc_messages) >= MAX_VC_ROUNDS * 2: - st.toast(f":warning: max conversation rounds ({MAX_VC_ROUNDS}) reached!") - return - - one_time_prompt = DEFAULT_PROMPT - one_time_array = st.session_state.vc_audio_array - one_time_base64 = st.session_state.vc_audio_base64 - st.session_state.update( - 
vc_audio_array=np.array([]), - vc_audio_base64="", - ) - - with chat_message_container.chat_message("user"): - st.audio(one_time_array, format="audio/wav", sample_rate=16000) - - st.session_state.vc_messages.append({"role": "user", "audio": one_time_array}) - - if not st.session_state.vc_model_messages: - one_time_prompt = st.session_state.system_prompt - else: - st.session_state.vc_model_messages[0]["content"] = change_multimodal_content( - st.session_state.vc_model_messages[0]["content"], - text_input=st.session_state.system_prompt - ) - - with chat_message_container.chat_message("assistant"): - with st.spinner("Thinking..."): - error_msg, warnings, response = retrive_response_with_ui( - model_name=MODEL_NAMES["audiollm-it"]["vllm_name"], - text_input=one_time_prompt, - array_audio_input=one_time_array, - base64_audio_input=one_time_base64, - stream=True, - history=st.session_state.vc_model_messages - ) - - st.session_state.vc_messages.append({ - "role": "assistant", - "error": error_msg, - "warnings": warnings, - "content": response - }) - - mm_content = prepare_multimodal_content(one_time_prompt, one_time_base64) - st.session_state.vc_model_messages.extend([ - {"role": "user", "content": mm_content}, - {"role": "assistant", "content": response} - ]) - - st.session_state.disprompt=False - st.rerun(scope="app") - - -def voice_chat_page(): - init_state_section() - header_section( - component_name="Voice Chat", - description=""" Currently support up to 5 rounds of conversations. - Feel free to talk about anything.""", - concise_description=" Currently support up to 5 rounds of conversations.", - icon="🗣️" - ) - - with st.sidebar: - sidebar_fragment() - - system_prompt_fragment() - conversation_section() \ No newline at end of file diff --git a/src/exceptions.py b/src/exceptions.py deleted file mode 100644 index a939af4bd90c5f30609d62b695c305dab1d6c674..0000000000000000000000000000000000000000 --- a/src/exceptions.py +++ /dev/null @@ -1,2 +0,0 @@ -class NoAudioException(Exception): - pass \ No newline at end of file diff --git a/src/generation.py b/src/generation.py deleted file mode 100644 index debef67a3c05081a8cf69142554b8f1b760ef5e4..0000000000000000000000000000000000000000 --- a/src/generation.py +++ /dev/null @@ -1,47 +0,0 @@ -FIXED_GENERATION_CONFIG = dict( - max_completion_tokens=1024, - top_k=50, - length_penalty=1.0, - seed=42 -) - -MAX_AUDIO_LENGTH = 120 - - - -def prepare_multimodal_content(text_input, base64_audio_input): - return [ - { - "type": "text", - "text": f"Text instruction: {text_input}" - }, - { - "type": "audio_url", - "audio_url": { - "url": f"data:audio/ogg;base64,{base64_audio_input}" - }, - }, - ] - - -def change_multimodal_content( - original_content, - text_input="", - base64_audio_input=""): - - # Since python 3.7 dictionary is ordered. 
- if text_input: - original_content[0] = { - "type": "text", - "text": f"Text instruction: {text_input}" - } - - if base64_audio_input: - original_content[1] = { - "type": "audio_url", - "audio_url": { - "url": f"data:audio/ogg;base64,{base64_audio_input}" - } - } - - return original_content diff --git a/src/logger.py b/src/logger.py deleted file mode 100644 index 426bee2e36d246e79fd1e990c9181893987b9ef7..0000000000000000000000000000000000000000 --- a/src/logger.py +++ /dev/null @@ -1,111 +0,0 @@ -import io -import os -import time -import json -from threading import Thread, Lock - -import streamlit as st -from huggingface_hub import HfApi - -from src.utils import get_current_strftime - - -logger_lock = Lock() - - -def threaded(fn): - def wrapper(*args, **kwargs): - thread = Thread(target=fn, args=args, kwargs=kwargs) - thread.start() - return thread - return wrapper - - -class Logger: - def __init__(self): - self.app_id = get_current_strftime() - self.session_increment = 0 - self.query_increment = 0 - self.sync_interval = 180 - - self.session_data = [] - self.query_data = [] - self.audio_data = [] - - self.sync_data() - - def register_session(self) -> str: - new_session_id = f"{self.app_id}+{self.session_increment}" - with logger_lock: - self.session_data.append({ - "session_id": new_session_id, - "creation_time": get_current_strftime() - }) - - self.session_increment += 1 - return new_session_id - - def register_query(self, - session_id, - base64_audio, - text_input, - response, - **kwargs - ): - new_query_id = self.query_increment - current_time = get_current_strftime() - - with logger_lock: - current_query_data = { - "session_id": session_id, - "query_id": new_query_id, - "creation_time": current_time, - "text": text_input, - "response": response, - } - current_query_data.update(kwargs) - self.query_data.append(current_query_data) - - self.audio_data.append({ - "session_id": session_id, - "query_id": new_query_id, - "creation_time": current_time, - "audio": base64_audio, - }) - self.query_increment += 1 - - - @threaded - def sync_data(self): - api = HfApi() - - while True: - time.sleep(self.sync_interval) - - for data_name in ["session_data", "query_data", "audio_data"]: - with logger_lock: - last_data = getattr(self, data_name, []) - setattr(self, data_name, []) - - if not last_data: - continue - - buffer = io.BytesIO() - for row in last_data: - row_str = json.dumps(row, ensure_ascii=False)+"\n" - buffer.write(row_str.encode("utf-8")) - - api.upload_file( - path_or_fileobj=buffer, - path_in_repo=f"{data_name}/{get_current_strftime()}.json", - repo_id=os.getenv("LOGGING_REPO_NAME"), - repo_type="dataset", - token=os.getenv('HF_TOKEN') - ) - - buffer.close() - - -@st.cache_resource() -def load_logger(): - return Logger() \ No newline at end of file diff --git a/src/retrieval.py b/src/retrieval.py deleted file mode 100644 index b7f3ea39f0c38d3e00a23e3041e90ecef1c1b300..0000000000000000000000000000000000000000 --- a/src/retrieval.py +++ /dev/null @@ -1,38 +0,0 @@ -STANDARD_QUERIES = [ - { - "query_text": "Please transcribe this speech.", - "doc_text": "Listen to a speech and write down exactly what is being said in text form. It's essentially converting spoken words into written words. Provide the exact transcription of the given audio. 
Record whatever the speaker has said into written text.", - "response_prefix_text": "The transcription of the speech is: ", - "ui_text": "speech transcription" - }, - { - "query_text": "Please describe what happened in this audio", - "doc_text": "Text captions describing the sound events and environments in the audio clips, describing the events and actions that happened in the audio.", - "response_prefix_text": "Events in this audio clip: ", - "ui_text": "audio caption" - }, - { - "query_text": "May I know the gender of the speakers", - "doc_text": "Identify the gender, male or female, based on pitch, formants, harmonics, and prosody features, and other speech pattern differences between genders.", - "response_prefix_text": "By analyzing pitch, formants, harmonics, and prosody features, which reflect physiological and speech pattern differences between genders: ", - "ui_text": "gender recognition" - }, - { - "query_text": "May I know the nationality of the speakers", - "doc_text": "Discover speakers' nationality, country, or the place he is coming from, from his/her accent, pronunciation patterns, and other language-specific speech features influenced by cultural and linguistic backgrounds.", - "response_prefix_text": "By analyzing accent, pronunciation patterns, intonation, rhythm, phoneme usage, and language-specific speech features influenced by cultural and linguistic backgrounds: ", - "ui_text": "nationality recognition" - }, - { - "query_text": "Can you guess which ethnic group this person is from based on their accent.", - "doc_text": "Discover speakers' ethnic group, home country, or the place he is coming from, from his/her accent, tone, and other vocal characteristics influenced by cultural, regional, and linguistic factors.", - "response_prefix_text": "By analyzing speech features like accent, tone, intonation, phoneme variations, and vocal characteristics influenced by cultural, regional, and linguistic factors: ", - "ui_text": "ethnic group recognition" - }, - { - "query_text": "What do you think the speakers are feeling.", - "doc_text": "What do you think the speakers are feeling. 
Please identify speakers' emotions by analyzing vocal features like pitch, tone, volume, speech rate, rhythm, and spectral energy, which reflect emotional states such as happiness, anger, sadness, or fear.", - "response_prefix_text": "By analyzing vocal features like pitch, tone, volume, speech rate, rhythm, and spectral energy: ", - "ui_text": "emotion recognition" - }, -] diff --git a/src/utils.py b/src/utils.py deleted file mode 100644 index efd7011f2abc1ada13463d7dec3945f2f991ed76..0000000000000000000000000000000000000000 --- a/src/utils.py +++ /dev/null @@ -1,31 +0,0 @@ -import io -import re -from datetime import datetime -from scipy.io.wavfile import write - -import librosa - - -def get_current_strftime(): - return datetime.now().strftime(r'%d-%m-%y-%H-%M-%S') - - -def bytes_to_array(audio_bytes): - audio_array, _ = librosa.load( - io.BytesIO(audio_bytes), - sr=16000 - ) - return audio_array - - -def array_to_bytes(audio_array): - bytes_wav = bytes() - byte_io = io.BytesIO(bytes_wav) - write(byte_io, 16000, audio_array) - return byte_io.read() - - -def postprocess_voice_transcription(text): - text = re.sub("<.*>:?|\(.*\)|\[.*\]", "", text) - text = re.sub("\s+", " ", text).strip() - return text \ No newline at end of file diff --git a/style.css b/style.css new file mode 100644 index 0000000000000000000000000000000000000000..b76ad33e50a78d60e7487a2d9db5884e3013831e --- /dev/null +++ b/style.css @@ -0,0 +1,17 @@ +body { + padding: 2rem; + font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif; +} + +h1 { + font-size: 16px; + margin-top: 0; +} + +p { + color: rgb(107, 114, 128); + font-size: 15px; + margin-bottom: 10px; + margin-top: 5px; +} + diff --git a/style/app_style.css b/style/app_style.css deleted file mode 100644 index 405194228b62cf9c87586085fd4a174ddf37e20c..0000000000000000000000000000000000000000 --- a/style/app_style.css +++ /dev/null @@ -1,150 +0,0 @@ -div[data-testid="stMainBlockContainer"] { - padding-top: 2rem; - padding-bottom: 1rem; -} - -div[data-testid="stMainBlockContainer"]:has( div[height="480"][data-testid="stVerticalBlockBorderWrapper"]) { - height: calc(100% - 90px); -} - -div[data-testid="stMainBlockContainer"]>div[data-testid="stVerticalBlockBorderWrapper"] { - height: 100%; -} - -div[data-testid="stMainBlockContainer"]>div[data-testid="stVerticalBlockBorderWrapper"]>div { - height: 100%; -} - -div[data-testid="stMainBlockContainer"]>div[data-testid="stVerticalBlockBorderWrapper"]>div>div { - height: 100%; -} - -div[data-testid="stMainBlockContainer"] div[data-testid="stAudioInput"]>div { - max-height: 3rem; -} - -div[data-testid="stMainBlockContainer"] h1 { - padding-top: 0.25rem; -} - -div[class="sidebar-intro"] p { - margin-bottom: 0.75rem; -} - -[class='stAudio'] { - max-width: 500px !important; - margin: auto !important; -} - -div[data-testid="stChatMessage"]:has(> div[data-testid="stChatMessageAvatarUser"]) { - flex-direction: row-reverse; - text-align: right; -} - -div[height="480"][data-testid="stVerticalBlockBorderWrapper"] { - height: 100%; - min-height: 20px; -} - -/* audio quick actions */ - -div[data-testid="stChatMessage"] div[data-testid="stVerticalBlock"]:has( audio[data-testid="stAudio"]) { - gap: 2px; -} - -div[data-testid="stChatMessage"] div[data-testid="stHorizontalBlock"]:has(> div[data-testid="stColumn"]) { - flex-direction: row-reverse; - gap: 4px; -} - -div[data-testid="stChatMessage"] div[data-testid="stHorizontalBlock"]>div[data-testid="stColumn"]:has( div[data-testid="stButton"]) { - width: 6rem; - min-width: 6rem; - 
flex: 0 0 6rem; -} - -/* File uploader */ - -section[data-testid='stFileUploaderDropzone'] { - padding:6px 2rem; -} - -section[data-testid='stFileUploaderDropzone']>button { - display:none; -} - -div[data-testid="stFileUploaderDropzoneInstructions"]>div>span { - display:none; -} - -div[data-testid="stBottomBlockContainer"] { - padding-bottom: 2rem; -} - -/* Chat input component at the bottom */ - -div[data-testid="stBottomBlockContainer"] div[data-testid="stHorizontalBlock"]:has(> div[data-testid="stColumn"]) { - gap: 4px; -} - -div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stButton"]):first-of-type { - width: 42px; - min-width: 42px; - flex: 0 0 42px; -} - -div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stButton"]):nth-of-type(2) { - width: 42px; - min-width: 42px; - flex: 0 0 42px; -} - -div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stChatInput"]) { - width: 10rem; - min-width: 10rem; - flex: 1 1 10rem; -} - -div[data-testid="stBottomBlockContainer"] div[data-testid="stColumn"]:has( div[data-testid="stAudioInput"]) { - width: 10rem; - min-width: 10rem; - flex: 1 1 10rem; -} - -div[data-testid="stBottomBlockContainer"] div[data-testid="stAudioInput"]>div { - max-height: 40px; -} - -/* Mic Button */ - -div[data-testid="stBottomBlockContainer"]:has( div[data-testid="stChatInput"]) div[data-testid="stAudioInput"]>div { - display: block; - padding: 0; - margin: auto; -} - -div[data-testid="stBottomBlockContainer"]:has( div[data-testid="stChatInput"]) div[data-testid="stAudioInput"]>div>div:last-of-type { - display:none; -} - -div[data-testid="stBottomBlockContainer"]:has( div[data-testid="stChatInput"]) div[data-testid="stAudioInput"]>div>div:nth-of-type(2) { - margin:auto; -} - -div[data-testid="stBottomBlockContainer"]:has( div[data-testid="stChatInput"]) div[data-testid="stAudioInput"]>div>div:nth-of-type(2)>span:last-of-type { - display:none; -} - -div[data-testid="stBottomBlockContainer"]:has( div[data-testid="stChatInput"]) div[data-testid="stAudioInput"]>div>div:nth-of-type(2)>span:only-of-type { - display:block; -} - -div[data-testid="stBottomBlockContainer"]:has( div[data-testid="stChatInput"]) div[data-testid="stAudioInput"]>div>span { - display:none; -} - -div[data-testid="stBottomBlockContainer"]:has( div[data-testid="stChatInput"]) div[data-testid="stColumn"]:has( div[data-testid="stAudioInput"]) { - width: 24px; - min-width: 24px; - flex: 0 0 24px; -} \ No newline at end of file diff --git a/style/normal_window.css b/style/normal_window.css deleted file mode 100644 index f66ffb8ca6da843c656cdb7ec5f6b4fac2283552..0000000000000000000000000000000000000000 --- a/style/normal_window.css +++ /dev/null @@ -1,18 +0,0 @@ -@media(min-width: 800px) { - div[data-testid="stMainBlockContainer"] { - padding-left: 5rem; - padding-right: 5rem; - } - - div[data-testid="stBottomBlockContainer"] { - padding-left: 5rem; - padding-right: 5rem; - } -} - - -@media(min-width: 800px) and (min-height: 800px) { - div[class="main-intro-small-window"] { - display: none; - } -} \ No newline at end of file diff --git a/style/small_window.css b/style/small_window.css deleted file mode 100644 index 63875f8d6a6ef7872462517ddc5cdee3859ec342..0000000000000000000000000000000000000000 --- a/style/small_window.css +++ /dev/null @@ -1,25 +0,0 @@ -@media(max-width: 800px) { - div[data-testid="stMainBlockContainer"] { - padding-left: 1rem; - padding-right: 1rem; - } - - 
div[data-testid="stBottomBlockContainer"] { - padding-left: 1rem; - padding-right: 1rem; - } - - div[data-testid="stSidebarCollapsedControl"] button[data-testid="stBaseButton-headerNoPadding"]::after { - content: "More Use Cases" - } -} - -@media(max-width: 800px) or (max-height: 800px) { - div[data-testid="stMainBlockContainer"] div[data-testid="stVerticalBlock"]>div[data-testid="stElementContainer"]:has( div[data-testid="stHeadingWithActionElements"]) { - display: none; - } - - div[class="main-intro-normal-window"] { - display: none; - } -} \ No newline at end of file