import os
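# Install Whisper from GitHub at startup; a common shortcut on Hugging Face
# Spaces when the dependency is not pinned in requirements.txt.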
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr
import whisper
from share_btn import community_icon_html, loading_icon_html, share_js
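# Load the multilingual "medium" checkpoint once at startup; larger checkpoints
# are more accurate but need more memory and compute.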
model = whisper.load_model("medium")
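# Dropdown choices: "auto" maps to None (let Whisper detect the language);
# every long language name maps to its code from whisper.tokenizer.LANGUAGES.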
languages = {'auto': None} | {long_name: short_name for short_name, long_name in whisper.tokenizer.LANGUAGES.items()}
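# Transcribe one recording and reveal the hidden share widgets via gr.update.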
def inference(audio, language_long_name):
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)  # Whisper decodes 30-second windows
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    #_, probs = model.detect_language(mel)
    options = whisper.DecodingOptions(fp16=False, language=languages[language_long_name])
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
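# Custom CSS for the demo UI (buttons, footer, loading spinner, share button),
# carried over from the OpenAI Whisper Space this demo is based on.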
css = """
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif;
}
.gr-button {
color: white;
border-color: black;
background: black;
}
input[type='range'] {
accent-color: black;
}
.dark input[type='range'] {
accent-color: #dfdfdf;
}
.container {
max-width: 730px;
margin: auto;
padding-top: 1.5rem;
}
.details:hover {
text-decoration: underline;
}
.gr-button {
white-space: nowrap;
}
.gr-button:focus {
border-color: rgb(147 197 253 / var(--tw-border-opacity));
outline: none;
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
--tw-border-opacity: 1;
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
--tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
--tw-ring-opacity: .5;
}
.footer {
margin-bottom: 45px;
margin-top: 35px;
text-align: center;
border-bottom: 1px solid #e5e5e5;
}
.footer>p {
font-size: .8rem;
display: inline-block;
padding: 0 10px;
transform: translateY(10px);
background: white;
}
.dark .footer {
border-color: #303030;
}
.dark .footer>p {
background: #0b0f19;
}
.prompt h4{
margin: 1.25em 0 .25em 0;
font-weight: bold;
font-size: 115%;
}
.animate-spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#share-btn-container {
display: flex; margin-top: 1.5rem !important; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem;
}
#share-btn {
all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important;
}
#share-btn * {
all: unset;
}
"""
block = gr.Blocks(css=css)
with block:
    gr.HTML(
        """
            <div style="text-align: center; max-width: 650px; margin: 0 auto;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.75rem;
                "
              >
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                  🤫 Whisper demo
                </h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 94%">
                Whisper is a general-purpose speech recognition model from OpenAI. It is trained on a large,
                diverse dataset of audio. It can transcribe audio, identify languages, and translate. This
                demo cuts audio off after about 30 seconds.
              </p>
            </div>
        """
    )
    with gr.Group():
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                audio = gr.Audio(
                    label="Input Audio",
                    show_label=False,
                    source="microphone",
                    type="filepath"
                )
                language_long_name = gr.Dropdown(
                    list(languages.keys()),
                    value="auto",
                    label="Language of spoken text",
                    info="Language of the spoken text. Choose auto for automatic detection."
                )
                btn = gr.Button("Transcribe")
        text = gr.Textbox(show_label=False, elem_id="result-textarea")
        with gr.Group(elem_id="share-btn-container"):
            community_icon = gr.HTML(community_icon_html, visible=False)
            loading_icon = gr.HTML(loading_icon_html, visible=False)
            share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
    btn.click(inference, inputs=[audio, language_long_name], outputs=[text, community_icon, loading_icon, share_button])
    share_button.click(None, [], [], _js=share_js)
    gr.HTML('''
        <div class="footer">
            <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a> - Demo based on the <a href="https://huggingface.co/spaces/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI Whisper Demo</a> by 🤗 Hugging Face
            </p>
        </div>
    ''')
block.launch()