Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,25 +36,30 @@ def transcribe_base(audio, language):
|
|
| 36 |
speaker_class_string = f'Speaker found in database, ID {speaker}'
|
| 37 |
return transcription#, speaker_class_string
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
def transcribe_mic(audio_microphone, language):
|
| 40 |
print("Transcription microphone")
|
| 41 |
transcription = transcribe_base(audio_microphone, language)
|
|
|
|
| 42 |
if language=="it":
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
"uno studente", "uno psicologo", "uno zio",
|
| 46 |
-
"di autore", "a uomo", "su imperatore", "con amico", "per artista"
|
| 47 |
-
}
|
| 48 |
-
transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|nell|sull|coll|pell)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
|
| 49 |
-
transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
|
| 50 |
-
transcription = re.sub(r"\bpo\b", "po'", transcription)
|
| 51 |
-
transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
|
| 52 |
-
pattern_numbers = r"\b(trenta|quaranta|cinquanta|sessanta|settanta|ottanta|novanta)\s+(?=anni|ore)\b"
|
| 53 |
-
replacement_numbers = lambda m: m.group(1)[:-1] + "’" + m.group(0).split()[1]
|
| 54 |
-
transcription = re.sub(pattern_numbers, replacement_numbers, transcription)
|
| 55 |
-
for phrase in no_elision_cases:
|
| 56 |
-
fixed = phrase.replace(" ", "'")
|
| 57 |
-
transcription = transcription.replace(fixed, phrase)
|
| 58 |
return transcription
|
| 59 |
#return transcribe_base(audio_microphone, language)
|
| 60 |
|
|
|
|
| 36 |
speaker_class_string = f'Speaker found in database, ID {speaker}'
|
| 37 |
return transcription#, speaker_class_string
|
| 38 |
|
| 39 |
+
def fix_italian_transcription(transcription):
    """Restore Italian apostrophes (elisions) in a transcribed string.

    The rules are applied in a fixed order:

    1. Join an elidable article/preposition/clitic ("un", "l", "d", "quest",
       "all", ...) to a following word that starts with a vowel or "h".
    2. Join a clitic ("s", "t", "m", "c", "n") to "è", "ha" or "hanno".
    3. Write the truncated form "po" as "po'".
    4. Join "senz" to a following vowel.
    5. Elide tens ("trenta" ... "novanta") before "anni" / "ore".
    6. Undo rule 1 for a fixed list of phrases that must not be elided.

    Args:
        transcription: The text to post-process.

    Returns:
        The corrected text.
    """
    # Phrases that rule 1 would wrongly turn into "un'autore", "un'uomo", ...
    # Only the entries starting with an elidable word can actually be produced
    # by rule 1; the others are kept from the original list and are harmless.
    no_elision_cases = (
        "un autore", "un artista", "un uomo", "un amico", "un imperatore",
        "uno studente", "uno psicologo", "uno zio",
        "di autore", "a uomo", "su imperatore", "con amico", "per artista",
    )

    transcription = re.sub(
        r"\b(un|l|d|s|t|m|c|n|quest|all|dall|nell|sull|coll|pell)\s+(?=[aeiouhàèìòùáéíóú])",
        r"\1'",
        transcription,
    )
    transcription = re.sub(
        r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription
    )
    # The negative lookahead keeps an already correct "po'" from becoming
    # "po''".
    transcription = re.sub(r"\bpo\b(?!['’])", "po'", transcription)
    transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)

    # The following word is captured (not merely looked ahead at) so the
    # replacement can re-emit it; the previous lookahead form left it out of
    # the match and the replacement raised IndexError. The trailing \b now
    # applies to the noun, so words such as "annidati" are not affected.
    # The typographic apostrophe is the one the original replacement emitted.
    pattern_numbers = (
        r"\b(trenta|quaranta|cinquanta|sessanta|settanta|ottanta|novanta)"
        r"\s+(anni|ore)\b"
    )
    transcription = re.sub(
        pattern_numbers,
        lambda m: m.group(1)[:-1] + "’" + m.group(2),
        transcription,
    )

    for phrase in no_elision_cases:
        fixed = phrase.replace(" ", "'")
        transcription = transcription.replace(fixed, phrase)

    # The caller assigns the result back, so the text must be returned.
    return transcription
|
| 55 |
+
|
| 56 |
def transcribe_mic(audio_microphone, language):
    """Transcribe microphone audio, post-processing Italian output.

    Prints a trace line, delegates the transcription to ``transcribe_base``
    and, when ``language`` is ``"it"``, passes the result through
    ``fix_italian_transcription`` before returning it.
    """
    print("Transcription microphone")
    raw_text = transcribe_base(audio_microphone, language)

    # Only Italian output goes through the apostrophe fix-up.
    if language != "it":
        return raw_text
    return fix_italian_transcription(raw_text)
|
| 65 |
|