Spaces:

KettaP
/

asr_test

Sleeping

App Files Files Community

asr_test / app.py

KettaP

Update app.py

51558b1 verified 20 days ago

raw

history blame contribute delete

5.65 kB

	import gradio as gr
	import os
	import requests
	from transformers import pipeline

	# FastAPI endpoint that receives feedback submissions.
	# The fallback host below looks like a temporary ngrok tunnel address, so the
	# endpoint can be overridden with the BACKEND_URL environment variable instead
	# of editing this file; an unset or empty variable keeps the literal fallback.
	BACKEND_URL = (
	    os.environ.get("BACKEND_URL")
	    or "https://35d2-41-84-202-90.ngrok-free.app/submit-feedback"
	)

	# Language key (also used as the dropdown choices) -> model id passed to the
	# ASR pipeline.
	model_map = {
	    "english": "jonatasgrosman/wav2vec2-large-xlsr-53-english",
	}

	# Make sure the local "responses" directory exists when the module is loaded.
	os.makedirs("responses", exist_ok=True)

	# ASR pipelines that have already been built, keyed by language, so a model is
	# loaded once per process instead of on every "Transcribe" click.
	_asr_pipelines = {}


	def _select_device():
	    """Return the pipeline device index: 0 when CUDA is usable, else -1 (CPU)."""
	    try:
	        import torch
	    except ImportError:
	        # Cannot probe for a GPU without torch; keep the original device index.
	        return 0
	    return 0 if torch.cuda.is_available() else -1


	def _get_asr(language):
	    """Return the cached ASR pipeline for *language*, building it on first use."""
	    asr = _asr_pipelines.get(language)
	    if asr is None:
	        asr = pipeline(
	            "automatic-speech-recognition",
	            model=model_map[language],
	            device=_select_device(),
	        )
	        _asr_pipelines[language] = asr
	    return asr


	# Transcription function
	def transcribe(audio, language):
	    """Transcribe an audio file with the ASR model registered for *language*.

	    Args:
	        audio: Path of the audio file to transcribe, or a falsy value when
	            nothing was uploaded or recorded.
	        language: Key into ``model_map`` selecting the model.

	    Returns:
	        A ``(text, audio)`` tuple. ``text`` is the transcription, or a short
	        notice when the audio or the language is missing; ``audio`` is the
	        input value handed back unchanged.
	    """
	    # Guard incomplete input up front instead of failing inside the model call.
	    if not audio:
	        return "⚠️ Please upload or record audio first.", audio
	    if language not in model_map:
	        return "⚠️ Please select a language first.", audio

	    text = _get_asr(language)(audio)["text"]
	    return text, audio

	# Save feedback by sending it to FastAPI backend
	def save_feedback(audio_file, transcription, age_group, gender, evaluated_language, speak_level, write_level,
	                  native, native_language, env, device, domain, accuracy, orthography, meaning, errors,
	                  performance, improvement, usability, technical_issues, final_comments, email):
	    """Send the audio clip and the feedback form answers to the backend.

	    The audio file is read into memory and posted to ``BACKEND_URL`` as a
	    multipart upload, with every other argument sent as a form field.

	    Args:
	        audio_file: Path of the audio clip that was transcribed.
	        transcription: Text produced for that clip.
	        errors: Iterable of selected error labels; sent comma-joined, or as
	            an empty string when nothing is selected.
	        env: Sent under the form key ``"environment"``.
	        (all remaining arguments): Sent unchanged under form keys equal to
	            their parameter names.

	    Returns:
	        A status message for the UI. The call never raises for a missing or
	        unreadable audio file or for a failed request; those are reported in
	        the returned message instead.
	    """
	    if not audio_file:
	        return "⚠️ Please upload or record audio before submitting feedback."

	    # Read binary content of audio file. This is kept in its own try block
	    # because requests' exceptions derive from OSError as well, and a file
	    # problem must not be reported as a connection failure.
	    try:
	        with open(audio_file, "rb") as f:
	            audio_content = f.read()
	    except OSError as e:
	        return f"❌ Could not read the audio file: {e}"

	    # Prepare metadata as form fields
	    metadata = {
	        "transcription": transcription,
	        "age_group": age_group,
	        "gender": gender,
	        "evaluated_language": evaluated_language,
	        "speak_level": speak_level,
	        "write_level": write_level,
	        "native": native,
	        "native_language": native_language,
	        "environment": env,
	        "device": device,
	        "domain": domain,
	        "accuracy": accuracy,
	        "orthography": orthography,
	        "meaning": meaning,
	        "errors": ",".join(errors) if errors else "",
	        "performance": performance,
	        "improvement": improvement,
	        "usability": usability,
	        "technical_issues": technical_issues,
	        "final_comments": final_comments,
	        "email": email,
	    }

	    files = {
	        "audio_file": ("audio.wav", audio_content, "audio/wav"),
	    }

	    try:
	        response = requests.post(BACKEND_URL, data=metadata, files=files, timeout=20)
	    except requests.RequestException as e:
	        return f"❌ Could not connect to the backend: {str(e)}"
	    except Exception as e:
	        # UI callback boundary: report anything unexpected instead of raising.
	        return f"❌ Unexpected error while submitting feedback: {e}"

	    # Only 201 is treated as success, as in the original check.
	    if response.status_code == 201:
	        return "✅ Feedback submitted successfully. Thank you!"
	    return f"⚠️ Submission failed: {response.status_code} — {response.text}"


	# Gradio UI
	with gr.Blocks() as demo:
	    gr.Markdown("## African ASR + Feedback")

	    # NOTE(review): the Row is assumed to hold only the audio input and the
	    # language dropdown — confirm against the intended layout.
	    with gr.Row():
	        audio_input = gr.Audio(
	            sources=["upload", "microphone"],
	            type="filepath",
	            label="Upload or record audio",
	        )
	        # Preselect the first configured language explicitly so "Transcribe"
	        # is never triggered with no language chosen.
	        lang = gr.Dropdown(
	            list(model_map.keys()),
	            value=next(iter(model_map), None),
	            label="Select Language",
	        )

	    transcribed_text = gr.Textbox(label="Transcribed Text")
	    submit_btn = gr.Button("Transcribe")
	    # transcribe returns (text, audio): the text fills the textbox and the
	    # audio value is written back to the audio component.
	    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

	    gr.Markdown("---\n## Feedback Form")

	    age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group")
	    gender = gr.Dropdown(["Male", "Female", "Prefer not to say", "Other"], label="Gender")
	    evaluated_language = gr.Dropdown(list(model_map.keys()), label="Which language did you evaluate for?")
	    speak_level = gr.Slider(1, 10, label="How well do you speak this language?")
	    write_level = gr.Slider(1, 10, label="How well do you write the language?")
	    native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language?")
	    native_language = gr.Textbox(label="If not, what is your native language?")
	    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room", "Noisy Background", "Multiple Environments", "Unsure", "Other"], label="Recording environment")
	    # NOTE(review): "Mobile Phone/Tablet" and "Tablet" overlap; the choices are
	    # left unchanged because they are sent to the backend as-is.
	    device = gr.Dropdown(["Mobile Phone/Tablet", "Tablet", "Laptop/Computer Microphone", "Dedicated Microphone", "Unsure", "Other"], label="Recording device")
	    domain = gr.Textbox(label="Was the speech related to a specific domain or topic? (Optional)")
	    accuracy = gr.Slider(1, 10, label="How accurate was the model’s transcription?")
	    orthography = gr.Dropdown(["Yes, mostly correct", "No, major issues", "Partially", "Not Applicable"], label="Did the transcription use standard orthography?")
	    meaning = gr.Slider(1, 10, label="Did the transcription preserve the original meaning?")
	    errors = gr.CheckboxGroup([
	        "Substitutions", "Omissions", "Insertions", "Pronunciation-related", "Diacritic Errors",
	        "Code-switching Errors", "Named Entity Errors", "Punctuation Errors", "No significant errors"
	    ], label="Which errors were prominent?")
	    performance = gr.Textbox(label="What did the model do well? What did it struggle with?")
	    improvement = gr.Textbox(label="How could this ASR model be improved?")
	    usability = gr.Slider(1, 5, label="How easy was it to use the tool?")
	    technical_issues = gr.Textbox(label="Did you encounter any technical issues?")
	    final_comments = gr.Textbox(label="Any other comments or suggestions?")
	    email = gr.Textbox(label="Email (optional)")

	    save_btn = gr.Button("Submit Feedback")
	    output_msg = gr.Textbox(interactive=False)
	    # The inputs list must stay in the same order as save_feedback's parameters.
	    save_btn.click(
	        fn=save_feedback,
	        inputs=[audio_input, transcribed_text, age_group, gender, evaluated_language, speak_level, write_level,
	                native, native_language, env, device, domain, accuracy, orthography, meaning, errors,
	                performance, improvement, usability, technical_issues, final_comments, email],
	        outputs=[output_msg],
	    )

	# Launch the interface
	demo.launch()