import os

import gradio as gr
import torch
import torchaudio
from transformers import AutoModel, AutoProcessor

# Hugging Face access token, read from the "hear2" environment variable
# (google/hear-pytorch is a gated repo, so downloads must be authenticated).
HF_TOKEN = os.getenv("hear2")

# `use_auth_token` is deprecated in recent transformers releases; pass `token` instead.
processor = AutoProcessor.from_pretrained("google/hear-pytorch", token=HF_TOKEN)
model = AutoModel.from_pretrained("google/hear-pytorch", token=HF_TOKEN)

# HeAR is documented to take 16 kHz mono audio; fall back to 16_000 if the
# processor does not expose its expected sampling rate.
TARGET_SR = getattr(processor, "sampling_rate", 16_000)


def predict(audio):
    # Gradio passes a file path; torchaudio returns a (channels, samples) tensor.
    waveform, sample_rate = torchaudio.load(audio)
    # Downmix multi-channel recordings to mono.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # Resample if needed: most feature extractors raise on a sampling-rate
    # mismatch rather than resampling for you.
    if sample_rate != TARGET_SR:
        waveform = torchaudio.functional.resample(waveform, sample_rate, TARGET_SR)
    # Feature extractors expect a 1-D array per example, not (channels, samples).
    inputs = processor(
        waveform.squeeze(0).numpy(), sampling_rate=TARGET_SR, return_tensors="pt"
    )
    with torch.no_grad():
        # Report the shape of the final hidden states as a quick sanity check.
        embeddings = model(**inputs).last_hidden_state
    return f"Embedding shape: {tuple(embeddings.shape)}"


iface = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="HeAR PyTorch Demo",
    description="Upload an audio file to generate health acoustic embeddings.",
)

if __name__ == "__main__":
    iface.launch()