import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch

# Load tokenizer and model once at module import so every request reuses
# the same weights instead of reloading an 8B-parameter model per call.
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3-Embedding-8B", trust_remote_code=True
)
model = AutoModel.from_pretrained(
    "Qwen/Qwen3-Embedding-8B",
    trust_remote_code=True,
    device_map="auto",  # shard/place layers automatically across available devices
).eval()


def get_embedding(text: str) -> dict:
    """Embed *text* with Qwen3-Embedding-8B and report the vector size.

    Args:
        text: Input string; tokenized with truncation to the model's
            maximum sequence length.

    Returns:
        A dict with the original ``text`` and ``embedding_size`` (the
        dimensionality of the mean-pooled hidden-state vector).
    """
    # NOTE(review): the Qwen3-Embedding model card recommends last-token
    # pooling for embeddings; mean pooling is kept here to preserve the
    # existing behavior — confirm before changing.
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(model.device)
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        outputs = model(**inputs)
    # Average over the sequence dimension to get one fixed-size vector.
    embedding = outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
    return {"text": text, "embedding_size": len(embedding)}


demo = gr.Interface(
    fn=get_embedding,
    inputs=gr.Textbox(label="Input text"),
    outputs=gr.JSON(),
    title="Qwen3 Embeddings",
)

# Launch the web UI only when executed as a script; importing this module
# (e.g. for testing or reuse) must not start a server.
if __name__ == "__main__":
    demo.launch()