"""Flask service that exposes PhoBERT embeddings over HTTP."""

from flask import Flask, jsonify, request
from transformers import AutoTokenizer, TFAutoModel

app = Flask(__name__)

# Load PhoBERT (TensorFlow version) once at import time so that every request
# reuses the same tokenizer and model instances.
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
model = TFAutoModel.from_pretrained("vinai/phobert-base")

# Upper bound on the tokenized length passed to the model.
# NOTE(review): 256 is the limit given in the PhoBERT model card; confirm it
# if the checkpoint name above is ever changed.
MAX_SEQ_LENGTH = 256


@app.route('/embed', methods=['POST'])
def embed():
    """Return the PhoBERT embedding(s) for the text in a JSON request body.

    Expects a JSON object with a ``text`` field holding a string or a list
    of strings.

    Returns:
        On success, a JSON object ``{"embeddings": [...]}`` with one vector
        per input sequence, taken from the first token position of the
        model's last hidden state.
        HTTP 400 with ``{"error": ...}`` when the body is missing, is not a
        JSON object, has no/empty ``text``, or ``text`` has the wrong type.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so all bad input flows through the same 400 response below.
    data = request.get_json(silent=True)
    text = data.get('text', '') if isinstance(data, dict) else ''
    if not text:
        return jsonify({"error": "No text provided"}), 400

    is_text = isinstance(text, str)
    is_text_list = isinstance(text, list) and all(
        isinstance(item, str) for item in text
    )
    if not (is_text or is_text_list):
        return jsonify(
            {"error": "'text' must be a string or a list of strings"}
        ), 400

    # Convert to TensorFlow tensors. Padding lets a list of differently sized
    # strings be stacked into one batch; truncation keeps long inputs within
    # the length the model can handle.
    inputs = tokenizer(
        text,
        return_tensors="tf",
        padding=True,
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
    )
    outputs = model(**inputs)

    # Take the vector at the first token position of the last hidden state;
    # .numpy().tolist() turns the TensorFlow tensor into nested Python lists
    # that jsonify can serialize.
    embedding = outputs.last_hidden_state[:, 0, :].numpy().tolist()
    return jsonify({"embeddings": embedding})


@app.route('/', methods=['GET'])
def index():
    """Return a plain-text message showing that the service is up."""
    return "PhoBERT Space is running!"


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)