VietCat committed on
Commit
b269ebb
·
1 Parent(s): 8680a64
Files changed (3) hide show
  1. .gitignore +2 -0
  2. Dockerfile +16 -0
  3. app.py +32 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # files
2
+ *.DS_Store
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create a matching
# non-root user so its home directory (where transformers caches the
# downloaded model under ~/.cache) is writable at runtime.
RUN useradd -m -u 1000 user

# Install the required libraries (system-wide, while still root).
# torch is taken from the CPU-only wheel index: app.py never moves the model
# to a GPU, so the much larger default CUDA build would only bloat the image.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \
    && pip install --no-cache-dir transformers flask

# Drop privileges for everything that follows, including the running app.
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Create the working directory
WORKDIR /app

# Copy the source code into the container, owned by the runtime user
COPY --chown=user . /app

# Expose port 7860 (the default port used by HF Spaces)
EXPOSE 7860

# Run the app
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from transformers import AutoModel, AutoTokenizer
3
+ import torch
4
+
5
# Flask application object on which the HTTP routes are registered.
app = Flask(__name__)

# Load the PhoBERT tokenizer and encoder once, at import time, so that every
# request reuses the same objects instead of reloading them.
_PHOBERT_CHECKPOINT = "vinai/phobert-base"
tokenizer = AutoTokenizer.from_pretrained(_PHOBERT_CHECKPOINT)
model = AutoModel.from_pretrained(_PHOBERT_CHECKPOINT)
10
+
11
@app.route('/embed', methods=['POST'])
def embed():
    """Return the PhoBERT embedding for the text in a JSON POST body.

    Expects a JSON object of the form ``{"text": "<sentence>"}`` and responds
    with ``{"embedding": [...]}``: the final-layer hidden state of the first
    token of the encoded input, as a flat list of floats.

    Responds with HTTP 400 and ``{"error": ...}`` when the body is not a JSON
    object, when ``text`` is missing or empty, or when ``text`` is not a
    string.
    """
    # silent=True turns malformed / non-JSON bodies into None so they receive
    # the JSON 400 response below rather than Flask's default error page.
    data = request.get_json(silent=True)
    # A valid JSON body may still be null, a list, a number, ...; only an
    # object can carry the "text" field.
    text = data.get('text', '') if isinstance(data, dict) else ''
    if not text:
        return jsonify({"error": "No text provided"}), 400
    if not isinstance(text, str):
        return jsonify({"error": "'text' must be a string"}), 400

    # Truncate to 256 tokens: longer sequences overflow PhoBERT's position
    # embeddings and make the forward pass fail.
    # NOTE(review): the PhoBERT model card says input should be
    # word-segmented Vietnamese; raw text is passed through as-is here —
    # confirm callers pre-segment.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
    )
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)

    # Use the last-layer hidden state of the first token (position 0) of the
    # single sequence in the batch as the sentence embedding.
    embedding = outputs.last_hidden_state[0, 0, :].tolist()

    return jsonify({"embedding": embedding})
26
+
27
@app.route('/', methods=['GET'])
def index():
    """Respond to GET / with a short plain-text liveness message."""
    status_message = "PhoBERT Space is running!"
    return status_message
30
+
31
# Start Flask's built-in server only when this file is executed directly,
# listening on all interfaces on port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")