# BackEnd / app.py
# HaRin2806
# fix embedding data
# 89397a4
from flask import Flask, jsonify, request
from flask_cors import CORS
import logging
import os
import threading
from dotenv import load_dotenv
from flask_jwt_extended import JWTManager
import datetime
# Logging configuration. When the SPACE_ID environment variable is set
# (HuggingFace), a StreamHandler is attached explicitly; otherwise
# basicConfig is left to install its default handler.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

if not os.getenv("SPACE_ID"):
    logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
else:
    logging.basicConfig(
        level=logging.INFO,
        format=_LOG_FORMAT,
        handlers=[logging.StreamHandler()],
    )

# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
# Load environment variables (python-dotenv).
load_dotenv()

# Create the Flask app.
app = Flask(__name__)

# JWT configuration.
# The signing key should always be supplied through the JWT_SECRET_KEY
# environment variable. The built-in fallback is kept so existing deployments
# keep working, but using it is now logged: anyone who can read this file can
# forge tokens signed with the fallback value.
_jwt_secret = os.getenv("JWT_SECRET_KEY")
if not _jwt_secret:
    logger.warning(
        "JWT_SECRET_KEY is not set; falling back to the built-in default "
        "secret. Set JWT_SECRET_KEY in the environment for any real deployment."
    )
    _jwt_secret = "hathimylinh"
app.config['JWT_SECRET_KEY'] = _jwt_secret
app.config['JWT_ACCESS_TOKEN_EXPIRES'] = datetime.timedelta(hours=24)
# Tokens are accepted from both the Authorization header and cookies.
app.config['JWT_TOKEN_LOCATION'] = ['headers', 'cookies']
# NOTE(review): cookies are allowed over plain HTTP and cookie CSRF protection
# is switched off. Since cookies are an accepted token location, confirm this
# is intentional before exposing the API publicly.
app.config['JWT_COOKIE_SECURE'] = False
app.config['JWT_COOKIE_CSRF_PROTECT'] = False

# Upload configuration for admin file uploads.
app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB max request body

jwt = JWTManager(app)

# Enable CORS for every /api/* route.
# NOTE(review): a wildcard origin is combined with supports_credentials=True.
# Flask-CORS documents supports_credentials as not intended for use with a
# '*' origin -- confirm, and consider restricting origins to the real frontends.
CORS(app, resources={
    r"/api/*": {
        "origins": "*",  # Allow all origins on HF
        "supports_credentials": True
    }
})
def setup_data_background():
    """Run ``startup.setup_data`` and log how it went.

    Written to be used as a thread target: any exception raised while
    importing or running the setup is logged (message first, then the full
    traceback) and is not re-raised.
    """
    try:
        logger.info("Starting background data setup...")
        # Imported lazily, only when the setup actually runs.
        from startup import setup_data as run_setup
        run_setup()
        logger.info("Background data setup completed")
    except Exception as err:
        logger.error(f"Background setup failed: {err}")
        import traceback
        trace_text = traceback.format_exc()
        logger.error(trace_text)
# Basic routes are defined first
@app.route('/', methods=['GET'])
def root():
    """Root endpoint: report that the API is running and list basic endpoints."""
    basic_endpoints = {
        "health": "/api/health",
        "data_status": "/api/data-status",
        "embed_data": "/api/embed-data",
    }
    return jsonify(
        message="Nutribot API is running",
        status="healthy",
        endpoints=basic_endpoints,
    )
@app.route('/api/health', methods=['GET'])
def health_check():
    """API endpoint for checking the server status.

    Always answers with status "healthy". ``data_items`` is the value returned
    by the embedding model's ``count()``; if obtaining it fails for any
    reason, the error is logged and 0 is reported instead.
    """
    import time

    # Stays 0 unless the count below is obtained successfully.
    collection_count = 0
    try:
        from core.embedding_model import get_embedding_model
        collection_count = get_embedding_model().count()
    except Exception as err:
        logger.error(f"Error getting embedding count: {err}")

    if os.getenv("SPACE_ID"):
        environment = "huggingface"
    else:
        environment = "local"

    payload = {
        "status": "healthy",
        "message": "Server đang hoạt động",
        "time": time.strftime('%Y-%m-%d %H:%M:%S'),
        "data_items": collection_count,
        "environment": environment,
    }
    return jsonify(payload)
@app.route('/api/data-status', methods=['GET'])
def data_status():
    """API endpoint for checking the state of the data.

    Reports the embedding model's ``count()``, the configured Chroma persist
    directory, whether the ``data`` directory exists and how many files it
    contains (counted recursively), and whether SPACE_ID is set.
    Any failure is logged and answered with HTTP 500 and the error text.
    """
    try:
        from core.embedding_model import get_embedding_model
        from config import CHROMA_PERSIST_DIRECTORY

        embeddings_total = get_embedding_model().count()

        # Inspect the data directory (path is relative to the working directory).
        has_data_dir = os.path.exists("data")
        if has_data_dir:
            file_total = sum(
                len(filenames) for _, _, filenames in os.walk("data")
            )
        else:
            file_total = 0

        return jsonify(
            success=True,
            embeddings_count=embeddings_total,
            data_ready=embeddings_total > 0,
            chroma_dir=CHROMA_PERSIST_DIRECTORY,
            data_dir_exists=has_data_dir,
            data_files_count=file_total,
            is_huggingface=bool(os.getenv("SPACE_ID")),
        )
    except Exception as err:
        logger.error(f"Error checking data status: {err}")
        failure = {"success": False, "error": str(err)}
        return jsonify(failure), 500
@app.route('/api/embed-data', methods=['POST'])
def manual_embed_data():
    """API endpoint for starting the data embedding run manually.

    Request body (optional): a JSON object whose ``force`` value is passed
    through unchanged to ``embed_all_data``. A missing body, a non-JSON body,
    malformed JSON, or JSON that is not an object is treated as "no options"
    (``force=False``) instead of surfacing as an HTTP 500.

    Returns:
        200 with ``success: True`` once the background thread has been started
        (the embedding itself may still fail later; see the server log),
        400 when the ``data`` directory does not exist,
        500 with the error text on any unexpected failure.

    NOTE(review): no authentication check is visible on this route, and each
    request starts a new background thread -- confirm both are intended.
    """
    try:
        # silent=True: return None rather than raising on a non-JSON or
        # malformed body; the isinstance check guards against JSON arrays,
        # strings and numbers, which have no .get().
        payload = request.get_json(silent=True)
        force = payload.get('force', False) if isinstance(payload, dict) else False

        data_dir = "data"
        if not os.path.exists(data_dir):
            return jsonify({
                "success": False,
                "error": "Thư mục data không tồn tại"
            }), 400

        # Run the embedding in its own thread so the request is not blocked.
        def run_embedding():
            try:
                from scripts.embed_data import embed_all_data
                success = embed_all_data(data_dir, force=force)
                if success:
                    logger.info("Manual embedding completed successfully")
                else:
                    logger.error("Manual embedding failed")
            except Exception as e:
                # Nobody joins this thread, so the log is the only place the
                # failure is visible: include the traceback.
                logger.exception(f"Manual embedding failed: {e}")

        threading.Thread(target=run_embedding, daemon=True).start()

        return jsonify({
            "success": True,
            "message": "Đã bắt đầu quá trình embedding data trong background"
        })
    except Exception as e:
        logger.error(f"Lỗi chạy embedding: {str(e)}")
        return jsonify({
            "success": False,
            "error": str(e)
        }), 500
# Import and register the blueprints after the basic routes are defined.
# This is best-effort: if any import or registration fails, the app keeps
# running with only the routes registered so far. The failure is logged
# together with its traceback, because without it a missing dependency inside
# one of the api modules is very hard to diagnose from the log alone.
try:
    from api.auth import auth_routes
    from api.chat import chat_routes
    from api.data import data_routes
    from api.history import history_routes
    from api.feedback import feedback_routes
    from api.admin import admin_routes

    # Register the blueprints
    app.register_blueprint(auth_routes, url_prefix='/api/auth')
    app.register_blueprint(chat_routes, url_prefix='/api')
    app.register_blueprint(data_routes, url_prefix='/api')
    app.register_blueprint(history_routes, url_prefix='/api')
    app.register_blueprint(feedback_routes, url_prefix='/api')
    app.register_blueprint(admin_routes, url_prefix='/api/admin')

    logger.info("All blueprints registered successfully")
except Exception as e:
    logger.exception(f"Error importing/registering blueprints: {e}")
# Automatic data setup when running on HuggingFace (SPACE_ID is set).
# The setup runs in a daemon thread so importing this module is not blocked.
if not os.getenv("SPACE_ID"):
    logger.info("Running in local environment")
else:
    logger.info("Detected HuggingFace environment, starting background data setup...")
    setup_thread = threading.Thread(target=setup_data_background, daemon=True)
    setup_thread.start()
if __name__ == '__main__':
    # Create the default super admin; a failure here is logged and startup
    # continues.
    try:
        from models.admin_model import AdminUser
        created, admin_info = AdminUser.create_default_super_admin()
        if created:
            if "email" in admin_info:
                # NOTE(review): this writes the admin password to the log in
                # plain text -- confirm that is acceptable for the log sink.
                logger.info("=== THÔNG TIN ADMIN ===")
                logger.info(f"Email: {admin_info['email']}")
                logger.info(f"Password: {admin_info['password']}")
                logger.info("======================")
    except Exception as err:
        logger.error(f"Lỗi tạo super admin: {err}")

    # Create the feedback indexes; likewise best-effort.
    try:
        from models.feedback_model import ensure_indexes
        ensure_indexes()
    except Exception as err:
        logger.error(f"Lỗi tạo feedback indexes: {err}")

    # Run the Flask app on all interfaces; PORT defaults to 7860.
    app.run(host='0.0.0.0', port=int(os.getenv("PORT", 7860)), debug=False)