init project using Cursor
Browse files- .env.example +19 -0
- Dockerfile +6 -2
- README.md +124 -0
- app/__init__.py +1 -0
- app/config.py +43 -0
- app/constants.py +3 -0
- app/embedding.py +60 -0
- app/facebook.py +104 -0
- app/health.py +7 -0
- app/main.py +180 -0
- app/sheets.py +133 -0
- app/supabase_db.py +73 -0
- app/utils.py +87 -0
- config.py +0 -11
- main.py +0 -54
- modules/embedding.py +0 -7
- modules/facebook.py +0 -13
- modules/sheets.py +0 -29
- modules/supabase_db.py +0 -8
- modules/utils.py +0 -71
- modules/vector_search.py +0 -18
- requirements.txt +14 -9
.env.example
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Facebook Configuration
|
2 |
+
FACEBOOK_VERIFY_TOKEN=your_verify_token_here
|
3 |
+
FACEBOOK_APP_SECRET=your_app_secret_here
|
4 |
+
|
5 |
+
# Google Sheets Configuration
|
6 |
+
GOOGLE_SHEETS_CREDENTIALS_FILE=credentials.json
|
7 |
+
GOOGLE_SHEETS_TOKEN_FILE=token.json
|
8 |
+
CONVERSATION_SHEET_ID=your_sheet_id_here
|
9 |
+
|
10 |
+
# Supabase Configuration
|
11 |
+
SUPABASE_URL=your_supabase_url
|
12 |
+
SUPABASE_KEY=your_supabase_key
|
13 |
+
|
14 |
+
# Logging Configuration
|
15 |
+
LOG_LEVEL=INFO
|
16 |
+
|
17 |
+
# Server Configuration
|
18 |
+
HOST=0.0.0.0
|
19 |
+
PORT=8000
|
Dockerfile
CHANGED
@@ -1,6 +1,10 @@
|
|
1 |
-
FROM python:3.
|
|
|
2 |
WORKDIR /app
|
|
|
3 |
COPY requirements.txt .
|
4 |
RUN pip install --no-cache-dir -r requirements.txt
|
|
|
5 |
COPY . .
|
6 |
-
|
|
|
|
1 |
+
FROM python:3.11-slim
|
2 |
+
|
3 |
WORKDIR /app
|
4 |
+
|
5 |
COPY requirements.txt .
|
6 |
RUN pip install --no-cache-dir -r requirements.txt
|
7 |
+
|
8 |
COPY . .
|
9 |
+
|
10 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# WeBot - Facebook Messenger Chatbot
|
2 |
+
|
3 |
+
WeBot là một chatbot Facebook Messenger được xây dựng với FastAPI, tích hợp với Google Sheets để lưu trữ lịch sử hội thoại và Supabase để quản lý token cũng như tìm kiếm vector.
|
4 |
+
|
5 |
+
## Tính năng
|
6 |
+
|
7 |
+
- Webhook Facebook Messenger để nhận và xử lý tin nhắn
|
8 |
+
- Phân tích nội dung tin nhắn và từ khóa
|
9 |
+
- Tích hợp với Google Sheets để lưu trữ lịch sử hội thoại
|
10 |
+
- Sử dụng Supabase để:
|
11 |
+
- Quản lý access token cho từng Facebook Page
|
12 |
+
- Vector similarity search với embedding
|
13 |
+
- Xử lý bất đồng bộ với async/await
|
14 |
+
- Logging chi tiết với loguru
|
15 |
+
|
16 |
+
## Yêu cầu
|
17 |
+
|
18 |
+
- Python 3.8+
|
19 |
+
- Facebook Page và App
|
20 |
+
- Tài khoản Google Cloud với Sheets API được kích hoạt
|
21 |
+
- Tài khoản Supabase
|
22 |
+
|
23 |
+
## Cài đặt
|
24 |
+
|
25 |
+
1. Clone repository:
|
26 |
+
```bash
|
27 |
+
git clone https://github.com/yourusername/webot.git
|
28 |
+
cd webot
|
29 |
+
```
|
30 |
+
|
31 |
+
2. Tạo và kích hoạt môi trường ảo:
|
32 |
+
```bash
|
33 |
+
python -m venv venv
|
34 |
+
source venv/bin/activate # Linux/Mac
|
35 |
+
# hoặc
|
36 |
+
.\venv\Scripts\activate # Windows
|
37 |
+
```
|
38 |
+
|
39 |
+
3. Cài đặt dependencies:
|
40 |
+
```bash
|
41 |
+
pip install -r requirements.txt
|
42 |
+
```
|
43 |
+
|
44 |
+
4. Cấu hình môi trường:
|
45 |
+
- Copy file `.env.example` thành `.env`
|
46 |
+
- Điền các thông tin cấu hình cần thiết
|
47 |
+
|
48 |
+
## Cấu hình
|
49 |
+
|
50 |
+
1. Facebook:
|
51 |
+
- Tạo Facebook App và Page
|
52 |
+
- Cấu hình webhook với URL của ứng dụng
|
53 |
+
- Lưu Page Access Token vào Supabase
|
54 |
+
|
55 |
+
2. Google Sheets:
|
56 |
+
- Tạo project trong Google Cloud Console
|
57 |
+
- Kích hoạt Sheets API
|
58 |
+
- Tải credentials.json và cấu hình trong .env
|
59 |
+
|
60 |
+
3. Supabase:
|
61 |
+
- Tạo bảng `page_tokens` để lưu trữ access token
|
62 |
+
- Cấu hình vector similarity search
|
63 |
+
- Tạo RPC function `match_documents`
|
64 |
+
|
65 |
+
## Chạy ứng dụng
|
66 |
+
|
67 |
+
### Phát triển
|
68 |
+
|
69 |
+
```bash
|
70 |
+
uvicorn app.main:app --reload
|
71 |
+
```
|
72 |
+
|
73 |
+
### Production với Docker
|
74 |
+
|
75 |
+
```bash
|
76 |
+
docker build -t webot .
|
77 |
+
docker run -p 8000:8000 webot
|
78 |
+
```
|
79 |
+
|
80 |
+
## Deploy
|
81 |
+
|
82 |
+
### Fly.io
|
83 |
+
|
84 |
+
1. Cài đặt Fly CLI
|
85 |
+
2. Đăng nhập và khởi tạo ứng dụng:
|
86 |
+
```bash
|
87 |
+
flyctl auth login
|
88 |
+
flyctl launch
|
89 |
+
```
|
90 |
+
|
91 |
+
3. Deploy:
|
92 |
+
```bash
|
93 |
+
flyctl deploy
|
94 |
+
```
|
95 |
+
|
96 |
+
### Render
|
97 |
+
|
98 |
+
1. Kết nối repository với Render
|
99 |
+
2. Tạo Web Service mới
|
100 |
+
3. Cấu hình các biến môi trường
|
101 |
+
4. Deploy
|
102 |
+
|
103 |
+
## Cấu trúc dự án
|
104 |
+
|
105 |
+
```
|
106 |
+
webot/
|
107 |
+
├── app/
|
108 |
+
│ ├── __init__.py
|
109 |
+
│ ├── main.py # FastAPI application
|
110 |
+
│ ├── config.py # Configuration management
|
111 |
+
│ ├── facebook.py # Facebook Messenger integration
|
112 |
+
│ ├── sheets.py # Google Sheets integration
|
113 |
+
│ ├── supabase_db.py # Supabase integration
|
114 |
+
│ ├── embedding.py # Text embedding
|
115 |
+
│ └── utils.py # Utility functions
|
116 |
+
├── requirements.txt
|
117 |
+
├── Dockerfile
|
118 |
+
├── .env.example
|
119 |
+
└── README.md
|
120 |
+
```
|
121 |
+
|
122 |
+
## Đóng góp
|
123 |
+
|
124 |
+
Mọi đóng góp đều được hoan nghênh! Vui lòng tạo issue hoặc pull request.
|
app/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
app/config.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic_settings import BaseSettings
|
2 |
+
from functools import lru_cache
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
class Settings(BaseSettings):
|
9 |
+
"""
|
10 |
+
Lưu trữ và quản lý cấu hình ứng dụng từ biến môi trường.
|
11 |
+
Các thuộc tính: facebook_verify_token, facebook_app_secret, ...
|
12 |
+
"""
|
13 |
+
# Facebook Configuration
|
14 |
+
facebook_verify_token: str = os.getenv("FACEBOOK_VERIFY_TOKEN") or ""
|
15 |
+
facebook_app_secret: str = os.getenv("FACEBOOK_APP_SECRET") or ""
|
16 |
+
|
17 |
+
# Google Sheets Configuration
|
18 |
+
google_sheets_credentials_file: str = os.getenv("GOOGLE_SHEETS_CREDENTIALS_FILE") or ""
|
19 |
+
google_sheets_token_file: str = os.getenv("GOOGLE_SHEETS_TOKEN_FILE") or ""
|
20 |
+
conversation_sheet_id: str = os.getenv("CONVERSATION_SHEET_ID") or ""
|
21 |
+
|
22 |
+
# Supabase Configuration
|
23 |
+
supabase_url: str = os.getenv("SUPABASE_URL") or ""
|
24 |
+
supabase_key: str = os.getenv("SUPABASE_KEY") or ""
|
25 |
+
|
26 |
+
# Server Configuration
|
27 |
+
host: str = os.getenv("HOST", "0.0.0.0") or ""
|
28 |
+
port: int = int(os.getenv("PORT", "8000")) or 8000
|
29 |
+
|
30 |
+
# Logging Configuration
|
31 |
+
log_level: str = os.getenv("LOG_LEVEL", "INFO") or "INFO"
|
32 |
+
|
33 |
+
class Config:
|
34 |
+
env_file = ".env"
|
35 |
+
|
36 |
+
@lru_cache()
|
37 |
+
def get_settings() -> Settings:
|
38 |
+
"""
|
39 |
+
Lấy instance Settings đã cache (singleton).
|
40 |
+
Input: None
|
41 |
+
Output: Settings instance.
|
42 |
+
"""
|
43 |
+
return Settings()
|
app/constants.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
VEHICLE_KEYWORDS = ["xe máy", "ô tô", "xe đạp", "xe hơi"]
|
2 |
+
SHEET_RANGE = 'Conversations!A2:F'
|
3 |
+
EMBEDDING_DIM = 1536
|
app/embedding.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
import numpy as np
|
3 |
+
from loguru import logger
|
4 |
+
import httpx
|
5 |
+
from tenacity import retry, stop_after_attempt, wait_exponential
|
6 |
+
|
7 |
+
from .utils import timing_decorator
|
8 |
+
|
9 |
+
class EmbeddingClient:
|
10 |
+
def __init__(self):
|
11 |
+
"""
|
12 |
+
Khởi tạo EmbeddingClient.
|
13 |
+
Input: None
|
14 |
+
Output: EmbeddingClient instance.
|
15 |
+
"""
|
16 |
+
self._client = httpx.AsyncClient()
|
17 |
+
|
18 |
+
@timing_decorator
|
19 |
+
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
|
20 |
+
async def create_embedding(self, text: str) -> List[float]:
|
21 |
+
"""
|
22 |
+
Tạo embedding vector từ text bằng dịch vụ embedding (ví dụ OpenAI).
|
23 |
+
Input: text (str)
|
24 |
+
Output: list[float] embedding vector.
|
25 |
+
"""
|
26 |
+
try:
|
27 |
+
# This is a placeholder for your actual embedding service
|
28 |
+
# You should replace this with your preferred embedding service (e.g., OpenAI, Cohere, etc.)
|
29 |
+
# For example, using OpenAI's embedding API:
|
30 |
+
|
31 |
+
# response = await self._client.post(
|
32 |
+
# "https://api.openai.com/v1/embeddings",
|
33 |
+
# headers={"Authorization": f"Bearer {api_key}"},
|
34 |
+
# json={
|
35 |
+
# "input": text,
|
36 |
+
# "model": "text-embedding-ada-002"
|
37 |
+
# }
|
38 |
+
# )
|
39 |
+
# embedding = response.json()["data"][0]["embedding"]
|
40 |
+
|
41 |
+
# For now, we'll return a random vector as a placeholder
|
42 |
+
embedding = np.random.normal(0, 1, 1536).tolist() # 1536 is OpenAI's embedding dimension
|
43 |
+
return embedding
|
44 |
+
except Exception as e:
|
45 |
+
logger.error(f"Error creating embedding: {e}")
|
46 |
+
raise
|
47 |
+
|
48 |
+
def cosine_similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
|
49 |
+
"""
|
50 |
+
Tính cosine similarity giữa hai embedding.
|
51 |
+
Input: embedding1 (list[float]), embedding2 (list[float])
|
52 |
+
Output: float (giá trị similarity)
|
53 |
+
"""
|
54 |
+
try:
|
55 |
+
a = np.array(embedding1)
|
56 |
+
b = np.array(embedding2)
|
57 |
+
return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
|
58 |
+
except Exception as e:
|
59 |
+
logger.error(f"Error calculating similarity: {e}")
|
60 |
+
return 0.0
|
app/facebook.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hmac
|
2 |
+
import hashlib
|
3 |
+
import json
|
4 |
+
from typing import Any, Dict, Optional
|
5 |
+
import httpx
|
6 |
+
from fastapi import HTTPException, Request
|
7 |
+
from loguru import logger
|
8 |
+
|
9 |
+
from .utils import timing_decorator
|
10 |
+
|
11 |
+
class FacebookClient:
|
12 |
+
def __init__(self, app_secret: str):
|
13 |
+
"""
|
14 |
+
Khởi tạo FacebookClient với app_secret.
|
15 |
+
Input: app_secret (str) - Facebook App Secret.
|
16 |
+
Output: FacebookClient instance.
|
17 |
+
"""
|
18 |
+
self.app_secret = app_secret
|
19 |
+
self._client = httpx.AsyncClient()
|
20 |
+
|
21 |
+
async def verify_webhook(self, token: str, challenge: str, verify_token: str) -> int:
|
22 |
+
"""
|
23 |
+
Xác thực webhook Facebook bằng verify_token và trả về challenge.
|
24 |
+
Input: token (str), challenge (str), verify_token (str)
|
25 |
+
Output: int (challenge nếu thành công, lỗi nếu thất bại)
|
26 |
+
"""
|
27 |
+
if token != verify_token:
|
28 |
+
raise HTTPException(status_code=403, detail="Invalid verify token")
|
29 |
+
return int(challenge)
|
30 |
+
|
31 |
+
def verify_signature(self, request: Request, payload: bytes) -> bool:
|
32 |
+
"""
|
33 |
+
Kiểm tra chữ ký X-Hub-Signature-256 để xác thực request từ Facebook.
|
34 |
+
Input: request (Request), payload (bytes)
|
35 |
+
Output: bool (True nếu hợp lệ, False nếu không)
|
36 |
+
"""
|
37 |
+
signature = request.headers.get("X-Hub-Signature-256", "")
|
38 |
+
if not signature.startswith("sha256="):
|
39 |
+
return False
|
40 |
+
|
41 |
+
expected = hmac.new(
|
42 |
+
self.app_secret.encode(),
|
43 |
+
payload,
|
44 |
+
hashlib.sha256
|
45 |
+
).hexdigest()
|
46 |
+
|
47 |
+
return hmac.compare_digest(signature[7:], expected)
|
48 |
+
|
49 |
+
@timing_decorator
|
50 |
+
async def send_message(self, page_access_token: str, recipient_id: str, message: str) -> Dict[str, Any]:
|
51 |
+
"""
|
52 |
+
Gửi message tới user qua Facebook Messenger API.
|
53 |
+
Input: page_access_token (str), recipient_id (str), message (str)
|
54 |
+
Output: dict (response từ Facebook API)
|
55 |
+
"""
|
56 |
+
url = f"https://graph.facebook.com/v18.0/me/messages?access_token={page_access_token}"
|
57 |
+
|
58 |
+
payload = {
|
59 |
+
"recipient": {"id": recipient_id},
|
60 |
+
"message": {"text": message}
|
61 |
+
}
|
62 |
+
|
63 |
+
try:
|
64 |
+
async with self._client as client:
|
65 |
+
response = await client.post(url, json=payload)
|
66 |
+
response.raise_for_status()
|
67 |
+
return response.json()
|
68 |
+
except httpx.HTTPError as e:
|
69 |
+
logger.error(f"Error sending message to Facebook: {e}")
|
70 |
+
raise HTTPException(status_code=500, detail="Failed to send message to Facebook")
|
71 |
+
|
72 |
+
def parse_message(self, body: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
73 |
+
"""
|
74 |
+
Parse message từ payload Facebook webhook.
|
75 |
+
Input: body (dict) - payload JSON từ Facebook.
|
76 |
+
Output: dict chứa sender_id, page_id, timestamp, text, attachments hoặc None nếu lỗi.
|
77 |
+
"""
|
78 |
+
try:
|
79 |
+
entry = body["entry"][0]
|
80 |
+
messaging = entry["messaging"][0]
|
81 |
+
|
82 |
+
sender_id = messaging["sender"]["id"]
|
83 |
+
recipient_id = messaging["recipient"]["id"]
|
84 |
+
timestamp = messaging["timestamp"]
|
85 |
+
|
86 |
+
message_data = {
|
87 |
+
"sender_id": sender_id,
|
88 |
+
"page_id": recipient_id,
|
89 |
+
"timestamp": timestamp,
|
90 |
+
"text": None,
|
91 |
+
"attachments": []
|
92 |
+
}
|
93 |
+
|
94 |
+
if "message" in messaging:
|
95 |
+
message = messaging["message"]
|
96 |
+
if "text" in message:
|
97 |
+
message_data["text"] = message["text"]
|
98 |
+
if "attachments" in message:
|
99 |
+
message_data["attachments"] = message["attachments"]
|
100 |
+
|
101 |
+
return message_data
|
102 |
+
except (KeyError, IndexError) as e:
|
103 |
+
logger.error(f"Error parsing Facebook message: {e}")
|
104 |
+
return None
|
app/health.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
|
3 |
+
router = APIRouter()
|
4 |
+
|
5 |
+
@router.get("/healthz")
|
6 |
+
async def health_check():
|
7 |
+
return {"status": "ok"}
|
app/main.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, Request, HTTPException, Depends
|
2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
3 |
+
from loguru import logger
|
4 |
+
import json
|
5 |
+
from typing import Dict, Any, List
|
6 |
+
import asyncio
|
7 |
+
from concurrent.futures import ThreadPoolExecutor
|
8 |
+
|
9 |
+
from .config import Settings, get_settings
|
10 |
+
from .facebook import FacebookClient
|
11 |
+
from .sheets import SheetsClient
|
12 |
+
from .supabase_db import SupabaseClient
|
13 |
+
from .embedding import EmbeddingClient
|
14 |
+
from .utils import setup_logging, extract_command, extract_keywords, timing_decorator, ensure_log_dir, validate_config
|
15 |
+
from .constants import VEHICLE_KEYWORDS, SHEET_RANGE
|
16 |
+
from .health import router as health_router
|
17 |
+
|
18 |
+
app = FastAPI(title="WeBot Facebook Messenger API")
|
19 |
+
|
20 |
+
# Add CORS middleware
|
21 |
+
app.add_middleware(
|
22 |
+
CORSMiddleware,
|
23 |
+
allow_origins=["*"],
|
24 |
+
allow_credentials=True,
|
25 |
+
allow_methods=["*"],
|
26 |
+
allow_headers=["*"],
|
27 |
+
)
|
28 |
+
|
29 |
+
# Initialize clients
|
30 |
+
settings = get_settings()
|
31 |
+
setup_logging(settings.log_level)
|
32 |
+
|
33 |
+
facebook_client = FacebookClient(settings.facebook_app_secret)
|
34 |
+
sheets_client = SheetsClient(
|
35 |
+
settings.google_sheets_credentials_file,
|
36 |
+
settings.google_sheets_token_file,
|
37 |
+
settings.conversation_sheet_id
|
38 |
+
)
|
39 |
+
supabase_client = SupabaseClient(settings.supabase_url, settings.supabase_key)
|
40 |
+
embedding_client = EmbeddingClient()
|
41 |
+
|
42 |
+
# Keywords to look for in messages
|
43 |
+
VEHICLE_KEYWORDS = ["xe máy", "ô tô", "xe đạp", "xe hơi"]
|
44 |
+
|
45 |
+
app.include_router(health_router)
|
46 |
+
|
47 |
+
ensure_log_dir()
|
48 |
+
validate_config(settings)
|
49 |
+
executor = ThreadPoolExecutor(max_workers=4)
|
50 |
+
|
51 |
+
@app.get("/webhook")
|
52 |
+
async def verify_webhook(request: Request):
|
53 |
+
"""
|
54 |
+
Xác thực webhook Facebook Messenger.
|
55 |
+
Input: request (Request) - request từ Facebook với các query params.
|
56 |
+
Output: Trả về challenge nếu verify thành công, lỗi nếu thất bại.
|
57 |
+
"""
|
58 |
+
params = dict(request.query_params)
|
59 |
+
|
60 |
+
mode = params.get("hub.mode")
|
61 |
+
token = str(params.get("hub.verify_token", ""))
|
62 |
+
challenge = str(params.get("hub.challenge", ""))
|
63 |
+
|
64 |
+
if not all([mode, token, challenge]):
|
65 |
+
raise HTTPException(status_code=400, detail="Missing parameters")
|
66 |
+
|
67 |
+
return await facebook_client.verify_webhook(
|
68 |
+
token,
|
69 |
+
challenge,
|
70 |
+
settings.facebook_verify_token
|
71 |
+
)
|
72 |
+
|
73 |
+
@app.post("/webhook")
|
74 |
+
@timing_decorator
|
75 |
+
async def webhook(request: Request):
|
76 |
+
"""
|
77 |
+
Nhận và xử lý message từ Facebook Messenger webhook.
|
78 |
+
Input: request (Request) - request chứa payload JSON từ Facebook.
|
79 |
+
Output: JSON status.
|
80 |
+
"""
|
81 |
+
body_bytes = await request.body()
|
82 |
+
|
83 |
+
# Verify request is from Facebook
|
84 |
+
if not facebook_client.verify_signature(request, body_bytes):
|
85 |
+
raise HTTPException(status_code=403, detail="Invalid signature")
|
86 |
+
|
87 |
+
try:
|
88 |
+
body = json.loads(body_bytes)
|
89 |
+
message_data = facebook_client.parse_message(body)
|
90 |
+
|
91 |
+
if not message_data:
|
92 |
+
return {"status": "ok"}
|
93 |
+
|
94 |
+
# Process the message
|
95 |
+
await process_message(message_data)
|
96 |
+
|
97 |
+
return {"status": "ok"}
|
98 |
+
except Exception as e:
|
99 |
+
logger.error(f"Error processing webhook: {e}")
|
100 |
+
raise HTTPException(status_code=500, detail="Internal server error")
|
101 |
+
|
102 |
+
async def process_message(message_data: Dict[str, Any]):
|
103 |
+
"""
|
104 |
+
Xử lý message từ người dùng Facebook, phân tích, truy vấn, gửi phản hồi và log lại.
|
105 |
+
Input: message_data (dict) - thông tin message đã parse từ Facebook.
|
106 |
+
Output: None (gửi message và log hội thoại).
|
107 |
+
"""
|
108 |
+
sender_id = message_data["sender_id"]
|
109 |
+
page_id = message_data["page_id"]
|
110 |
+
message_text = message_data["text"]
|
111 |
+
logger.bind(user_id=sender_id, page_id=page_id, message=message_text).info("Processing message")
|
112 |
+
if not message_text:
|
113 |
+
return
|
114 |
+
|
115 |
+
# Get page access token
|
116 |
+
page_token = await supabase_client.get_page_token(page_id)
|
117 |
+
if not page_token:
|
118 |
+
logger.error(f"No access token found for page {page_id}")
|
119 |
+
return
|
120 |
+
|
121 |
+
# Extract command and keywords
|
122 |
+
command, remaining_text = extract_command(message_text)
|
123 |
+
keywords = extract_keywords(message_text, VEHICLE_KEYWORDS)
|
124 |
+
|
125 |
+
# Get conversation history (run in thread pool)
|
126 |
+
history = await asyncio.get_event_loop().run_in_executor(
|
127 |
+
executor, lambda: sheets_client.get_conversation_history(sender_id, page_id).result()
|
128 |
+
)
|
129 |
+
|
130 |
+
response = ""
|
131 |
+
if command == "xong":
|
132 |
+
if not keywords:
|
133 |
+
response = "Vui lòng cho biết loại phương tiện bạn cần tìm (xe máy, ô tô...)"
|
134 |
+
else:
|
135 |
+
# Create embedding from message
|
136 |
+
embedding = await embedding_client.create_embedding(message_text)
|
137 |
+
|
138 |
+
# Search for similar documents
|
139 |
+
matches = await supabase_client.match_documents(embedding)
|
140 |
+
|
141 |
+
if matches:
|
142 |
+
response = format_search_results(matches)
|
143 |
+
else:
|
144 |
+
response = "Xin lỗi, tôi không tìm thấy thông tin phù hợp."
|
145 |
+
else:
|
146 |
+
response = "Vui lòng cung cấp thêm thông tin và gõ lệnh \\xong khi hoàn tất."
|
147 |
+
|
148 |
+
# Send response
|
149 |
+
await facebook_client.send_message(page_token, sender_id, response)
|
150 |
+
|
151 |
+
# Log conversation (run in thread pool)
|
152 |
+
await asyncio.get_event_loop().run_in_executor(
|
153 |
+
executor, lambda: sheets_client.log_conversation(sender_id, page_id, message_text, keywords, response).result()
|
154 |
+
)
|
155 |
+
|
156 |
+
def format_search_results(matches: List[Dict[str, Any]]) -> str:
|
157 |
+
"""
|
158 |
+
Format kết quả truy vấn vector search thành chuỗi gửi về user.
|
159 |
+
Input: matches (list[dict]) - danh sách kết quả từ Supabase.
|
160 |
+
Output: Chuỗi kết quả đã format.
|
161 |
+
"""
|
162 |
+
if not matches:
|
163 |
+
return "Không tìm thấy kết quả phù hợp."
|
164 |
+
|
165 |
+
result = "Đây là một số kết quả phù hợp:\n\n"
|
166 |
+
for i, match in enumerate(matches, 1):
|
167 |
+
result += f"{i}. {match['content']}\n"
|
168 |
+
if match.get('metadata', {}).get('url'):
|
169 |
+
result += f" Link: {match['metadata']['url']}\n"
|
170 |
+
result += "\n"
|
171 |
+
|
172 |
+
return result.strip()
|
173 |
+
|
174 |
+
if __name__ == "__main__":
|
175 |
+
import uvicorn
|
176 |
+
uvicorn.run(
|
177 |
+
"app.main:app",
|
178 |
+
host=settings.host,
|
179 |
+
port=settings.port
|
180 |
+
)
|
app/sheets.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict, List, Optional
|
2 |
+
from google.oauth2.credentials import Credentials
|
3 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
4 |
+
from google.auth.transport.requests import Request
|
5 |
+
from googleapiclient.discovery import build
|
6 |
+
import os
|
7 |
+
import pickle
|
8 |
+
from datetime import datetime
|
9 |
+
from loguru import logger
|
10 |
+
|
11 |
+
from .utils import timing_decorator
|
12 |
+
from .constants import SHEET_RANGE
|
13 |
+
|
14 |
+
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
|
15 |
+
|
16 |
+
class SheetsClient:
|
17 |
+
def __init__(self, credentials_file: str, token_file: str, sheet_id: str):
|
18 |
+
"""
|
19 |
+
Khởi tạo SheetsClient với thông tin xác thực và sheet_id.
|
20 |
+
Input: credentials_file (str), token_file (str), sheet_id (str)
|
21 |
+
Output: SheetsClient instance.
|
22 |
+
"""
|
23 |
+
self.credentials_file = credentials_file
|
24 |
+
self.token_file = token_file
|
25 |
+
self.sheet_id = sheet_id
|
26 |
+
self.creds = None
|
27 |
+
self.service = None
|
28 |
+
|
29 |
+
def authenticate(self) -> None:
|
30 |
+
"""
|
31 |
+
Xác thực với Google Sheets API, tạo self.service.
|
32 |
+
Input: None
|
33 |
+
Output: None (raise exception nếu lỗi)
|
34 |
+
"""
|
35 |
+
if os.path.exists(self.token_file):
|
36 |
+
with open(self.token_file, 'rb') as token:
|
37 |
+
self.creds = pickle.load(token)
|
38 |
+
|
39 |
+
if not self.creds or not self.creds.valid:
|
40 |
+
if self.creds and self.creds.expired and self.creds.refresh_token:
|
41 |
+
self.creds.refresh(Request())
|
42 |
+
else:
|
43 |
+
flow = InstalledAppFlow.from_client_secrets_file(
|
44 |
+
self.credentials_file, SCOPES)
|
45 |
+
self.creds = flow.run_local_server(port=0)
|
46 |
+
|
47 |
+
with open(self.token_file, 'wb') as token:
|
48 |
+
pickle.dump(self.creds, token)
|
49 |
+
|
50 |
+
self.service = build('sheets', 'v4', credentials=self.creds)
|
51 |
+
|
52 |
+
@timing_decorator
|
53 |
+
async def get_conversation_history(self, user_id: str, page_id: str) -> List[Dict[str, Any]]:
|
54 |
+
"""
|
55 |
+
Lấy lịch sử hội thoại của user từ Google Sheets.
|
56 |
+
Input: user_id (str), page_id (str)
|
57 |
+
Output: list[dict] các dòng hội thoại (nếu có).
|
58 |
+
"""
|
59 |
+
try:
|
60 |
+
if not self.service:
|
61 |
+
self.authenticate()
|
62 |
+
if not self.service:
|
63 |
+
raise RuntimeError("Google Sheets service not initialized")
|
64 |
+
range_name = SHEET_RANGE
|
65 |
+
result = self.service.spreadsheets().values().get(
|
66 |
+
spreadsheetId=self.sheet_id,
|
67 |
+
range=range_name
|
68 |
+
).execute()
|
69 |
+
|
70 |
+
values = result.get('values', [])
|
71 |
+
history = []
|
72 |
+
|
73 |
+
for row in values:
|
74 |
+
if len(row) >= 6 and row[0] == user_id and row[1] == page_id:
|
75 |
+
history.append({
|
76 |
+
'user_id': row[0],
|
77 |
+
'page_id': row[1],
|
78 |
+
'timestamp': row[2],
|
79 |
+
'message': row[3],
|
80 |
+
'keywords': row[4],
|
81 |
+
'response': row[5]
|
82 |
+
})
|
83 |
+
|
84 |
+
return history
|
85 |
+
except Exception as e:
|
86 |
+
logger.error(f"Error getting conversation history: {e}")
|
87 |
+
return []
|
88 |
+
|
89 |
+
@timing_decorator
|
90 |
+
async def log_conversation(
|
91 |
+
self,
|
92 |
+
user_id: str,
|
93 |
+
page_id: str,
|
94 |
+
message: str,
|
95 |
+
keywords: List[str],
|
96 |
+
response: str
|
97 |
+
) -> bool:
|
98 |
+
"""
|
99 |
+
Ghi log hội thoại vào Google Sheets.
|
100 |
+
Input: user_id (str), page_id (str), message (str), keywords (list[str]), response (str)
|
101 |
+
Output: bool (True nếu thành công, False nếu lỗi)
|
102 |
+
"""
|
103 |
+
try:
|
104 |
+
if not self.service:
|
105 |
+
self.authenticate()
|
106 |
+
if not self.service:
|
107 |
+
raise RuntimeError("Google Sheets service not initialized")
|
108 |
+
timestamp = datetime.now().isoformat()
|
109 |
+
values = [[
|
110 |
+
user_id,
|
111 |
+
page_id,
|
112 |
+
timestamp,
|
113 |
+
message,
|
114 |
+
','.join(keywords),
|
115 |
+
response
|
116 |
+
]]
|
117 |
+
|
118 |
+
body = {
|
119 |
+
'values': values
|
120 |
+
}
|
121 |
+
|
122 |
+
range_name = SHEET_RANGE
|
123 |
+
self.service.spreadsheets().values().append(
|
124 |
+
spreadsheetId=self.sheet_id,
|
125 |
+
range=range_name,
|
126 |
+
valueInputOption='RAW',
|
127 |
+
body=body
|
128 |
+
).execute()
|
129 |
+
|
130 |
+
return True
|
131 |
+
except Exception as e:
|
132 |
+
logger.error(f"Error logging conversation: {e}")
|
133 |
+
return False
|
app/supabase_db.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict, List, Optional
|
2 |
+
from supabase import create_client, Client
|
3 |
+
from loguru import logger
|
4 |
+
|
5 |
+
from .utils import timing_decorator
|
6 |
+
|
7 |
+
class SupabaseClient:
|
8 |
+
def __init__(self, url: str, key: str):
|
9 |
+
"""
|
10 |
+
Khởi tạo SupabaseClient với url và key.
|
11 |
+
Input: url (str), key (str)
|
12 |
+
Output: SupabaseClient instance.
|
13 |
+
"""
|
14 |
+
self.client: Client = create_client(url, key)
|
15 |
+
|
16 |
+
@timing_decorator
|
17 |
+
def get_page_token(self, page_id: str):
|
18 |
+
"""
|
19 |
+
Lấy access token của Facebook page từ Supabase.
|
20 |
+
Input: page_id (str)
|
21 |
+
Output: access_token (str) hoặc None nếu không có.
|
22 |
+
"""
|
23 |
+
try:
|
24 |
+
response = self.client.table('page_tokens').select('access_token').eq('page_id', page_id).execute()
|
25 |
+
if response.data and len(response.data) > 0:
|
26 |
+
return response.data[0]['access_token']
|
27 |
+
return None
|
28 |
+
except Exception as e:
|
29 |
+
logger.error(f"Error getting page token: {e}")
|
30 |
+
return None
|
31 |
+
|
32 |
+
@timing_decorator
|
33 |
+
def match_documents(self, embedding: List[float], match_count: int = 5):
|
34 |
+
"""
|
35 |
+
Truy vấn vector similarity search qua RPC match_documents.
|
36 |
+
Input: embedding (list[float]), match_count (int)
|
37 |
+
Output: list[dict] kết quả truy vấn.
|
38 |
+
"""
|
39 |
+
try:
|
40 |
+
response = self.client.rpc(
|
41 |
+
'match_documents',
|
42 |
+
{
|
43 |
+
'query_embedding': embedding,
|
44 |
+
'match_threshold': 0.7,
|
45 |
+
'match_count': match_count
|
46 |
+
}
|
47 |
+
).execute()
|
48 |
+
|
49 |
+
if response.data:
|
50 |
+
return response.data
|
51 |
+
return []
|
52 |
+
except Exception as e:
|
53 |
+
logger.error(f"Error matching documents: {e}")
|
54 |
+
return []
|
55 |
+
|
56 |
+
@timing_decorator
|
57 |
+
def store_embedding(self, text: str, embedding: List[float], metadata: Dict[str, Any]):
|
58 |
+
"""
|
59 |
+
Lưu embedding vào Supabase.
|
60 |
+
Input: text (str), embedding (list[float]), metadata (dict)
|
61 |
+
Output: bool (True nếu thành công, False nếu lỗi)
|
62 |
+
"""
|
63 |
+
try:
|
64 |
+
response = self.client.table('embeddings').insert({
|
65 |
+
'content': text,
|
66 |
+
'embedding': embedding,
|
67 |
+
'metadata': metadata
|
68 |
+
}).execute()
|
69 |
+
|
70 |
+
return bool(response.data)
|
71 |
+
except Exception as e:
|
72 |
+
logger.error(f"Error storing embedding: {e}")
|
73 |
+
return False
|
app/utils.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
from functools import wraps
|
3 |
+
from loguru import logger
|
4 |
+
from typing import Any, Callable
|
5 |
+
import os
|
6 |
+
|
7 |
+
def timing_decorator(func: Callable) -> Callable:
|
8 |
+
"""
|
9 |
+
Decorator đo thời gian thực thi của hàm async, log thời lượng xử lý.
|
10 |
+
Input: func (Callable) - hàm async cần đo.
|
11 |
+
Output: Kết quả trả về của func.
|
12 |
+
"""
|
13 |
+
@wraps(func)
|
14 |
+
async def wrapper(*args: Any, **kwargs: Any) -> Any:
|
15 |
+
start_time = time.time()
|
16 |
+
result = await func(*args, **kwargs)
|
17 |
+
end_time = time.time()
|
18 |
+
duration = end_time - start_time
|
19 |
+
logger.info(f"{func.__name__} took {duration:.2f} seconds to execute")
|
20 |
+
return result
|
21 |
+
return wrapper
|
22 |
+
|
23 |
+
def setup_logging(log_level: str = "INFO") -> None:
|
24 |
+
"""
|
25 |
+
Thiết lập logging với loguru, log ra file và console.
|
26 |
+
Input: log_level (str) - mức log.
|
27 |
+
Output: None.
|
28 |
+
"""
|
29 |
+
logger.remove() # Remove default handler
|
30 |
+
logger.add(
|
31 |
+
"logs/webot.log",
|
32 |
+
rotation="500 MB",
|
33 |
+
retention="10 days",
|
34 |
+
level=log_level,
|
35 |
+
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
|
36 |
+
)
|
37 |
+
logger.add(
|
38 |
+
lambda msg: print(msg),
|
39 |
+
level=log_level,
|
40 |
+
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
|
41 |
+
)
|
42 |
+
|
43 |
+
def extract_command(text: str) -> tuple[str, str]:
|
44 |
+
"""
|
45 |
+
Tách lệnh (bắt đầu bằng \) và phần còn lại từ message.
|
46 |
+
Input: text (str) - message từ user.
|
47 |
+
Output: (command, remaining_text) - tuple (str, str).
|
48 |
+
"""
|
49 |
+
if not text.startswith("\\"):
|
50 |
+
return "", text
|
51 |
+
|
52 |
+
parts = text.split(maxsplit=1)
|
53 |
+
command = parts[0][1:] # Remove the backslash
|
54 |
+
remaining = parts[1] if len(parts) > 1 else ""
|
55 |
+
return command, remaining
|
56 |
+
|
57 |
+
def extract_keywords(text: str, keywords: list[str]) -> list[str]:
|
58 |
+
"""
|
59 |
+
Tìm các từ khóa xuất hiện trong message.
|
60 |
+
Input: text (str), keywords (list[str])
|
61 |
+
Output: list[str] các từ khóa tìm thấy.
|
62 |
+
"""
|
63 |
+
return [keyword for keyword in keywords if keyword.lower() in text.lower()]
|
64 |
+
|
65 |
+
def ensure_log_dir():
|
66 |
+
"""
|
67 |
+
Đảm bảo thư mục logs tồn tại, tạo nếu chưa có.
|
68 |
+
Input: None
|
69 |
+
Output: None
|
70 |
+
"""
|
71 |
+
os.makedirs("logs", exist_ok=True)
|
72 |
+
|
73 |
+
def validate_config(settings) -> None:
|
74 |
+
"""
|
75 |
+
Kiểm tra các biến môi trường/config bắt buộc, raise lỗi nếu thiếu.
|
76 |
+
Input: settings (Settings)
|
77 |
+
Output: None (raise RuntimeError nếu thiếu)
|
78 |
+
"""
|
79 |
+
missing = []
|
80 |
+
for field in [
|
81 |
+
'facebook_verify_token', 'facebook_app_secret',
|
82 |
+
'google_sheets_credentials_file', 'google_sheets_token_file', 'conversation_sheet_id',
|
83 |
+
'supabase_url', 'supabase_key']:
|
84 |
+
if not getattr(settings, field, None):
|
85 |
+
missing.append(field)
|
86 |
+
if missing:
|
87 |
+
raise RuntimeError(f"Missing config: {', '.join(missing)}")
|
config.py
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
from dotenv import load_dotenv
|
3 |
-
|
4 |
-
load_dotenv()
|
5 |
-
|
6 |
-
VERIFY_TOKEN = os.getenv("VERIFY_TOKEN")
|
7 |
-
FACEBOOK_API_VERSION = "v22.0"
|
8 |
-
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
|
9 |
-
GOOGLE_SHEET_ID = os.getenv("GOOGLE_SHEET_ID")
|
10 |
-
SUPABASE_URL = os.getenv("SUPABASE_URL")
|
11 |
-
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
import time
|
2 |
-
from fastapi import FastAPI, Request
|
3 |
-
from pydantic import BaseModel
|
4 |
-
from modules import facebook, sheets, supabase_db, embedding, vector_search, utils
|
5 |
-
import config
|
6 |
-
|
7 |
-
app = FastAPI()
|
8 |
-
|
9 |
-
class FBMessagePayload(BaseModel):
|
10 |
-
entry: list
|
11 |
-
|
12 |
-
@app.get("/facebook-webhook")
|
13 |
-
def verify_token(mode: str, token: str, challenge: str):
|
14 |
-
if mode == "subscribe" and token == config.VERIFY_TOKEN:
|
15 |
-
return challenge
|
16 |
-
return {"error": "Token invalid"}
|
17 |
-
|
18 |
-
@app.post("/facebook-webhook")
|
19 |
-
async def handle_message(payload: FBMessagePayload):
|
20 |
-
start = time.time()
|
21 |
-
log = utils.create_logger("main")
|
22 |
-
try:
|
23 |
-
messaging = payload.entry[0]['messaging'][0]
|
24 |
-
sender_id = messaging['sender']['id']
|
25 |
-
page_id = messaging['recipient']['id']
|
26 |
-
text = messaging.get('message', {}).get('text', '')
|
27 |
-
attachments = messaging.get('message', {}).get('attachments', [])
|
28 |
-
|
29 |
-
log.info(f"⏱️ New message from {sender_id}: {text}")
|
30 |
-
|
31 |
-
token = supabase_db.get_page_token(page_id)
|
32 |
-
history = sheets.find_unfinished_conversation(page_id, sender_id)
|
33 |
-
parsed = utils.parse_message(text)
|
34 |
-
parsed.update({"recipient_id": sender_id, "page_id": page_id, "attachments": attachments})
|
35 |
-
|
36 |
-
action = utils.decide_action(parsed, history)
|
37 |
-
|
38 |
-
if action in ["insert&responsenocommand", "update&responsewithcommand"]:
|
39 |
-
vec = embedding.generate_embedding(text)
|
40 |
-
parsed["embedding"] = vec
|
41 |
-
columns = utils.detect_vehicle(parsed['vehicle'])
|
42 |
-
results = vector_search.query(vec, columns)
|
43 |
-
answer = utils.format_answer(results)
|
44 |
-
facebook.send_text(sender_id, page_id, token, answer)
|
45 |
-
else:
|
46 |
-
prompt = utils.get_instruction(action)
|
47 |
-
facebook.send_text(sender_id, page_id, token, prompt)
|
48 |
-
|
49 |
-
sheets.update_conversation(parsed, action, history)
|
50 |
-
log.info(f"✅ Handled in {time.time()-start:.2f}s")
|
51 |
-
return {"status": "ok"}
|
52 |
-
except Exception as e:
|
53 |
-
log.exception(f"❌ Error: {e}")
|
54 |
-
return {"error": str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/embedding.py
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
from sentence_transformers import SentenceTransformer
|
2 |
-
import config
|
3 |
-
|
4 |
-
model = SentenceTransformer(config.EMBEDDING_MODEL)
|
5 |
-
|
6 |
-
def generate_embedding(text):
|
7 |
-
return model.encode(text).tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/facebook.py
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
import httpx
|
2 |
-
import config
|
3 |
-
|
4 |
-
def send_text(recipient_id, page_id, token, message):
|
5 |
-
url = f"https://graph.facebook.com/{config.FACEBOOK_API_VERSION}/{page_id}/messages"
|
6 |
-
payload = {
|
7 |
-
"recipient": {"id": recipient_id},
|
8 |
-
"messaging_type": "RESPONSE",
|
9 |
-
"message": {"text": message}
|
10 |
-
}
|
11 |
-
params = {"access_token": token}
|
12 |
-
res = httpx.post(url, params=params, json=payload)
|
13 |
-
return res.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/sheets.py
DELETED
@@ -1,29 +0,0 @@
|
|
1 |
-
import gspread
|
2 |
-
from oauth2client.service_account import ServiceAccountCredentials
|
3 |
-
import config
|
4 |
-
|
5 |
-
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
|
6 |
-
creds = ServiceAccountCredentials.from_json_keyfile_name("service_account.json", scope)
|
7 |
-
client = gspread.authorize(creds)
|
8 |
-
sheet = client.open_by_key(config.GOOGLE_SHEET_ID).worksheet("chat")
|
9 |
-
|
10 |
-
def find_unfinished_conversation(page_id, recipient_id):
|
11 |
-
records = sheet.get_all_records()
|
12 |
-
for row in reversed(records):
|
13 |
-
if row['page_id'] == page_id and row['recipient_id'] == recipient_id and row['isdone'] == "FALSE":
|
14 |
-
return row
|
15 |
-
return None
|
16 |
-
|
17 |
-
def update_conversation(parsed, action, history):
|
18 |
-
new_row = {
|
19 |
-
"conversation_id": parsed.get("conversation_id"),
|
20 |
-
"recipient_id": parsed["recipient_id"],
|
21 |
-
"page_id": parsed["page_id"],
|
22 |
-
"originaltext": parsed.get("message_text"),
|
23 |
-
"originalvehicle": parsed.get("vehicle"),
|
24 |
-
"originalcommand": parsed.get("command"),
|
25 |
-
"originalcontent": parsed.get("content"),
|
26 |
-
"originalattachments": parsed.get("attachments"),
|
27 |
-
"isdone": "TRUE" if "response" in action else "FALSE"
|
28 |
-
}
|
29 |
-
sheet.append_row(list(new_row.values()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/supabase_db.py
DELETED
@@ -1,8 +0,0 @@
|
|
1 |
-
from supabase import create_client
|
2 |
-
import config
|
3 |
-
|
4 |
-
supabase = create_client(config.SUPABASE_URL, config.SUPABASE_KEY)
|
5 |
-
|
6 |
-
def get_page_token(page_id: str) -> str:
|
7 |
-
res = supabase.table("PageToken").select("token").eq("id", page_id).execute()
|
8 |
-
return res.data[0]['token'] if res.data else ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/utils.py
DELETED
@@ -1,71 +0,0 @@
|
|
1 |
-
from loguru import logger
|
2 |
-
import uuid
|
3 |
-
|
4 |
-
keywords = ["mô tô", "xe máy điện", "xe máy", "xe gắn máy", "ô tô tải", "ô tô", "xe khách", "máy kéo", "xe chuyên dùng", "xe đạp máy", "xe đạp điện", "xe thô sơ", "xích lô", "xe bò", "súc vật kéo", "xe đạp", "đi bộ"]
|
5 |
-
|
6 |
-
|
7 |
-
def create_logger(name: str):
|
8 |
-
logger.add(f"logs/{name}.log", rotation="1 day")
|
9 |
-
return logger
|
10 |
-
|
11 |
-
def parse_message(text: str):
|
12 |
-
result = {"command": "", "password": "", "content": "", "vehicle": "", "message_text": text}
|
13 |
-
if text.startswith("\\"):
|
14 |
-
parts = text[1:].split("\\")
|
15 |
-
if len(parts) >= 2:
|
16 |
-
result.update({"command": parts[0], "password": parts[1], "content": "\\".join(parts[2:])})
|
17 |
-
else:
|
18 |
-
result["command"] = parts[0]
|
19 |
-
for kw in keywords:
|
20 |
-
if kw in text.lower():
|
21 |
-
result["vehicle"] = kw
|
22 |
-
break
|
23 |
-
return result
|
24 |
-
|
25 |
-
def detect_vehicle(text):
|
26 |
-
return [text] if text else []
|
27 |
-
|
28 |
-
def decide_action(parsed, history):
|
29 |
-
if not parsed['message_text'] and not parsed['attachments']:
|
30 |
-
return "invalidinput"
|
31 |
-
if not history:
|
32 |
-
if parsed['command']:
|
33 |
-
return "insert&askwithcommand"
|
34 |
-
elif parsed['vehicle']:
|
35 |
-
return "insert&responsenocommand"
|
36 |
-
else:
|
37 |
-
return "insert&asknocommand"
|
38 |
-
else:
|
39 |
-
if parsed['command'] == "xong":
|
40 |
-
return "update&responsewithcommand"
|
41 |
-
elif parsed['command']:
|
42 |
-
return "invalidcommand"
|
43 |
-
elif parsed['vehicle']:
|
44 |
-
return "update&asknocommand"
|
45 |
-
else:
|
46 |
-
return "update&askwithcommand"
|
47 |
-
|
48 |
-
def format_answer(results):
|
49 |
-
if not results:
|
50 |
-
return "Không có kết quả phù hợp!"
|
51 |
-
top = max(results, key=lambda r: r.get("similarity", 0))
|
52 |
-
lines = [f"Thực hiện hành vi: {top.get('tieude', '').strip()} {top.get('noidung', '').strip()}"]
|
53 |
-
if top.get("canhantu"):
|
54 |
-
lines.append(f"Cá nhân bị phạt từ {top['canhantu']} đến {top.get('canhanden', '')} VNĐ")
|
55 |
-
if top.get("tochuctu"):
|
56 |
-
lines.append(f"Tổ chức bị phạt từ {top['tochuctu']} đến {top.get('tochucden', '')} VNĐ")
|
57 |
-
if top.get("hpbsnoidung"):
|
58 |
-
lines.append(f"Hình phạt bổ sung: {top['hpbsnoidung']}")
|
59 |
-
if top.get("bpkpnoidung"):
|
60 |
-
lines.append(f"Biện pháp khắc phục: {top['bpkpnoidung']}")
|
61 |
-
return "\n".join(lines)
|
62 |
-
|
63 |
-
def get_instruction(action):
|
64 |
-
return {
|
65 |
-
"invalidinput": "Bạn chưa cung cấp nội dung hợp lệ!",
|
66 |
-
"insert&asknocommand": "Bạn có thể gửi thêm hình ảnh hoặc gõ '\\xong' để hoàn tất.",
|
67 |
-
"insert&askwithcommand": "Cảm ơn! Bạn muốn thực hiện hành động nào khác không?",
|
68 |
-
"update&asknocommand": "Hãy tiếp tục gửi thông tin phương tiện.",
|
69 |
-
"update&askwithcommand": "Vui lòng hoàn tất câu lệnh hoặc xác nhận bằng '\\xong'.",
|
70 |
-
"invalidcommand": "Lệnh bạn gửi chưa được hỗ trợ!"
|
71 |
-
}.get(action, "Tôi chưa hiểu ý bạn lắm. Bạn có thể diễn đạt lại?")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/vector_search.py
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
import httpx
|
2 |
-
import config
|
3 |
-
|
4 |
-
def query(embedding, vehicle_columns):
|
5 |
-
url = f"{config.SUPABASE_URL}/rest/v1/rpc/match_documents"
|
6 |
-
headers = {
|
7 |
-
"apikey": config.SUPABASE_KEY,
|
8 |
-
"Authorization": f"Bearer {config.SUPABASE_KEY}",
|
9 |
-
"Content-Type": "application/json"
|
10 |
-
}
|
11 |
-
json_body = {
|
12 |
-
"query_embedding": embedding,
|
13 |
-
"match_threshold": 0.3,
|
14 |
-
"match_count": 5,
|
15 |
-
"vehicle_filters": vehicle_columns
|
16 |
-
}
|
17 |
-
response = httpx.post(url, headers=headers, json=json_body)
|
18 |
-
return response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,9 +1,14 @@
|
|
1 |
-
fastapi
|
2 |
-
uvicorn
|
3 |
-
python-dotenv
|
4 |
-
httpx
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi==0.104.1
|
2 |
+
uvicorn==0.24.0
|
3 |
+
python-dotenv==1.0.0
|
4 |
+
httpx>=0.24.0,<0.25.0
|
5 |
+
loguru==0.7.2
|
6 |
+
google-auth==2.23.4
|
7 |
+
google-auth-oauthlib==1.1.0
|
8 |
+
google-auth-httplib2==0.1.1
|
9 |
+
google-api-python-client==2.108.0
|
10 |
+
supabase==2.0.3
|
11 |
+
numpy==1.26.2
|
12 |
+
python-multipart==0.0.6
|
13 |
+
tenacity==8.2.3
|
14 |
+
pydantic-settings
|