Spaces:

VietCat
/

FBChatBot

Running

App Files Files Community

VietCat commited on 7 days ago

Commit

f6cdf9d

1 Parent(s): e68c212

refactor chat channel structure

Browse files

Files changed (3) hide show

app/channel_manager.py +13 -0
app/chat_channel.py +32 -0
app/message_processor.py +355 -0

app/channel_manager.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from app.chat_channel import ChatChannel
+class ChannelManager:
+    def __init__(self):
+        self.channels = {}  # {(channel_type, page_id): ChatChannel}
+    def get_or_create_channel(self, channel_type: str, page_id: str) -> ChatChannel:
+        key = (channel_type, page_id)
+        if key not in self.channels:
+            self.channels[key] = ChatChannel(page_id, channel_type)
+        return self.channels[key]
+channel_manager = ChannelManager()

app/chat_channel.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from .supabase_db import SupabaseClient
+from .llm import create_llm_client
+from .reranker import Reranker
+from .embedding import EmbeddingClient
+from .facebook import FacebookClient
+from .config import get_settings
+from app.message_processor import MessageProcessor
+class ChatChannel:
+    def __init__(self, page_id: str, channel_type: str = "facebook"):
+        self.channel_type = channel_type
+        self.page_id = page_id
+        self.page_token = None  # cache token
+        settings = get_settings()
+        self.supabase = SupabaseClient(settings.supabase_url, settings.supabase_key)
+        self.llm = create_llm_client(
+            provider="gemini",
+            base_url=settings.gemini_base_url,
+            model=settings.gemini_models_list[0] if settings.gemini_models_list else "gemini-2.5-flash"
+        )
+        self.reranker = Reranker()
+        self.embedder = EmbeddingClient()
+        self.facebook = FacebookClient(settings.facebook_app_secret)
+        self.message_processor = MessageProcessor(self)
+    def get_page_token(self, force_refresh=False):
+        if self.page_token is None or force_refresh:
+            self.page_token = self.supabase.get_page_token(self.page_id)
+        return self.page_token
+    def invalidate_page_token(self):
+        self.page_token = None

app/message_processor.py ADDED Viewed

	@@ -0,0 +1,355 @@

+from typing import Dict, Any, List
+import asyncio
+import traceback
+from loguru import logger
+class MessageProcessor:
+    def __init__(self, channel):
+        self.channel = channel
+    async def process_message(self, message_data: Dict[str, Any]):
+        # Refactor logic từ main.py vào đây
+        # Lưu ý: self.channel.supabase, self.channel.llm, ...
+        if not message_data or not isinstance(message_data, dict):
+            logger.error(f"[ERROR] Invalid message_data: {message_data}")
+            return
+        required_fields = ["sender_id", "page_id", "text", "timestamp"]
+        for field in required_fields:
+            if field not in message_data:
+                logger.error(f"[ERROR] Missing field {field} in message_data: {message_data}")
+                return
+        sender_id = message_data["sender_id"]
+        page_id = message_data["page_id"]
+        message_text = message_data["text"]
+        timestamp = message_data["timestamp"]
+        attachments = message_data.get('attachments', [])
+        logger.bind(user_id=sender_id, page_id=page_id, message=message_text).info("Processing message")
+        # Nếu không có message_text và attachments, không xử lý
+        if not message_text and not attachments:
+            logger.info(f"[DEBUG] Không có message_text và attachments, không xử lý...")
+            return
+        # Get conversation history (run in thread pool)
+        loop = asyncio.get_event_loop()
+        sheets_client = self.channel.supabase  # Nếu bạn dùng Google Sheets, thay bằng self.channel.sheets nếu có
+        history = []
+        if hasattr(self.channel, 'sheets'):
+            history = await loop.run_in_executor(
+                None, lambda: self.channel.sheets.get_conversation_history(sender_id, page_id)
+            )
+        logger.info(f"[DEBUG] history: {history}")
+        log_kwargs = {
+            'conversation_id': None,
+            'recipient_id': sender_id,
+            'page_id': page_id,
+            'originaltext': message_text,
+            'originalcommand': '',
+            'originalcontent': '',
+            'originalattachments': attachments,
+            'originalvehicle': '',
+            'originalaction': '',
+            'originalpurpose': '',
+            'timestamp': [timestamp],
+            'isdone': False
+        }
+        logger.info(f"[DEBUG] Message cơ bản: {log_kwargs}")
+        conv = None
+        if history:
+            # 1. Chặn duplicate message (trùng sender_id, page_id, timestamp)
+            for row in history:
+                row_timestamps = self.flatten_timestamp(row.get('timestamp', []))
+                if isinstance(row_timestamps, list) and len(row_timestamps) == 1 and isinstance(row_timestamps[0], list):
+                    row_timestamps = row_timestamps[0]
+                if (
+                    str(timestamp) in [str(ts) for ts in row_timestamps]
+                    and str(row.get('recipient_id')) == str(sender_id)
+                    and str(row.get('page_id')) == str(page_id)
+                ):
+                    logger.info("[DUPLICATE] Message duplicate, skipping log.")
+                    return
+                conv = {
+                    'conversation_id': row.get('conversation_id'),
+                    'recipient_id': row.get('recipient_id'),
+                    'page_id': row.get('page_id'),
+                    'originaltext': row.get('originaltext'),
+                    'originalcommand': row.get('originalcommand'),
+                    'originalcontent': row.get('originalcontent'),
+                    'originalattachments': row.get('originalattachments'),
+                    'originalvehicle': row.get('originalvehicle'),
+                    'originalaction': row.get('originalaction'),
+                    'originalpurpose': row.get('originalpurpose'),
+                    'timestamp': row_timestamps,
+                    'isdone': row.get('isdone')
+                }
+        else:
+            # 2. Ghi conversation mới NGAY LẬP TỨC với thông tin cơ bản
+            if hasattr(self.channel, 'sheets'):
+                conv = await loop.run_in_executor(None, lambda: self.channel.sheets.log_conversation(**log_kwargs))
+            else:
+                conv = log_kwargs.copy()
+        if not conv:
+            logger.error("Không thể tạo conversation mới!")
+            return
+        else:
+            logger.info(f"[DEBUG] Message history: {conv}")
+            for key, value in log_kwargs.items():
+                if value not in (None, "", []) and conv.get(key) in (None, "", []):
+                    conv[key] = value
+        # Thêm timestamp mới nếu chưa có
+        conv['timestamp'] = self.flatten_timestamp(conv['timestamp'])
+        if timestamp not in conv['timestamp']:
+            conv['timestamp'].append(timestamp)
+        logger.info(f"[DEBUG] Message history sau update: {conv}")
+        if hasattr(self.channel, 'sheets'):
+            await loop.run_in_executor(None, lambda: self.channel.sheets.log_conversation(**conv))
+        # Get page access token (cache)
+        page_token = self.channel.get_page_token()
+        if page_token:
+            logger.info(f"[DEBUG] page_token: {page_token[:10]} ... {page_token[-10:]}")
+        else:
+            logger.info(f"[DEBUG] page_token: None")
+            logger.error(f"No access token found for page {message_data['page_id']}")
+            return
+        # Gửi message Facebook, nếu lỗi token expired thì invalidate và thử lại một lần
+        try:
+            await self.channel.facebook.send_message(page_token, message_data['sender_id'], "Ok, để mình check. Bạn chờ mình chút xíu nhé!")
+        except Exception as e:
+            if "expired" in str(e).lower():
+                logger.warning("[FACEBOOK] Token expired, invalidate and refresh")
+                self.channel.invalidate_page_token()
+                page_token = self.channel.get_page_token(force_refresh=True)
+                await self.channel.facebook.send_message(page_token, message_data['sender_id'], "Ok, để mình check. Bạn chờ mình chút xíu nhé!")
+            else:
+                raise
+        # Extract command and keywords
+        from app.utils import extract_command, extract_keywords
+        from app.constants import VEHICLE_KEYWORDS
+        command, remaining_text = extract_command(message_text)
+        # Sử dụng LLM để phân tích message_text và extract keywords, mục đích, hành vi vi phạm
+        llm_analysis = await self.channel.llm.analyze(message_text)
+        logger.info(f"[LLM][RAW] Kết quả trả về từ analyze: {llm_analysis}")
+        muc_dich = None
+        hanh_vi_vi_pham = None
+        if isinstance(llm_analysis, dict):
+            keywords = [self.normalize_vehicle_keyword(llm_analysis.get('phuong_tien', ''))]
+            muc_dich = llm_analysis.get('muc_dich')
+            hanh_vi_vi_pham = llm_analysis.get('hanh_vi_vi_pham')
+        elif isinstance(llm_analysis, list) and len(llm_analysis) > 0:
+            keywords = [self.normalize_vehicle_keyword(llm_analysis[0].get('phuong_tien', ''))]
+            muc_dich = llm_analysis[0].get('muc_dich')
+            hanh_vi_vi_pham = llm_analysis[0].get('hanh_vi_vi_pham')
+        else:
+            keywords = extract_keywords(message_text, VEHICLE_KEYWORDS)
+            hanh_vi_vi_pham = message_text
+            for kw in keywords:
+                hanh_vi_vi_pham = hanh_vi_vi_pham.replace(kw, "")
+            hanh_vi_vi_pham = hanh_vi_vi_pham.strip()
+        logger.info(f"[DEBUG] Phương tiện: {keywords} - Hành vi: {hanh_vi_vi_pham} - Mục đích: {muc_dich}")
+        await self.channel.facebook.send_message(page_token, sender_id, f"... đang tìm kiếm hành vi {hanh_vi_vi_pham} .....")
+        # 4. Update lại conversation với thông tin đầy đủ
+        update_kwargs = {
+            'conversation_id': conv['conversation_id'],
+            'recipient_id': sender_id,
+            'page_id': page_id,
+            'originaltext': message_text,
+            'originalcommand': command,
+            'originalcontent': remaining_text,
+            'originalattachments': attachments,
+            'originalvehicle': ','.join(keywords),
+            'originalaction': hanh_vi_vi_pham,
+            'originalpurpose': muc_dich,
+            'timestamp': self.flatten_timestamp(conv['timestamp']),
+            'isdone': False
+        }
+        for key, value in update_kwargs.items():
+            if value not in (None, "", []) and conv.get(key) in (None, "", []):
+                conv[key] = value
+        logger.info(f"[DEBUG] Message history update cuối cùng: {conv}")
+        # 5. Xử lý logic nghiệp vụ
+        response = await self.process_business_logic(conv, page_token)
+        logger.info(f"[DEBUG] Message history sau khi process: {conv}")
+        # 6. Gửi response và cập nhật final state
+        await self.channel.facebook.send_message(page_token, sender_id, response)
+        if hasattr(self.channel, 'sheets'):
+            await loop.run_in_executor(None, lambda: self.channel.sheets.log_conversation(**conv))
+        return
+    def flatten_timestamp(self, ts):
+        flat = []
+        for t in ts:
+            if isinstance(t, list):
+                flat.extend(self.flatten_timestamp(t))
+            else:
+                flat.append(t)
+        return flat
+    def normalize_vehicle_keyword(self, keyword: str) -> str:
+        from app.constants import VEHICLE_KEYWORDS
+        import difflib
+        if not keyword:
+            return ""
+        matches = difflib.get_close_matches(keyword.lower(), [k.lower() for k in VEHICLE_KEYWORDS], n=1, cutoff=0.6)
+        if matches:
+            for k in VEHICLE_KEYWORDS:
+                if k.lower() == matches[0]:
+                    return k
+        return keyword
+    async def process_business_logic(self, log_kwargs: Dict[str, Any], page_token: str) -> str:
+        command = log_kwargs.get('originalcommand', '')
+        vehicle = log_kwargs.get('originalvehicle', '')
+        action = log_kwargs.get('originalaction', '')
+        message = log_kwargs.get('originaltext', '')
+        # Tách vehicle thành list keywords
+        keywords = [kw.strip() for kw in vehicle.split(',') if kw.strip()]
+        if not command:
+            if keywords:
+                # Có thông tin phương tiện
+                if action:
+                    logger.info(f"[DEBUG] tạo embedding: {action}")
+                    embedding = await self.channel.embedder.create_embedding(action)
+                    logger.info(f"[DEBUG] embedding: {embedding[:5]} ... (total {len(embedding)})")
+                    matches = self.channel.supabase.match_documents(
+                        embedding,
+                        vehicle_keywords=keywords,
+                        user_question=action
+                    )
+                    logger.info(f"[DEBUG] matches: {matches}")
+                    if matches:
+                        response = await self.format_search_results(message, matches, page_token, log_kwargs['recipient_id'])
+                    else:
+                        response = "Xin lỗi, tôi không tìm thấy thông tin phù hợp."
+                else:
+                    logger.info(f"[DEBUG] Không có hành vi vi phạm: {message}")
+                    response = "Xin lỗi, tôi không tìm thấy thông tin về hành vi vi phạm trong câu hỏi của bạn."
+                log_kwargs['isdone'] = True
+            else:
+                # Không có thông tin phương tiện
+                response = "Vui lòng cho biết loại phương tiện bạn cần tìm (xe máy, ô tô...)"
+                log_kwargs['isdone'] = False
+        else:
+            # Có command
+            if command == "xong":
+                post_url = await self.create_facebook_post(page_token, log_kwargs['recipient_id'], [log_kwargs])
+                if post_url:
+                    response = f"Bài viết đã được tạo thành công! Bạn có thể xem tại: {post_url}"
+                else:
+                    response = "Đã xảy ra lỗi khi tạo bài viết. Vui lòng thử lại sau."
+                log_kwargs['isdone'] = True
+            else:
+                response = "Vui lòng cung cấp thêm thông tin và gõ lệnh \\xong khi hoàn tất."
+                log_kwargs['isdone'] = False
+        return response
+    async def format_search_results(self, question: str, matches: List[Dict[str, Any]], page_token: str, sender_id: str) -> str:
+        if not matches:
+            return "Không tìm thấy kết quả phù hợp."
+        await self.channel.facebook.send_message(page_token, sender_id, f"Tôi tìm thấy một số quy định rồi .....")
+        try:
+            reranked = await self.channel.reranker.rerank(question, matches, top_k=5)
+            if reranked:
+                matches = reranked
+        except Exception as e:
+            logger.error(f"[RERANK] Lỗi khi rerank: {e}")
+        top = None
+        top_result_text = ""
+        full_result_text = ""
+        def arr_to_str(arr, sep=", "):
+            if not arr:
+                return ""
+            if isinstance(arr, list):
+                return sep.join([str(x) for x in arr if x not in (None, "")])
+            return str(arr)
+        for i, match in enumerate(matches, 1):
+            if not top or (match.get('similarity', 0) > top.get('similarity', 0)):
+                top = match
+            full_result_text += f"\n{match.get('structure', '').strip()}:\n"
+            tieude = (match.get('tieude') or '').strip()
+            noidung = (match.get('noidung') or '').strip()
+            hanhvi = (tieude + "\n" + noidung).strip().replace('\n', ' ')
+            full_result_text += f"Thực hiện hành vi:\n{hanhvi}"
+            canhantu = arr_to_str(match.get('canhantu'))
+            canhanden = arr_to_str(match.get('canhanden'))
+            if canhantu or canhanden:
+                full_result_text += f"\nCá nhân sẽ bị phạt tiền từ {canhantu} VNĐ đến {canhanden} VNĐ"
+            tochuctu = arr_to_str(match.get('tochuctu'))
+            tochucden = arr_to_str(match.get('tochucden'))
+            if tochuctu or tochucden:
+                full_result_text += f"\nTổ chức sẽ bị phạt tiền từ {tochuctu} VNĐ đến {tochucden} VNĐ"
+            hpbsnoidung = arr_to_str(match.get('hpbsnoidung'), sep="; ")
+            if hpbsnoidung:
+                full_result_text += f"\nNgoài việc bị phạt tiền, người vi phạm còn bị: {hpbsnoidung}"
+            bpkpnoidung = arr_to_str(match.get('bpkpnoidung'), sep="; ")
+            if bpkpnoidung:
+                full_result_text += f"\nNgoài ra, người vi phạm còn bị buộc: {bpkpnoidung}"
+        if top and (top.get('tieude') or top.get('noidung')):
+            tieude = (top.get('tieude') or '').strip()
+            noidung = (top.get('noidung') or '').strip()
+            hanhvi = (tieude + "\n" + noidung).strip().replace('\n', ' ')
+            top_result_text += f"Thực hiện hành vi:\n{hanhvi}"
+            canhantu = arr_to_str(top.get('canhantu'))
+            canhanden = arr_to_str(top.get('canhanden'))
+            if canhantu or canhanden:
+                top_result_text += f"\nCá nhân sẽ bị phạt tiền từ {canhantu} VNĐ đến {canhanden} VNĐ"
+            tochuctu = arr_to_str(top.get('tochuctu'))
+            tochucden = arr_to_str(top.get('tochucden'))
+            if tochuctu or tochucden:
+                top_result_text += f"\nTổ chức sẽ bị phạt tiền từ {tochuctu} VNĐ đến {tochucden} VNĐ"
+            hpbsnoidung = arr_to_str(top.get('hpbsnoidung'), sep="; ")
+            if hpbsnoidung:
+                top_result_text += f"\nNgoài việc bị phạt tiền, người vi phạm còn bị: {hpbsnoidung}"
+            bpkpnoidung = arr_to_str(top.get('bpkpnoidung'), sep="; ")
+            if bpkpnoidung:
+                top_result_text += f"\nNgoài ra, người vi phạm còn bị buộc: {bpkpnoidung}"
+        else:
+            result_text = "Không có kết quả phù hợp!"
+        prompt = (
+            "Bạn là một trợ lý AI có kiến thức pháp luật, hãy trả lời câu hỏi dựa trên các đoạn luật sau. "
+            "Chỉ sử dụng thông tin có trong các đoạn, không tự đoán.\n"
+            f"\nCác đoạn luật liên quan:\n{full_result_text}"
+            "\n\nHãy trả lời ngắn gọn, dễ hiểu, trích dẫn rõ ràng thông tin từ các đoạn luật nếu cần."
+            f"\n\nCâu hỏi của người dùng: {question}\n"
+        )
+        await self.channel.facebook.send_message(page_token, sender_id, f"Được rồi, để tôi tóm tắt lại nhé .....")
+        try:
+            answer = await self.channel.llm.generate_text(prompt)
+            if answer and answer.strip():
+                logger.error(f"LLM trả về câu trả lời: \n\tanswer: {answer}")
+                return answer.strip()
+            else:
+                logger.error(f"LLM không trả về câu trả lời phù hợp: \n\tanswer: {answer}")
+        except Exception as e:
+            logger.error(f"LLM không sẵn sàng: {e}\n{traceback.format_exc()}")
+        fallback = "Tóm tắt các đoạn luật liên quan:\n\n"
+        for i, match in enumerate(matches, 1):
+            fallback += f"Đoạn {i}:\n"
+            tieude = (match.get('tieude') or '').strip()
+            noidung = (match.get('noidung') or '').strip()
+            if tieude or noidung:
+                fallback += f"  - Hành vi: {(tieude + ' ' + noidung).strip()}\n"
+            canhantu = arr_to_str(match.get('canhantu'))
+            canhanden = arr_to_str(match.get('canhanden'))
+            if canhantu or canhanden:
+                fallback += f"  - Cá nhân bị phạt tiền từ {canhantu} VNĐ đến {canhanden} VNĐ\n"
+            tochuctu = arr_to_str(match.get('tochuctu'))
+            tochucden = arr_to_str(match.get('tochucden'))
+            if tochuctu or tochucden:
+                fallback += f"  - Tổ chức bị phạt tiền từ {tochuctu} VNĐ đến {tochucden} VNĐ\n"
+            hpbsnoidung = arr_to_str(match.get('hpbsnoidung'), sep="; ")
+            if hpbsnoidung:
+                fallback += f"  - Hình phạt bổ sung: {hpbsnoidung}\n"
+            bpkpnoidung = arr_to_str(match.get('bpkpnoidung'), sep="; ")
+            if bpkpnoidung:
+                fallback += f"  - Biện pháp khắc phục hậu quả: {bpkpnoidung}\n"
+            fallback += "\n"
+        return fallback.strip()
+    async def create_facebook_post(self, page_token: str, sender_id: str, history: List[Dict[str, Any]]) -> str:
+        logger.info(f"[MOCK] Creating Facebook post for sender_id={sender_id} with history={history}")
+        return "https://facebook.com/mock_post_url"