Spaces:

VietCat
/

FBChatBot

Running

App Files Files Community

VietCat committed 11 days ago

Commit

c3199eb

1 Parent(s): 906da16

add chunker

Browse files

Files changed (4) hide show

app/law_document_chunker.py +372 -0
app/main.py +198 -0
app/supabase_db.py +67 -0
data/ND168-2024.txt +0 -0

app/law_document_chunker.py ADDED Viewed

	@@ -0,0 +1,372 @@

+import re
+import os
+import uuid
+from typing import List, Dict, Optional, Tuple
+from dataclasses import dataclass
+from loguru import logger
+from .supabase_db import SupabaseClient
+from .embedding import EmbeddingClient
+from .config import get_settings
+@dataclass
+class ChunkMetadata:
+    """Metadata for a single chunk of a law document."""
+    # Unique chunk identifier (the chunker fills this with a UUID4 string).
+    id: str
+    # Text of the chunk.
+    content: str
+    # Identifier of the law document this chunk belongs to.
+    vanbanid: int
+    # Identifier of the parent chunk, or None when the chunk has no parent.
+    cha: Optional[str] = None
+    # Title of the source document.
+    document_title: str = ""
+    # Article number; set only for chunks created at the article level.
+    article_number: Optional[int] = None
+    # Article title; set only for chunks created at the article level.
+    article_title: str = ""
+    # Clause number such as "1" or "1.2"; set only for clause-level chunks.
+    clause_number: str = ""
+    # Point marker such as "a" or "1.2.3"; set only for point-level chunks.
+    sub_clause_letter: str = ""
+    # Free-form summary; never populated by the code visible in this file.
+    context_summary: str = ""
+class LawDocumentChunker:
+    """Module xử lý chunking văn bản luật và tích hợp với Supabase."""
+    def __init__(self):
+        """Khởi tạo chunker với các regex patterns."""
+        settings = get_settings()
+        self.supabase_client = SupabaseClient(settings.supabase_url, settings.supabase_key)
+        self.embedding_client = EmbeddingClient()
+        # Regex patterns cho các cấp độ cấu trúc
+        self.PHAN_REGEX = r"(Phần|PHẦN|Phần thứ)\s+(\d+|[IVXLCDM]+|nhất|hai|ba|tư|năm|sáu|bảy|tám|chín|mười)\.?\s*\n"
+        self.PHU_LUC_REGEX = r"(Phụ lục|PHỤ LỤC)\s+(\d+|[A-Z]+)\.?\s*\n"
+        self.CHUONG_REGEX = r"(Chương|CHƯƠNG)\s+(\d+|[IVXLCDM]+)\.?\s*.*\n"
+        self.MUC_REGEX = r"(Mục|MỤC)\s+\d+\.?\s*.*\n"
+        self.DIEU_REGEX = r"Điều\s+(\d+)\.\s*(.*)"
+        self.KHOAN_REGEX = r"^\s*(\d+(\.\d+)*)\.\s*(.*)"
+        self.DIEM_REGEX_A = r"^\s*([a-zđ])\)\s*(.*)"
+        self.DIEM_REGEX_NUM = r"^\s*(\d+\.\d+\.\d+)\.\s*(.*)"
+        # Cấu hình chunking
+        self.CHUNK_SIZE = 500
+        self.CHUNK_OVERLAP = 100
+        logger.info("[CHUNKER] Initialized LawDocumentChunker")
+    def _create_data_directory(self):
+        """Tạo thư mục data nếu chưa tồn tại."""
+        data_dir = "data"
+        if not os.path.exists(data_dir):
+            os.makedirs(data_dir)
+            logger.info(f"[CHUNKER] Created directory: {data_dir}")
+        return data_dir
+    def _extract_document_title(self, file_path: str) -> str:
+        """Trích xuất tiêu đề văn bản từ tên file."""
+        filename = os.path.basename(file_path)
+        # Loại bỏ extension
+        name_without_ext = os.path.splitext(filename)[0]
+        # Thay _ bằng khoảng trắng và viết hoa chữ cái đầu
+        title = name_without_ext.replace('_', ' ').title()
+        logger.info(f"[CHUNKER] Extracted document title: {title}")
+        return title
+    def _read_document(self, file_path: str) -> str:
+        """Đọc nội dung văn bản từ file."""
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+            logger.info(f"[CHUNKER] Read document: {file_path}, length: {len(content)}")
+            return content
+        except Exception as e:
+            logger.error(f"[CHUNKER] Error reading file {file_path}: {e}")
+            raise
+    def _detect_structure_level(self, line: str) -> Tuple[str, Optional[str], Optional[str]]:
+        """Phát hiện cấp độ cấu trúc của một dòng."""
+        line = line.strip()
+        # Phần
+        match = re.match(self.PHAN_REGEX, line, re.IGNORECASE)
+        if match:
+            return "PHAN", match.group(1), match.group(2)
+        # Phụ lục
+        match = re.match(self.PHU_LUC_REGEX, line, re.IGNORECASE)
+        if match:
+            return "PHU_LUC", match.group(1), match.group(2)
+        # Chương
+        match = re.match(self.CHUONG_REGEX, line, re.IGNORECASE)
+        if match:
+            return "CHUONG", match.group(1), match.group(2)
+        # Mục
+        match = re.match(self.MUC_REGEX, line, re.IGNORECASE)
+        if match:
+            return "MUC", match.group(1), match.group(2)
+        # Điều
+        match = re.match(self.DIEU_REGEX, line)
+        if match:
+            return "DIEU", match.group(1), match.group(2)
+        # Khoản
+        match = re.match(self.KHOAN_REGEX, line)
+        if match:
+            clause_num = match.group(1)
+            # Kiểm tra không phải điểm (có từ 3 số trở lên)
+            if len(clause_num.split('.')) < 3:
+                return "KHOAN", clause_num, match.group(3)
+        # Điểm chữ cái
+        match = re.match(self.DIEM_REGEX_A, line)
+        if match:
+            return "DIEM", match.group(1), match.group(2)
+        # Điểm số
+        match = re.match(self.DIEM_REGEX_NUM, line)
+        if match:
+            return "DIEM", match.group(1), match.group(2)
+        return "CONTENT", None, None
+    def _create_chunk_metadata(self, content: str, level: str, level_value: Optional[str],
+                              parent_id: Optional[str], vanbanid: int,
+                              document_title: str) -> ChunkMetadata:
+        """Tạo metadata cho chunk."""
+        chunk_id = str(uuid.uuid4())
+        metadata = ChunkMetadata(
+            id=chunk_id,
+            content=content,
+            vanbanid=vanbanid,
+            cha=parent_id,
+            document_title=document_title
+        )
+        # Điền metadata theo cấp độ
+        if level == "DIEU" and level_value:
+            metadata.article_number = int(level_value) if level_value.isdigit() else None
+            metadata.article_title = content.strip()
+        elif level == "KHOAN" and level_value:
+            metadata.clause_number = level_value
+        elif level == "DIEM" and level_value:
+            metadata.sub_clause_letter = level_value
+        return metadata
+    def _split_into_chunks(self, text: str, chunk_size: int, overlap: int) -> List[str]:
+        """Chia text thành các chunk với overlap."""
+        chunks = []
+        start = 0
+        while start < len(text):
+            end = start + chunk_size
+            chunk = text[start:end]
+            # Tìm vị trí kết thúc chunk tốt nhất (cuối câu hoặc cuối từ)
+            if end < len(text):
+                # Tìm dấu chấm hoặc xuống dòng gần nhất
+                last_period = chunk.rfind('.')
+                last_newline = chunk.rfind('\n')
+                best_break = max(last_period, last_newline)
+                if best_break > start + chunk_size * 0.7:  # Chỉ break nếu không quá sớm
+                    end = start + best_break + 1
+                    chunk = text[start:end]
+            chunks.append(chunk)
+            start = end - overlap
+            if start >= len(text):
+                break
+        return chunks
+    def _process_document_recursive(self, content: str, vanbanid: int,
+                                  document_title: str) -> List[ChunkMetadata]:
+        """Xử lý văn bản theo cấu trúc phân cấp."""
+        lines = content.split('\n')
+        chunks = []
+        parent_stack = []  # Stack để theo dõi parent IDs
+        current_parent = None
+        current_chunk_content = ""
+        current_level = "CONTENT"
+        current_level_value = None
+        for line in lines:
+            level, level_value, level_content = self._detect_structure_level(line)
+            # Nếu phát hiện cấp độ mới
+            if level != "CONTENT" and level_value:
+                # Lưu chunk hiện tại nếu có
+                if current_chunk_content.strip():
+                    metadata = self._create_chunk_metadata(
+                        current_chunk_content.strip(),
+                        current_level,
+                        current_level_value,
+                        current_parent,
+                        vanbanid,
+                        document_title
+                    )
+                    chunks.append(metadata)
+                    # Cập nhật parent stack
+                    if level in ["PHAN", "PHU_LUC", "CHUONG", "MUC"]:
+                        # Cấp độ cao, reset stack
+                        parent_stack = [metadata.id]
+                        current_parent = metadata.id
+                    elif level == "DIEU":
+                        # Điều thuộc về cấp độ cao nhất hiện tại
+                        current_parent = parent_stack[-1] if parent_stack else None
+                        parent_stack.append(metadata.id)
+                    elif level in ["KHOAN", "DIEM"]:
+                        # Khoản/Điểm thuộc về Điều hiện tại
+                        current_parent = parent_stack[-1] if parent_stack else None
+                # Bắt đầu chunk mới
+                current_chunk_content = line + "\n"
+                current_level = level
+                current_level_value = level_value
+            else:
+                # Thêm vào chunk hiện tại
+                current_chunk_content += line + "\n"
+                # Kiểm tra nếu chunk quá lớn
+                if len(current_chunk_content) > self.CHUNK_SIZE:
+                    # Chia chunk hiện tại
+                    sub_chunks = self._split_into_chunks(current_chunk_content, self.CHUNK_SIZE, self.CHUNK_OVERLAP)
+                    for i, sub_chunk in enumerate(sub_chunks):
+                        metadata = self._create_chunk_metadata(
+                            sub_chunk.strip(),
+                            current_level,
+                            current_level_value,
+                            current_parent,
+                            vanbanid,
+                            document_title
+                        )
+                        chunks.append(metadata)
+                    current_chunk_content = ""
+        # Lưu chunk cuối cùng
+        if current_chunk_content.strip():
+            metadata = self._create_chunk_metadata(
+                current_chunk_content.strip(),
+                current_level,
+                current_level_value,
+                current_parent,
+                vanbanid,
+                document_title
+            )
+            chunks.append(metadata)
+        logger.info(f"[CHUNKER] Created {len(chunks)} chunks from document")
+        return chunks
+    async def _create_embeddings_for_chunks(self, chunks: List[ChunkMetadata]) -> List[Dict]:
+        """Tạo embeddings cho các chunks."""
+        logger.info(f"[CHUNKER] Creating embeddings for {len(chunks)} chunks")
+        chunk_data = []
+        for chunk in chunks:
+            try:
+                # Tạo embedding
+                embedding = await self.embedding_client.create_embedding(chunk.content)
+                # Chuẩn bị data cho Supabase
+                chunk_dict = {
+                    'id': chunk.id,
+                    'content': chunk.content,
+                    'embedding': embedding,
+                    'vanbanid': chunk.vanbanid,
+                    'cha': chunk.cha,
+                    'document_title': chunk.document_title,
+                    'article_number': chunk.article_number,
+                    'article_title': chunk.article_title,
+                    'clause_number': chunk.clause_number,
+                    'sub_clause_letter': chunk.sub_clause_letter,
+                    'context_summary': chunk.context_summary
+                }
+                chunk_data.append(chunk_dict)
+                logger.debug(f"[CHUNKER] Created embedding for chunk {chunk.id[:8]}...")
+            except Exception as e:
+                logger.error(f"[CHUNKER] Error creating embedding for chunk {chunk.id}: {e}")
+                continue
+        logger.info(f"[CHUNKER] Successfully created embeddings for {len(chunk_data)} chunks")
+        return chunk_data
+    async def _store_chunks_to_supabase(self, chunk_data: List[Dict]) -> bool:
+        """Lưu chunks vào Supabase."""
+        try:
+            logger.info(f"[CHUNKER] Storing {len(chunk_data)} chunks to Supabase")
+            # Lưu từng chunk
+            for chunk in chunk_data:
+                success = self.supabase_client.store_document_chunk(chunk)
+                if not success:
+                    logger.error(f"[CHUNKER] Failed to store chunk {chunk['id']}")
+                    return False
+            logger.info(f"[CHUNKER] Successfully stored all chunks to Supabase")
+            return True
+        except Exception as e:
+            logger.error(f"[CHUNKER] Error storing chunks to Supabase: {e}")
+            return False
+    async def process_law_document(self, file_path: str, document_id: int) -> bool:
+        """
+        Hàm chính để xử lý văn bản luật.
+        Args:
+            file_path: Đường dẫn đến file văn bản luật
+            document_id: ID duy nhất của văn bản luật
+        Returns:
+            bool: True nếu thành công, False nếu thất bại
+        """
+        try:
+            logger.info(f"[CHUNKER] Starting processing for file: {file_path}, document_id: {document_id}")
+            # 1. Tạo thư mục data nếu cần
+            self._create_data_directory()
+            # 2. Kiểm tra file tồn tại
+            if not os.path.exists(file_path):
+                logger.error(f"[CHUNKER] File not found: {file_path}")
+                return False
+            # 3. Đọc văn bản
+            content = self._read_document(file_path)
+            # 4. Trích xuất tiêu đề
+            document_title = self._extract_document_title(file_path)
+            # 5. Xử lý chunking theo cấu trúc
+            chunks = self._process_document_recursive(content, document_id, document_title)
+            if not chunks:
+                logger.warning(f"[CHUNKER] No chunks created for document {document_id}")
+                return False
+            # 6. Tạo embeddings
+            chunk_data = await self._create_embeddings_for_chunks(chunks)
+            if not chunk_data:
+                logger.error(f"[CHUNKER] No embeddings created for document {document_id}")
+                return False
+            # 7. Lưu vào Supabase
+            success = await self._store_chunks_to_supabase(chunk_data)
+            if success:
+                logger.info(f"[CHUNKER] Successfully processed document {document_id} with {len(chunk_data)} chunks")
+            else:
+                logger.error(f"[CHUNKER] Failed to store chunks for document {document_id}")
+            return success
+        except Exception as e:
+            logger.error(f"[CHUNKER] Error processing document {document_id}: {e}")
+            return False

app/main.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .health import router as health_router
 from .llm import create_llm_client
 from .reranker import Reranker
 from .request_limit_manager import RequestLimitManager
 app = FastAPI(title="WeBot Facebook Messenger API")
@@ -74,6 +75,9 @@ llm_client = create_llm_client(
 reranker = Reranker()
 logger.info("[STARTUP] Mount health router...")
 app.include_router(health_router)
@@ -526,6 +530,200 @@ async def create_facebook_post(page_token: str, sender_id: str, history: List[Di
     logger.info(f"[MOCK] Creating Facebook post for sender_id={sender_id} with history={history}")
     return "https://facebook.com/mock_post_url"
 if __name__ == "__main__":
     import uvicorn
     logger.info("[STARTUP] Bắt đầu chạy uvicorn server...")

 from .llm import create_llm_client
 from .reranker import Reranker
 from .request_limit_manager import RequestLimitManager
+from .law_document_chunker import LawDocumentChunker
 app = FastAPI(title="WeBot Facebook Messenger API")
 reranker = Reranker()
+# Khởi tạo LawDocumentChunker
+law_chunker = LawDocumentChunker()
 logger.info("[STARTUP] Mount health router...")
 app.include_router(health_router)
     logger.info(f"[MOCK] Creating Facebook post for sender_id={sender_id} with history={history}")
     return "https://facebook.com/mock_post_url"
+# ==================== DOCUMENT CHUNK MANAGEMENT APIs ====================
+@app.delete("/api/document-chunks/clear")
+@timing_decorator_async
+async def delete_all_document_chunks():
+    """
+    API xóa toàn bộ bảng document_chunks.
+    """
+    try:
+        logger.info("[API] Starting delete all document chunks")
+        success = supabase_client.delete_all_document_chunks()
+        if success:
+            logger.info("[API] Successfully deleted all document chunks")
+            return {"status": "success", "message": "Đã xóa toàn bộ document chunks"}
+        else:
+            logger.error("[API] Failed to delete all document chunks")
+            raise HTTPException(status_code=500, detail="Lỗi khi xóa document chunks")
+    except Exception as e:
+        logger.error(f"[API] Error in delete_all_document_chunks: {e}")
+        raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
+@app.post("/api/document-chunks/update")
+@timing_decorator_async
+async def update_specific_document(file_name: str, document_id: int):
+    """
+    API cập nhật file xác định trong thư mục data.
+    Args:
+        file_name: Tên file trong thư mục data (ví dụ: "luat_giao_thong.txt")
+        document_id: ID văn bản luật
+    """
+    try:
+        logger.info(f"[API] Starting update specific document: {file_name}, document_id: {document_id}")
+        # Kiểm tra file tồn tại
+        file_path = f"data/{file_name}"
+        if not os.path.exists(file_path):
+            logger.error(f"[API] File not found: {file_path}")
+            raise HTTPException(status_code=404, detail=f"File không tồn tại: {file_name}")
+        # Xóa chunks cũ của document_id này (nếu có)
+        logger.info(f"[API] Deleting old chunks for document_id: {document_id}")
+        supabase_client.delete_document_chunks_by_vanbanid(document_id)
+        # Xử lý văn bản mới
+        logger.info(f"[API] Processing document: {file_path}")
+        success = await law_chunker.process_law_document(file_path, document_id)
+        if success:
+            logger.info(f"[API] Successfully updated document: {file_name}")
+            return {
+                "status": "success",
+                "message": f"Đã cập nhật thành công văn bản: {file_name}",
+                "document_id": document_id,
+                "file_name": file_name
+            }
+        else:
+            logger.error(f"[API] Failed to update document: {file_name}")
+            raise HTTPException(status_code=500, detail=f"Lỗi khi xử lý văn bản: {file_name}")
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"[API] Error in update_specific_document: {e}")
+        raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
+@app.post("/api/document-chunks/update-all")
+@timing_decorator_async
+async def update_all_documents():
+    """
+    API cập nhật tự động toàn bộ file trong thư mục data.
+    """
+    try:
+        logger.info("[API] Starting update all documents")
+        # Kiểm tra thư mục data tồn tại
+        data_dir = "data"
+        if not os.path.exists(data_dir):
+            logger.warning(f"[API] Data directory not found: {data_dir}")
+            return {
+                "status": "warning",
+                "message": "Thư mục data không tồn tại",
+                "processed_files": [],
+                "failed_files": []
+            }
+        # Lấy danh sách file .txt trong thư mục data
+        txt_files = [f for f in os.listdir(data_dir) if f.endswith('.txt')]
+        if not txt_files:
+            logger.warning("[API] No .txt files found in data directory")
+            return {
+                "status": "warning",
+                "message": "Không tìm thấy file .txt nào trong thư mục data",
+                "processed_files": [],
+                "failed_files": []
+            }
+        logger.info(f"[API] Found {len(txt_files)} .txt files to process")
+        processed_files = []
+        failed_files = []
+        # Xử lý từng file
+        for i, file_name in enumerate(txt_files, 1):
+            try:
+                logger.info(f"[API] Processing file {i}/{len(txt_files)}: {file_name}")
+                # Sử dụng index làm document_id (có thể thay đổi logic này)
+                document_id = i
+                # Xóa chunks cũ của document_id này (nếu có)
+                supabase_client.delete_document_chunks_by_vanbanid(document_id)
+                # Xử lý văn bản
+                file_path = os.path.join(data_dir, file_name)
+                success = await law_chunker.process_law_document(file_path, document_id)
+                if success:
+                    processed_files.append({
+                        "file_name": file_name,
+                        "document_id": document_id,
+                        "status": "success"
+                    })
+                    logger.info(f"[API] Successfully processed: {file_name}")
+                else:
+                    failed_files.append({
+                        "file_name": file_name,
+                        "document_id": document_id,
+                        "status": "failed",
+                        "error": "Processing failed"
+                    })
+                    logger.error(f"[API] Failed to process: {file_name}")
+            except Exception as e:
+                logger.error(f"[API] Error processing {file_name}: {e}")
+                failed_files.append({
+                    "file_name": file_name,
+                    "document_id": i,
+                    "status": "failed",
+                    "error": str(e)
+                })
+        # Tổng kết
+        total_files = len(txt_files)
+        success_count = len(processed_files)
+        failed_count = len(failed_files)
+        logger.info(f"[API] Update all completed: {success_count}/{total_files} files processed successfully")
+        return {
+            "status": "success",
+            "message": f"Đã xử lý {success_count}/{total_files} files thành công",
+            "total_files": total_files,
+            "processed_files": processed_files,
+            "failed_files": failed_files
+        }
+    except Exception as e:
+        logger.error(f"[API] Error in update_all_documents: {e}")
+        raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
+@app.get("/api/document-chunks/status")
+@timing_decorator_async
+async def get_document_chunks_status():
+    """
+    API lấy thông tin trạng thái của document chunks.
+    """
+    try:
+        logger.info("[API] Getting document chunks status")
+        # Lấy thống kê từ Supabase
+        # Note: Cần implement method này trong SupabaseClient nếu cần
+        # Kiểm tra thư mục data
+        data_dir = "data"
+        txt_files = []
+        if os.path.exists(data_dir):
+            txt_files = [f for f in os.listdir(data_dir) if f.endswith('.txt')]
+        return {
+            "status": "success",
+            "data_directory": data_dir,
+            "available_files": txt_files,
+            "file_count": len(txt_files),
+            "message": f"Tìm thấy {len(txt_files)} file .txt trong thư mục data"
+        }
+    except Exception as e:
+        logger.error(f"[API] Error in get_document_chunks_status: {e}")
+        raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
 if __name__ == "__main__":
     import uvicorn
     logger.info("[STARTUP] Bắt đầu chạy uvicorn server...")

app/supabase_db.py CHANGED Viewed

@@ -76,4 +76,71 @@ class SupabaseClient:
             return bool(response.data)
         except Exception as e:
             logger.error(f"Error storing embedding: {e}")
             return False

             return bool(response.data)
         except Exception as e:
             logger.error(f"Error storing embedding: {e}")
+            return False
+    @timing_decorator_sync
+    def store_document_chunk(self, chunk_data: Dict[str, Any]) -> bool:
+        """
+        Lưu document chunk vào Supabase.
+        Input: chunk_data (dict) - chứa tất cả thông tin chunk
+        Output: bool (True nếu thành công, False nếu lỗi)
+        """
+        try:
+            response = self.client.table('document_chunks').insert(chunk_data).execute()
+            if response.data:
+                logger.info(f"Successfully stored chunk {chunk_data.get('id', 'unknown')}")
+                return True
+            else:
+                logger.error(f"Failed to store chunk {chunk_data.get('id', 'unknown')}")
+                return False
+        except Exception as e:
+            logger.error(f"Error storing document chunk: {e}")
+            return False
+    @timing_decorator_sync
+    def delete_all_document_chunks(self) -> bool:
+        """
+        Xóa toàn bộ bảng document_chunks.
+        Output: bool (True nếu thành công, False nếu lỗi)
+        """
+        try:
+            response = self.client.table('document_chunks').delete().neq('id', '').execute()
+            logger.info(f"Successfully deleted all document chunks")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting all document chunks: {e}")
+            return False
+    @timing_decorator_sync
+    def get_document_chunks_by_vanbanid(self, vanbanid: int) -> List[Dict[str, Any]]:
+        """
+        Lấy tất cả chunks của một văn bản theo vanbanid.
+        Input: vanbanid (int)
+        Output: List[Dict] - danh sách chunks
+        """
+        try:
+            response = self.client.table('document_chunks').select('*').eq('vanbanid', vanbanid).execute()
+            if response.data:
+                logger.info(f"Found {len(response.data)} chunks for vanbanid {vanbanid}")
+                return response.data
+            return []
+        except Exception as e:
+            logger.error(f"Error getting document chunks for vanbanid {vanbanid}: {e}")
+            return []
+    @timing_decorator_sync
+    def delete_document_chunks_by_vanbanid(self, vanbanid: int) -> bool:
+        """
+        Xóa tất cả chunks của một văn bản theo vanbanid.
+        Input: vanbanid (int)
+        Output: bool (True nếu thành công, False nếu lỗi)
+        """
+        try:
+            response = self.client.table('document_chunks').delete().eq('vanbanid', vanbanid).execute()
+            logger.info(f"Successfully deleted all chunks for vanbanid {vanbanid}")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting chunks for vanbanid {vanbanid}: {e}")
             return False

data/ND168-2024.txt ADDED Viewed

The diff for this file is too large to render. See raw diff