fisherman611 committed on
Commit
ee4b9a9
·
verified ·
1 Parent(s): 9c9785e

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +10 -6
config.py CHANGED
@@ -14,10 +14,12 @@ class Config:
14
  # QDrant Configuration
15
  QDRANT_URL = os.getenv("QDRANT_URL")
16
  QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
17
- COLLECTION_NAME = "final_vietnamese_legal_corpus"
18
-
19
  # Embedding configuration
20
- EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 
 
 
21
 
22
  # Text Processing Configuration
23
  CHUNK_SIZE = 512
@@ -29,14 +31,16 @@ class Config:
29
  STOPWORDS_PATH = "stopwords.txt"
30
 
31
  # RAG Configuration
32
- TOP_K_RETRIEVAL = 15
33
  BM25_TOP_K = 20
 
 
34
  SIMILARITY_THRESHOLD = 0.25
35
 
36
  # Reranker Configuration
37
  ENABLE_RERANKING = True
38
  RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
39
- RERANKER_TOP_K = 10
40
  RERANK_BEFORE_RETRIEVAL_TOP_K = 25
41
  USE_SCORE_FUSION = True
42
  RERANKER_FUSION_ALPHA = 0.8
@@ -94,4 +98,4 @@ Thông tin tham khảo:
94
 
95
  Câu hỏi: {question}
96
 
97
- Trả lời:"""
 
14
  # QDrant Configuration
15
  QDRANT_URL = os.getenv("QDRANT_URL")
16
  QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
17
+
 
18
  # Embedding configuration
19
+ # EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
20
+ EMBEDDING_MODEL = "bkai-foundation-models/vietnamese-bi-encoder"
21
+
22
+ COLLECTION_NAME = "final_vietnamese_legal_corpus" if EMBEDDING_MODEL == "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" else "bkai_biencoder_vietnamese_legal_corpus"
23
 
24
  # Text Processing Configuration
25
  CHUNK_SIZE = 512
 
31
  STOPWORDS_PATH = "stopwords.txt"
32
 
33
  # RAG Configuration
34
+ TOP_K_RETRIEVAL = 20
35
  BM25_TOP_K = 20
36
+ BM25_B = 0.65
37
+ BM25_K1 = 1.2
38
  SIMILARITY_THRESHOLD = 0.25
39
 
40
  # Reranker Configuration
41
  ENABLE_RERANKING = True
42
  RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
43
+ RERANKER_TOP_K = 20
44
  RERANK_BEFORE_RETRIEVAL_TOP_K = 25
45
  USE_SCORE_FUSION = True
46
  RERANKER_FUSION_ALPHA = 0.8
 
98
 
99
  Câu hỏi: {question}
100
 
101
+ Trả lời:"""