yuchenlin committed on
Commit
8a7cd1b
1 Parent(s): e5cf043

Update BM25S model

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ corpus.mmindex.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -92,9 +92,9 @@ This dataset was created using the following data:
92
 
93
  | Statistic | Value |
94
  | --- | --- |
95
- | Number of documents | 1000000 |
96
- | Number of tokens | 8343647 |
97
- | Average tokens per document | 8.34 |
98
 
99
  ## Parameters
100
 
 
92
 
93
  | Statistic | Value |
94
  | --- | --- |
95
+ | Number of documents | 920259 |
96
+ | Number of tokens | 7882267 |
97
+ | Average tokens per document | 8.57 |
98
 
99
  ## Parameters
100
 
corpus.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dedb8f127bc5ae7f520c88ea128375cdfa7b795fb3b653a749a6e8fc581a8914
3
- size 99478681
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cfe6b7642bef860a82d624ce38a81b31507c63752c3f21b7e1cacd4b30c8b81
3
+ size 2212195225
corpus.mmindex.json CHANGED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bb01f19b6579a60076c88379941d3b5c334c8b9b3c3fa78ee8e5637f86201e0
3
- size 33374716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e11e7256d901bdf0c5d2ea3364a856b49e604b5f02f79adc854f8b561974a11
3
+ size 31529196
indices.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65f269ce58e2f4dd80fd1272f5186a4557d6e74e1fe4e750851c4dd243d5c8e4
3
- size 33374716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6de571332e56bbcd2fb8a7d5faab6472ebe31d1ceecb0c2f1b4717e2dd4131a
3
+ size 31529196
indptr.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c1169384fab70b47e8ca8fbcc1c84255d2c8766b9249e457a998ba050f7b1b1
3
- size 459428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f0fbf15ede6567282bebf05381469a94b8f9faa50cdafe98a8df0088cabd07b
3
+ size 459360
params.index.json CHANGED
@@ -6,6 +6,6 @@
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
- "num_docs": 1000000,
10
  "version": "0.1.7"
11
  }
 
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
+ "num_docs": 920259,
10
  "version": "0.1.7"
11
  }
vocab.index.json CHANGED
The diff for this file is too large to render. See raw diff