Update BM25S model
Browse files- .gitattributes +1 -0
- README.md +3 -3
- corpus.jsonl +2 -2
- corpus.mmindex.json +0 -0
- data.csc.index.npy +2 -2
- indices.csc.index.npy +2 -2
- indptr.csc.index.npy +2 -2
- params.index.json +1 -1
- vocab.index.json +0 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
corpus.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
corpus.jsonl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
corpus.mmindex.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -92,9 +92,9 @@ This dataset was created using the following data:
|
|
92 |
|
93 |
| Statistic | Value |
|
94 |
| --- | --- |
|
95 |
-
| Number of documents |
|
96 |
-
| Number of tokens |
|
97 |
-
| Average tokens per document | 8.
|
98 |
|
99 |
## Parameters
|
100 |
|
|
|
92 |
|
93 |
| Statistic | Value |
|
94 |
| --- | --- |
|
95 |
+
| Number of documents | 920259 |
|
96 |
+
| Number of tokens | 7882267 |
|
97 |
+
| Average tokens per document | 8.57 |
|
98 |
|
99 |
## Parameters
|
100 |
|
corpus.jsonl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cfe6b7642bef860a82d624ce38a81b31507c63752c3f21b7e1cacd4b30c8b81
|
3 |
+
size 2212195225
|
corpus.mmindex.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data.csc.index.npy
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e11e7256d901bdf0c5d2ea3364a856b49e604b5f02f79adc854f8b561974a11
|
3 |
+
size 31529196
|
indices.csc.index.npy
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6de571332e56bbcd2fb8a7d5faab6472ebe31d1ceecb0c2f1b4717e2dd4131a
|
3 |
+
size 31529196
|
indptr.csc.index.npy
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f0fbf15ede6567282bebf05381469a94b8f9faa50cdafe98a8df0088cabd07b
|
3 |
+
size 459360
|
params.index.json
CHANGED
@@ -6,6 +6,6 @@
|
|
6 |
"idf_method": "lucene",
|
7 |
"dtype": "float32",
|
8 |
"int_dtype": "int32",
|
9 |
-
"num_docs":
|
10 |
"version": "0.1.7"
|
11 |
}
|
|
|
6 |
"idf_method": "lucene",
|
7 |
"dtype": "float32",
|
8 |
"int_dtype": "int32",
|
9 |
+
"num_docs": 920259,
|
10 |
"version": "0.1.7"
|
11 |
}
|
vocab.index.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|