Commit
·
62621fc
1
Parent(s):
bd96138
Upload folder using huggingface_hub
Browse files- data/sentence_retrieval/pairwise_ranking/config.json +11 -0
- data/sentence_retrieval/pairwise_ranking/dataset_dict.json +1 -0
- data/sentence_retrieval/pairwise_ranking/test/data-00000-of-00001.arrow +3 -0
- data/sentence_retrieval/pairwise_ranking/test/dataset_info.json +50 -0
- data/sentence_retrieval/pairwise_ranking/test/state.json +13 -0
- data/sentence_retrieval/pairwise_ranking/train/data-00000-of-00001.arrow +3 -0
- data/sentence_retrieval/pairwise_ranking/train/dataset_info.json +50 -0
- data/sentence_retrieval/pairwise_ranking/train/state.json +13 -0
data/sentence_retrieval/pairwise_ranking/config.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"claim_dataset_path": "data/claim_dataset_v2",
|
3 |
+
"wiki_dataset_path": "data/wiki_dataset",
|
4 |
+
"output_dir": "data/sentence_retrieval/pairwise_ranking_v2",
|
5 |
+
"top_k": 3,
|
6 |
+
"min_score": 10.0,
|
7 |
+
"return_by_noun": true,
|
8 |
+
"merge_adjacent": true,
|
9 |
+
"return_unmerged": true,
|
10 |
+
"seed": 42
|
11 |
+
}
|
data/sentence_retrieval/pairwise_ranking/dataset_dict.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"splits": ["train", "test"]}
|
data/sentence_retrieval/pairwise_ranking/test/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23178a383b9025f46d18b1087100d7e48470f396e844724b241f96181caeeee7
|
3 |
+
size 8388656
|
data/sentence_retrieval/pairwise_ranking/test/dataset_info.json
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"builder_name": "generator",
|
3 |
+
"citation": "",
|
4 |
+
"config_name": "default",
|
5 |
+
"dataset_size": 1907090,
|
6 |
+
"description": "",
|
7 |
+
"download_checksums": {},
|
8 |
+
"download_size": 0,
|
9 |
+
"features": {
|
10 |
+
"positive_claim": {
|
11 |
+
"dtype": "string",
|
12 |
+
"_type": "Value"
|
13 |
+
},
|
14 |
+
"positive_sentence": {
|
15 |
+
"feature": {
|
16 |
+
"dtype": "string",
|
17 |
+
"_type": "Value"
|
18 |
+
},
|
19 |
+
"_type": "Sequence"
|
20 |
+
},
|
21 |
+
"negative_claim": {
|
22 |
+
"dtype": "string",
|
23 |
+
"_type": "Value"
|
24 |
+
},
|
25 |
+
"negative_sentence": {
|
26 |
+
"feature": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"_type": "Sequence"
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"homepage": "",
|
34 |
+
"license": "",
|
35 |
+
"size_in_bytes": 1907090,
|
36 |
+
"splits": {
|
37 |
+
"train": {
|
38 |
+
"name": "train",
|
39 |
+
"num_bytes": 1907090,
|
40 |
+
"num_examples": 11620,
|
41 |
+
"dataset_name": "generator"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"version": {
|
45 |
+
"version_str": "0.0.0",
|
46 |
+
"major": 0,
|
47 |
+
"minor": 0,
|
48 |
+
"patch": 0
|
49 |
+
}
|
50 |
+
}
|
data/sentence_retrieval/pairwise_ranking/test/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "1a6a5afe498a255c",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": "train"
|
13 |
+
}
|
data/sentence_retrieval/pairwise_ranking/train/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a46b3bd8e995b97ce1cdd75414ed49230e62debc730b30200f37329810e2b07
|
3 |
+
size 74367848
|
data/sentence_retrieval/pairwise_ranking/train/dataset_info.json
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"builder_name": "generator",
|
3 |
+
"citation": "",
|
4 |
+
"config_name": "default",
|
5 |
+
"dataset_size": 1907090,
|
6 |
+
"description": "",
|
7 |
+
"download_checksums": {},
|
8 |
+
"download_size": 0,
|
9 |
+
"features": {
|
10 |
+
"positive_claim": {
|
11 |
+
"dtype": "string",
|
12 |
+
"_type": "Value"
|
13 |
+
},
|
14 |
+
"positive_sentence": {
|
15 |
+
"feature": {
|
16 |
+
"dtype": "string",
|
17 |
+
"_type": "Value"
|
18 |
+
},
|
19 |
+
"_type": "Sequence"
|
20 |
+
},
|
21 |
+
"negative_claim": {
|
22 |
+
"dtype": "string",
|
23 |
+
"_type": "Value"
|
24 |
+
},
|
25 |
+
"negative_sentence": {
|
26 |
+
"feature": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"_type": "Sequence"
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"homepage": "",
|
34 |
+
"license": "",
|
35 |
+
"size_in_bytes": 1907090,
|
36 |
+
"splits": {
|
37 |
+
"train": {
|
38 |
+
"name": "train",
|
39 |
+
"num_bytes": 1907090,
|
40 |
+
"num_examples": 11620,
|
41 |
+
"dataset_name": "generator"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"version": {
|
45 |
+
"version_str": "0.0.0",
|
46 |
+
"major": 0,
|
47 |
+
"minor": 0,
|
48 |
+
"patch": 0
|
49 |
+
}
|
50 |
+
}
|
data/sentence_retrieval/pairwise_ranking/train/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "1c80317fa3b1799d",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": "train"
|
13 |
+
}
|