Spaces:
Running
Running
add hunyuan and qwen3
Browse files- README.md +1 -1
- playground_examples.py +1 -1
- stats/character_stats.json +114 -0
- stats/compression_rate.json +648 -0
- stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.ar.diff.json +0 -0
- stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.de.diff.json +109 -0
- stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.fa.diff.json +248 -0
- stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.fr.diff.json +105 -0
- stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.ja.diff.json +1046 -0
- stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.ko.diff.json +216 -0
- stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.ar.diff.json +0 -0
- stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.de.diff.json +109 -0
- stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.fa.diff.json +248 -0
- stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.fr.diff.json +105 -0
- stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.ja.diff.json +1046 -0
- stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.ko.diff.json +216 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.ar.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.de.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.en.diff.json +1325 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.es.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.fa.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.fr.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.ja.diff.json +82 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.ko.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.zh-Hans.diff.json +373 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.ar.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.de.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.en.diff.json +1325 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.es.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.fa.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.fr.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.ja.diff.json +82 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.ko.diff.json +0 -0
- stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.zh-Hans.diff.json +373 -0
- vocab.py +7 -0
README.md
CHANGED
@@ -33,7 +33,7 @@ python app.py
|
|
33 |
```sh
|
34 |
python compression_util.py # cache compression
|
35 |
python character_util.py # cache character
|
36 |
-
python stats/sample.py #
|
37 |
git add stats/compression_rate/*
|
38 |
git add -u .
|
39 |
|
|
|
33 |
```sh
|
34 |
python compression_util.py # cache compression
|
35 |
python character_util.py # cache character
|
36 |
+
python stats/sample.py # sample stats of compression
|
37 |
git add stats/compression_rate/*
|
38 |
git add -u .
|
39 |
|
playground_examples.py
CHANGED
@@ -23,7 +23,7 @@ from datasets import load_dataset
|
|
23 |
default_user_input = """\
|
24 |
Replace this text in the input field to see how tokenization works.
|
25 |
Buenos días!
|
26 |
-
|
27 |
ラグビーワールドカップ2023フランス"""
|
28 |
# default_tokenizer_name_1 = "Meta/llama3"
|
29 |
# default_tokenizer_name_1 = "gradientai/Llama-3-8B-Instruct-Gradient-1048k"
|
|
|
23 |
default_user_input = """\
|
24 |
Replace this text in the input field to see how tokenization works.
|
25 |
Buenos días!
|
26 |
+
Tokenizer 是自然语言处理(NLP)中的一个关键组件,它的主要作用是将人类语言文本转换为计算机可以理解的数字表示形式。
|
27 |
ラグビーワールドカップ2023フランス"""
|
28 |
# default_tokenizer_name_1 = "Meta/llama3"
|
29 |
# default_tokenizer_name_1 = "gradientai/Llama-3-8B-Instruct-Gradient-1048k"
|
stats/character_stats.json
CHANGED
@@ -2280,5 +2280,119 @@
|
|
2280 |
"len(ja-kana)": "1,2,11",
|
2281 |
"num(ko)": 2365,
|
2282 |
"len(ko)": "1,2,8"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2283 |
}
|
2284 |
}
|
|
|
2280 |
"len(ja-kana)": "1,2,11",
|
2281 |
"num(ko)": 2365,
|
2282 |
"len(ko)": "1,2,8"
|
2283 |
+
},
|
2284 |
+
"Qwen/Qwen3-4B-Instruct-2507": {
|
2285 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
2286 |
+
"organization": "Alibaba",
|
2287 |
+
"vocab_size": 151669,
|
2288 |
+
"num(digit)": 10,
|
2289 |
+
"len(digit)": "1,1,1",
|
2290 |
+
"num(space)": 55883,
|
2291 |
+
"len(space)": "1,6,128",
|
2292 |
+
"num(ar)": 4018,
|
2293 |
+
"len(ar)": "1,3,12",
|
2294 |
+
"num(zh)": 25557,
|
2295 |
+
"len(zh)": "1,2,7",
|
2296 |
+
"num(ja)": 27206,
|
2297 |
+
"len(ja)": "1,2,11",
|
2298 |
+
"num(ja-kana)": 2089,
|
2299 |
+
"len(ja-kana)": "1,3,11",
|
2300 |
+
"num(ko)": 3495,
|
2301 |
+
"len(ko)": "1,1,5"
|
2302 |
+
},
|
2303 |
+
"Qwen/Qwen3-Embedding-0.6B": {
|
2304 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
2305 |
+
"organization": "Alibaba",
|
2306 |
+
"vocab_size": 151669,
|
2307 |
+
"num(digit)": 10,
|
2308 |
+
"len(digit)": "1,1,1",
|
2309 |
+
"num(space)": 55883,
|
2310 |
+
"len(space)": "1,6,128",
|
2311 |
+
"num(ar)": 4018,
|
2312 |
+
"len(ar)": "1,3,12",
|
2313 |
+
"num(zh)": 25557,
|
2314 |
+
"len(zh)": "1,2,7",
|
2315 |
+
"num(ja)": 27206,
|
2316 |
+
"len(ja)": "1,2,11",
|
2317 |
+
"num(ja-kana)": 2089,
|
2318 |
+
"len(ja-kana)": "1,3,11",
|
2319 |
+
"num(ko)": 3495,
|
2320 |
+
"len(ko)": "1,1,5"
|
2321 |
+
},
|
2322 |
+
"openbmb/MiniCPM-V-4": {
|
2323 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
2324 |
+
"organization": "OpenBMB",
|
2325 |
+
"vocab_size": 73448,
|
2326 |
+
"num(digit)": 992,
|
2327 |
+
"len(digit)": "1,14,14",
|
2328 |
+
"num(space)": 40,
|
2329 |
+
"len(space)": "1,1,15",
|
2330 |
+
"num(ar)": 41,
|
2331 |
+
"len(ar)": "1,1,2",
|
2332 |
+
"num(zh)": 28322,
|
2333 |
+
"len(zh)": "1,2,16",
|
2334 |
+
"num(ja)": 28462,
|
2335 |
+
"len(ja)": "1,2,16",
|
2336 |
+
"num(ja-kana)": 140,
|
2337 |
+
"len(ja-kana)": "1,1,2",
|
2338 |
+
"num(ko)": 92,
|
2339 |
+
"len(ko)": "1,1,2"
|
2340 |
+
},
|
2341 |
+
"tencent/Hunyuan-0.5B-Instruct": {
|
2342 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
2343 |
+
"organization": "Tencent",
|
2344 |
+
"vocab_size": 120818,
|
2345 |
+
"num(digit)": 1110,
|
2346 |
+
"len(digit)": "1,3,3",
|
2347 |
+
"num(space)": 38807,
|
2348 |
+
"len(space)": "1,7,512",
|
2349 |
+
"num(ar)": 716,
|
2350 |
+
"len(ar)": "1,3,8",
|
2351 |
+
"num(zh)": 45459,
|
2352 |
+
"len(zh)": "1,2,31",
|
2353 |
+
"num(ja)": 45835,
|
2354 |
+
"len(ja)": "1,2,31",
|
2355 |
+
"num(ja-kana)": 404,
|
2356 |
+
"len(ja-kana)": "1,2,6",
|
2357 |
+
"num(ko)": 909,
|
2358 |
+
"len(ko)": "1,2,5"
|
2359 |
+
},
|
2360 |
+
"tencent/Hunyuan-1.8B-Instruct": {
|
2361 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
2362 |
+
"organization": "Tencent",
|
2363 |
+
"vocab_size": 120818,
|
2364 |
+
"num(digit)": 1110,
|
2365 |
+
"len(digit)": "1,3,3",
|
2366 |
+
"num(space)": 38807,
|
2367 |
+
"len(space)": "1,7,512",
|
2368 |
+
"num(ar)": 716,
|
2369 |
+
"len(ar)": "1,3,8",
|
2370 |
+
"num(zh)": 45459,
|
2371 |
+
"len(zh)": "1,2,31",
|
2372 |
+
"num(ja)": 45835,
|
2373 |
+
"len(ja)": "1,2,31",
|
2374 |
+
"num(ja-kana)": 404,
|
2375 |
+
"len(ja-kana)": "1,2,6",
|
2376 |
+
"num(ko)": 909,
|
2377 |
+
"len(ko)": "1,2,5"
|
2378 |
+
},
|
2379 |
+
"zai-org/GLM-4.5": {
|
2380 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
2381 |
+
"organization": "Zai",
|
2382 |
+
"vocab_size": 151365,
|
2383 |
+
"num(digit)": 541,
|
2384 |
+
"len(digit)": "1,3,6",
|
2385 |
+
"num(space)": 62955,
|
2386 |
+
"len(space)": "1,6,512",
|
2387 |
+
"num(ar)": 2004,
|
2388 |
+
"len(ar)": "1,3,10",
|
2389 |
+
"num(zh)": 28642,
|
2390 |
+
"len(zh)": "1,2,22",
|
2391 |
+
"num(ja)": 29491,
|
2392 |
+
"len(ja)": "1,2,22",
|
2393 |
+
"num(ja-kana)": 964,
|
2394 |
+
"len(ja-kana)": "1,2,7",
|
2395 |
+
"num(ko)": 565,
|
2396 |
+
"len(ko)": "1,2,5"
|
2397 |
}
|
2398 |
}
|
stats/compression_rate.json
CHANGED
@@ -12310,5 +12310,653 @@
|
|
12310 |
"oov_ratio": 0.0,
|
12311 |
"_oov_charset": "[]",
|
12312 |
"lossless": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12313 |
}
|
12314 |
}
|
|
|
12310 |
"oov_ratio": 0.0,
|
12311 |
"_oov_charset": "[]",
|
12312 |
"lossless": true
|
12313 |
+
},
|
12314 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/ar": {
|
12315 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12316 |
+
"organization": "Alibaba",
|
12317 |
+
"vocab_size": 151669,
|
12318 |
+
"_n_bytes": 2813283,
|
12319 |
+
"_n_tokens": 614959,
|
12320 |
+
"_n_chars": 1560987,
|
12321 |
+
"_n_oov_chars": 0,
|
12322 |
+
"oov_ratio": 0.0,
|
12323 |
+
"_oov_charset": "[]",
|
12324 |
+
"lossless": false
|
12325 |
+
},
|
12326 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/de": {
|
12327 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12328 |
+
"organization": "Alibaba",
|
12329 |
+
"vocab_size": 151669,
|
12330 |
+
"_n_bytes": 1814876,
|
12331 |
+
"_n_tokens": 503561,
|
12332 |
+
"_n_chars": 1784021,
|
12333 |
+
"_n_oov_chars": 0,
|
12334 |
+
"oov_ratio": 0.0,
|
12335 |
+
"_oov_charset": "[]",
|
12336 |
+
"lossless": false
|
12337 |
+
},
|
12338 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/en": {
|
12339 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12340 |
+
"organization": "Alibaba",
|
12341 |
+
"vocab_size": 151669,
|
12342 |
+
"_n_bytes": 1124813,
|
12343 |
+
"_n_tokens": 257983,
|
12344 |
+
"_n_chars": 1121360,
|
12345 |
+
"_n_oov_chars": 0,
|
12346 |
+
"oov_ratio": 0.0,
|
12347 |
+
"_oov_charset": "[]",
|
12348 |
+
"lossless": true
|
12349 |
+
},
|
12350 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/es": {
|
12351 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12352 |
+
"organization": "Alibaba",
|
12353 |
+
"vocab_size": 151669,
|
12354 |
+
"_n_bytes": 1664455,
|
12355 |
+
"_n_tokens": 434264,
|
12356 |
+
"_n_chars": 1630297,
|
12357 |
+
"_n_oov_chars": 0,
|
12358 |
+
"oov_ratio": 0.0,
|
12359 |
+
"_oov_charset": "[]",
|
12360 |
+
"lossless": true
|
12361 |
+
},
|
12362 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/fa": {
|
12363 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12364 |
+
"organization": "Alibaba",
|
12365 |
+
"vocab_size": 151669,
|
12366 |
+
"_n_bytes": 2054052,
|
12367 |
+
"_n_tokens": 643421,
|
12368 |
+
"_n_chars": 1145876,
|
12369 |
+
"_n_oov_chars": 0,
|
12370 |
+
"oov_ratio": 0.0,
|
12371 |
+
"_oov_charset": "[]",
|
12372 |
+
"lossless": false
|
12373 |
+
},
|
12374 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/fr": {
|
12375 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12376 |
+
"organization": "Alibaba",
|
12377 |
+
"vocab_size": 151669,
|
12378 |
+
"_n_bytes": 1540504,
|
12379 |
+
"_n_tokens": 413637,
|
12380 |
+
"_n_chars": 1484970,
|
12381 |
+
"_n_oov_chars": 0,
|
12382 |
+
"oov_ratio": 0.0,
|
12383 |
+
"_oov_charset": "[]",
|
12384 |
+
"lossless": false
|
12385 |
+
},
|
12386 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/ja": {
|
12387 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12388 |
+
"organization": "Alibaba",
|
12389 |
+
"vocab_size": 151669,
|
12390 |
+
"_n_bytes": 1774770,
|
12391 |
+
"_n_tokens": 377144,
|
12392 |
+
"_n_chars": 603065,
|
12393 |
+
"_n_oov_chars": 0,
|
12394 |
+
"oov_ratio": 0.0,
|
12395 |
+
"_oov_charset": "[]",
|
12396 |
+
"lossless": false
|
12397 |
+
},
|
12398 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/ko": {
|
12399 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12400 |
+
"organization": "Alibaba",
|
12401 |
+
"vocab_size": 151669,
|
12402 |
+
"_n_bytes": 1524839,
|
12403 |
+
"_n_tokens": 457492,
|
12404 |
+
"_n_chars": 655190,
|
12405 |
+
"_n_oov_chars": 25,
|
12406 |
+
"oov_ratio": 3.815687052610693e-05,
|
12407 |
+
"_oov_charset": "[\"陸\", \"立\", \"樂\", \"流\", \"金\", \"理\", \"良\", \"梁\", \"樂\", \"龍\", \"靈\", \"女\", \"累\", \"不\", \"識\"]",
|
12408 |
+
"lossless": false
|
12409 |
+
},
|
12410 |
+
"Qwen/Qwen3-4B-Instruct-2507 @ cc100/zh-Hans": {
|
12411 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-4B-Instruct-2507</a>",
|
12412 |
+
"organization": "Alibaba",
|
12413 |
+
"vocab_size": 151669,
|
12414 |
+
"_n_bytes": 2633047,
|
12415 |
+
"_n_tokens": 589211,
|
12416 |
+
"_n_chars": 927311,
|
12417 |
+
"_n_oov_chars": 0,
|
12418 |
+
"oov_ratio": 0.0,
|
12419 |
+
"_oov_charset": "[]",
|
12420 |
+
"lossless": true
|
12421 |
+
},
|
12422 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/ar": {
|
12423 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12424 |
+
"organization": "Alibaba",
|
12425 |
+
"vocab_size": 151669,
|
12426 |
+
"_n_bytes": 2813283,
|
12427 |
+
"_n_tokens": 614959,
|
12428 |
+
"_n_chars": 1560987,
|
12429 |
+
"_n_oov_chars": 0,
|
12430 |
+
"oov_ratio": 0.0,
|
12431 |
+
"_oov_charset": "[]",
|
12432 |
+
"lossless": false
|
12433 |
+
},
|
12434 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/de": {
|
12435 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12436 |
+
"organization": "Alibaba",
|
12437 |
+
"vocab_size": 151669,
|
12438 |
+
"_n_bytes": 1814876,
|
12439 |
+
"_n_tokens": 503561,
|
12440 |
+
"_n_chars": 1784021,
|
12441 |
+
"_n_oov_chars": 0,
|
12442 |
+
"oov_ratio": 0.0,
|
12443 |
+
"_oov_charset": "[]",
|
12444 |
+
"lossless": false
|
12445 |
+
},
|
12446 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/en": {
|
12447 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12448 |
+
"organization": "Alibaba",
|
12449 |
+
"vocab_size": 151669,
|
12450 |
+
"_n_bytes": 1124813,
|
12451 |
+
"_n_tokens": 257983,
|
12452 |
+
"_n_chars": 1121360,
|
12453 |
+
"_n_oov_chars": 0,
|
12454 |
+
"oov_ratio": 0.0,
|
12455 |
+
"_oov_charset": "[]",
|
12456 |
+
"lossless": true
|
12457 |
+
},
|
12458 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/es": {
|
12459 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12460 |
+
"organization": "Alibaba",
|
12461 |
+
"vocab_size": 151669,
|
12462 |
+
"_n_bytes": 1664455,
|
12463 |
+
"_n_tokens": 434264,
|
12464 |
+
"_n_chars": 1630297,
|
12465 |
+
"_n_oov_chars": 0,
|
12466 |
+
"oov_ratio": 0.0,
|
12467 |
+
"_oov_charset": "[]",
|
12468 |
+
"lossless": true
|
12469 |
+
},
|
12470 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/fa": {
|
12471 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12472 |
+
"organization": "Alibaba",
|
12473 |
+
"vocab_size": 151669,
|
12474 |
+
"_n_bytes": 2054052,
|
12475 |
+
"_n_tokens": 643421,
|
12476 |
+
"_n_chars": 1145876,
|
12477 |
+
"_n_oov_chars": 0,
|
12478 |
+
"oov_ratio": 0.0,
|
12479 |
+
"_oov_charset": "[]",
|
12480 |
+
"lossless": false
|
12481 |
+
},
|
12482 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/fr": {
|
12483 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12484 |
+
"organization": "Alibaba",
|
12485 |
+
"vocab_size": 151669,
|
12486 |
+
"_n_bytes": 1540504,
|
12487 |
+
"_n_tokens": 413637,
|
12488 |
+
"_n_chars": 1484970,
|
12489 |
+
"_n_oov_chars": 0,
|
12490 |
+
"oov_ratio": 0.0,
|
12491 |
+
"_oov_charset": "[]",
|
12492 |
+
"lossless": false
|
12493 |
+
},
|
12494 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/ja": {
|
12495 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12496 |
+
"organization": "Alibaba",
|
12497 |
+
"vocab_size": 151669,
|
12498 |
+
"_n_bytes": 1774770,
|
12499 |
+
"_n_tokens": 377144,
|
12500 |
+
"_n_chars": 603065,
|
12501 |
+
"_n_oov_chars": 0,
|
12502 |
+
"oov_ratio": 0.0,
|
12503 |
+
"_oov_charset": "[]",
|
12504 |
+
"lossless": false
|
12505 |
+
},
|
12506 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/ko": {
|
12507 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12508 |
+
"organization": "Alibaba",
|
12509 |
+
"vocab_size": 151669,
|
12510 |
+
"_n_bytes": 1524839,
|
12511 |
+
"_n_tokens": 457492,
|
12512 |
+
"_n_chars": 655190,
|
12513 |
+
"_n_oov_chars": 25,
|
12514 |
+
"oov_ratio": 3.815687052610693e-05,
|
12515 |
+
"_oov_charset": "[\"陸\", \"立\", \"樂\", \"流\", \"金\", \"理\", \"良\", \"梁\", \"樂\", \"龍\", \"靈\", \"女\", \"累\", \"不\", \"識\"]",
|
12516 |
+
"lossless": false
|
12517 |
+
},
|
12518 |
+
"Qwen/Qwen3-Embedding-0.6B @ cc100/zh-Hans": {
|
12519 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen3-Embedding-0.6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen3-Embedding-0.6B</a>",
|
12520 |
+
"organization": "Alibaba",
|
12521 |
+
"vocab_size": 151669,
|
12522 |
+
"_n_bytes": 2633047,
|
12523 |
+
"_n_tokens": 589211,
|
12524 |
+
"_n_chars": 927311,
|
12525 |
+
"_n_oov_chars": 0,
|
12526 |
+
"oov_ratio": 0.0,
|
12527 |
+
"_oov_charset": "[]",
|
12528 |
+
"lossless": true
|
12529 |
+
},
|
12530 |
+
"openbmb/MiniCPM-V-4 @ cc100/ar": {
|
12531 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12532 |
+
"organization": "OpenBMB",
|
12533 |
+
"vocab_size": 73448,
|
12534 |
+
"_n_bytes": 2813283,
|
12535 |
+
"_n_tokens": 1456173,
|
12536 |
+
"_n_chars": 1560987,
|
12537 |
+
"_n_oov_chars": 0,
|
12538 |
+
"oov_ratio": 0.0,
|
12539 |
+
"_oov_charset": "[]",
|
12540 |
+
"lossless": true
|
12541 |
+
},
|
12542 |
+
"openbmb/MiniCPM-V-4 @ cc100/de": {
|
12543 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12544 |
+
"organization": "OpenBMB",
|
12545 |
+
"vocab_size": 73448,
|
12546 |
+
"_n_bytes": 1814876,
|
12547 |
+
"_n_tokens": 665530,
|
12548 |
+
"_n_chars": 1784021,
|
12549 |
+
"_n_oov_chars": 0,
|
12550 |
+
"oov_ratio": 0.0,
|
12551 |
+
"_oov_charset": "[]",
|
12552 |
+
"lossless": true
|
12553 |
+
},
|
12554 |
+
"openbmb/MiniCPM-V-4 @ cc100/en": {
|
12555 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12556 |
+
"organization": "OpenBMB",
|
12557 |
+
"vocab_size": 73448,
|
12558 |
+
"_n_bytes": 1124813,
|
12559 |
+
"_n_tokens": 273935,
|
12560 |
+
"_n_chars": 1121360,
|
12561 |
+
"_n_oov_chars": 0,
|
12562 |
+
"oov_ratio": 0.0,
|
12563 |
+
"_oov_charset": "[]",
|
12564 |
+
"lossless": true
|
12565 |
+
},
|
12566 |
+
"openbmb/MiniCPM-V-4 @ cc100/es": {
|
12567 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12568 |
+
"organization": "OpenBMB",
|
12569 |
+
"vocab_size": 73448,
|
12570 |
+
"_n_bytes": 1664455,
|
12571 |
+
"_n_tokens": 558746,
|
12572 |
+
"_n_chars": 1630297,
|
12573 |
+
"_n_oov_chars": 0,
|
12574 |
+
"oov_ratio": 0.0,
|
12575 |
+
"_oov_charset": "[]",
|
12576 |
+
"lossless": true
|
12577 |
+
},
|
12578 |
+
"openbmb/MiniCPM-V-4 @ cc100/fa": {
|
12579 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12580 |
+
"organization": "OpenBMB",
|
12581 |
+
"vocab_size": 73448,
|
12582 |
+
"_n_bytes": 2054052,
|
12583 |
+
"_n_tokens": 1115665,
|
12584 |
+
"_n_chars": 1145876,
|
12585 |
+
"_n_oov_chars": 0,
|
12586 |
+
"oov_ratio": 0.0,
|
12587 |
+
"_oov_charset": "[]",
|
12588 |
+
"lossless": true
|
12589 |
+
},
|
12590 |
+
"openbmb/MiniCPM-V-4 @ cc100/fr": {
|
12591 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12592 |
+
"organization": "OpenBMB",
|
12593 |
+
"vocab_size": 73448,
|
12594 |
+
"_n_bytes": 1540504,
|
12595 |
+
"_n_tokens": 523781,
|
12596 |
+
"_n_chars": 1484970,
|
12597 |
+
"_n_oov_chars": 0,
|
12598 |
+
"oov_ratio": 0.0,
|
12599 |
+
"_oov_charset": "[]",
|
12600 |
+
"lossless": true
|
12601 |
+
},
|
12602 |
+
"openbmb/MiniCPM-V-4 @ cc100/ja": {
|
12603 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12604 |
+
"organization": "OpenBMB",
|
12605 |
+
"vocab_size": 73448,
|
12606 |
+
"_n_bytes": 1774770,
|
12607 |
+
"_n_tokens": 591146,
|
12608 |
+
"_n_chars": 603065,
|
12609 |
+
"_n_oov_chars": 0,
|
12610 |
+
"oov_ratio": 0.0,
|
12611 |
+
"_oov_charset": "[]",
|
12612 |
+
"lossless": true
|
12613 |
+
},
|
12614 |
+
"openbmb/MiniCPM-V-4 @ cc100/ko": {
|
12615 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12616 |
+
"organization": "OpenBMB",
|
12617 |
+
"vocab_size": 73448,
|
12618 |
+
"_n_bytes": 1524839,
|
12619 |
+
"_n_tokens": 973049,
|
12620 |
+
"_n_chars": 655190,
|
12621 |
+
"_n_oov_chars": 0,
|
12622 |
+
"oov_ratio": 0.0,
|
12623 |
+
"_oov_charset": "[]",
|
12624 |
+
"lossless": true
|
12625 |
+
},
|
12626 |
+
"openbmb/MiniCPM-V-4 @ cc100/zh-Hans": {
|
12627 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openbmb/MiniCPM-V-4\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">MiniCPM-V-4</a>",
|
12628 |
+
"organization": "OpenBMB",
|
12629 |
+
"vocab_size": 73448,
|
12630 |
+
"_n_bytes": 2633047,
|
12631 |
+
"_n_tokens": 594634,
|
12632 |
+
"_n_chars": 927311,
|
12633 |
+
"_n_oov_chars": 0,
|
12634 |
+
"oov_ratio": 0.0,
|
12635 |
+
"_oov_charset": "[]",
|
12636 |
+
"lossless": true
|
12637 |
+
},
|
12638 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/ar": {
|
12639 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12640 |
+
"organization": "Tencent",
|
12641 |
+
"vocab_size": 120818,
|
12642 |
+
"_n_bytes": 2813283,
|
12643 |
+
"_n_tokens": 741999,
|
12644 |
+
"_n_chars": 1560987,
|
12645 |
+
"_n_oov_chars": 0,
|
12646 |
+
"oov_ratio": 0.0,
|
12647 |
+
"_oov_charset": "[]",
|
12648 |
+
"lossless": false
|
12649 |
+
},
|
12650 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/de": {
|
12651 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12652 |
+
"organization": "Tencent",
|
12653 |
+
"vocab_size": 120818,
|
12654 |
+
"_n_bytes": 1814876,
|
12655 |
+
"_n_tokens": 543757,
|
12656 |
+
"_n_chars": 1784021,
|
12657 |
+
"_n_oov_chars": 0,
|
12658 |
+
"oov_ratio": 0.0,
|
12659 |
+
"_oov_charset": "[]",
|
12660 |
+
"lossless": false
|
12661 |
+
},
|
12662 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/en": {
|
12663 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12664 |
+
"organization": "Tencent",
|
12665 |
+
"vocab_size": 120818,
|
12666 |
+
"_n_bytes": 1124813,
|
12667 |
+
"_n_tokens": 258324,
|
12668 |
+
"_n_chars": 1121360,
|
12669 |
+
"_n_oov_chars": 0,
|
12670 |
+
"oov_ratio": 0.0,
|
12671 |
+
"_oov_charset": "[]",
|
12672 |
+
"lossless": false
|
12673 |
+
},
|
12674 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/es": {
|
12675 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12676 |
+
"organization": "Tencent",
|
12677 |
+
"vocab_size": 120818,
|
12678 |
+
"_n_bytes": 1664455,
|
12679 |
+
"_n_tokens": 478626,
|
12680 |
+
"_n_chars": 1630297,
|
12681 |
+
"_n_oov_chars": 0,
|
12682 |
+
"oov_ratio": 0.0,
|
12683 |
+
"_oov_charset": "[]",
|
12684 |
+
"lossless": false
|
12685 |
+
},
|
12686 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/fa": {
|
12687 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12688 |
+
"organization": "Tencent",
|
12689 |
+
"vocab_size": 120818,
|
12690 |
+
"_n_bytes": 2054052,
|
12691 |
+
"_n_tokens": 628431,
|
12692 |
+
"_n_chars": 1145876,
|
12693 |
+
"_n_oov_chars": 0,
|
12694 |
+
"oov_ratio": 0.0,
|
12695 |
+
"_oov_charset": "[]",
|
12696 |
+
"lossless": false
|
12697 |
+
},
|
12698 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/fr": {
|
12699 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12700 |
+
"organization": "Tencent",
|
12701 |
+
"vocab_size": 120818,
|
12702 |
+
"_n_bytes": 1540504,
|
12703 |
+
"_n_tokens": 438575,
|
12704 |
+
"_n_chars": 1484970,
|
12705 |
+
"_n_oov_chars": 0,
|
12706 |
+
"oov_ratio": 0.0,
|
12707 |
+
"_oov_charset": "[]",
|
12708 |
+
"lossless": false
|
12709 |
+
},
|
12710 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/ja": {
|
12711 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12712 |
+
"organization": "Tencent",
|
12713 |
+
"vocab_size": 120818,
|
12714 |
+
"_n_bytes": 1774770,
|
12715 |
+
"_n_tokens": 464604,
|
12716 |
+
"_n_chars": 603065,
|
12717 |
+
"_n_oov_chars": 0,
|
12718 |
+
"oov_ratio": 0.0,
|
12719 |
+
"_oov_charset": "[]",
|
12720 |
+
"lossless": false
|
12721 |
+
},
|
12722 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/ko": {
|
12723 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12724 |
+
"organization": "Tencent",
|
12725 |
+
"vocab_size": 120818,
|
12726 |
+
"_n_bytes": 1524839,
|
12727 |
+
"_n_tokens": 483060,
|
12728 |
+
"_n_chars": 655190,
|
12729 |
+
"_n_oov_chars": 0,
|
12730 |
+
"oov_ratio": 0.0,
|
12731 |
+
"_oov_charset": "[]",
|
12732 |
+
"lossless": false
|
12733 |
+
},
|
12734 |
+
"tencent/Hunyuan-0.5B-Instruct @ cc100/zh-Hans": {
|
12735 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-0.5B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-0.5B-Instruct</a>",
|
12736 |
+
"organization": "Tencent",
|
12737 |
+
"vocab_size": 120818,
|
12738 |
+
"_n_bytes": 2633047,
|
12739 |
+
"_n_tokens": 533924,
|
12740 |
+
"_n_chars": 927311,
|
12741 |
+
"_n_oov_chars": 0,
|
12742 |
+
"oov_ratio": 0.0,
|
12743 |
+
"_oov_charset": "[]",
|
12744 |
+
"lossless": false
|
12745 |
+
},
|
12746 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/ar": {
|
12747 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12748 |
+
"organization": "Tencent",
|
12749 |
+
"vocab_size": 120818,
|
12750 |
+
"_n_bytes": 2813283,
|
12751 |
+
"_n_tokens": 741999,
|
12752 |
+
"_n_chars": 1560987,
|
12753 |
+
"_n_oov_chars": 0,
|
12754 |
+
"oov_ratio": 0.0,
|
12755 |
+
"_oov_charset": "[]",
|
12756 |
+
"lossless": false
|
12757 |
+
},
|
12758 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/de": {
|
12759 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12760 |
+
"organization": "Tencent",
|
12761 |
+
"vocab_size": 120818,
|
12762 |
+
"_n_bytes": 1814876,
|
12763 |
+
"_n_tokens": 543757,
|
12764 |
+
"_n_chars": 1784021,
|
12765 |
+
"_n_oov_chars": 0,
|
12766 |
+
"oov_ratio": 0.0,
|
12767 |
+
"_oov_charset": "[]",
|
12768 |
+
"lossless": false
|
12769 |
+
},
|
12770 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/en": {
|
12771 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12772 |
+
"organization": "Tencent",
|
12773 |
+
"vocab_size": 120818,
|
12774 |
+
"_n_bytes": 1124813,
|
12775 |
+
"_n_tokens": 258324,
|
12776 |
+
"_n_chars": 1121360,
|
12777 |
+
"_n_oov_chars": 0,
|
12778 |
+
"oov_ratio": 0.0,
|
12779 |
+
"_oov_charset": "[]",
|
12780 |
+
"lossless": false
|
12781 |
+
},
|
12782 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/es": {
|
12783 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12784 |
+
"organization": "Tencent",
|
12785 |
+
"vocab_size": 120818,
|
12786 |
+
"_n_bytes": 1664455,
|
12787 |
+
"_n_tokens": 478626,
|
12788 |
+
"_n_chars": 1630297,
|
12789 |
+
"_n_oov_chars": 0,
|
12790 |
+
"oov_ratio": 0.0,
|
12791 |
+
"_oov_charset": "[]",
|
12792 |
+
"lossless": false
|
12793 |
+
},
|
12794 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/fa": {
|
12795 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12796 |
+
"organization": "Tencent",
|
12797 |
+
"vocab_size": 120818,
|
12798 |
+
"_n_bytes": 2054052,
|
12799 |
+
"_n_tokens": 628431,
|
12800 |
+
"_n_chars": 1145876,
|
12801 |
+
"_n_oov_chars": 0,
|
12802 |
+
"oov_ratio": 0.0,
|
12803 |
+
"_oov_charset": "[]",
|
12804 |
+
"lossless": false
|
12805 |
+
},
|
12806 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/fr": {
|
12807 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12808 |
+
"organization": "Tencent",
|
12809 |
+
"vocab_size": 120818,
|
12810 |
+
"_n_bytes": 1540504,
|
12811 |
+
"_n_tokens": 438575,
|
12812 |
+
"_n_chars": 1484970,
|
12813 |
+
"_n_oov_chars": 0,
|
12814 |
+
"oov_ratio": 0.0,
|
12815 |
+
"_oov_charset": "[]",
|
12816 |
+
"lossless": false
|
12817 |
+
},
|
12818 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/ja": {
|
12819 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12820 |
+
"organization": "Tencent",
|
12821 |
+
"vocab_size": 120818,
|
12822 |
+
"_n_bytes": 1774770,
|
12823 |
+
"_n_tokens": 464604,
|
12824 |
+
"_n_chars": 603065,
|
12825 |
+
"_n_oov_chars": 0,
|
12826 |
+
"oov_ratio": 0.0,
|
12827 |
+
"_oov_charset": "[]",
|
12828 |
+
"lossless": false
|
12829 |
+
},
|
12830 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/ko": {
|
12831 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12832 |
+
"organization": "Tencent",
|
12833 |
+
"vocab_size": 120818,
|
12834 |
+
"_n_bytes": 1524839,
|
12835 |
+
"_n_tokens": 483060,
|
12836 |
+
"_n_chars": 655190,
|
12837 |
+
"_n_oov_chars": 0,
|
12838 |
+
"oov_ratio": 0.0,
|
12839 |
+
"_oov_charset": "[]",
|
12840 |
+
"lossless": false
|
12841 |
+
},
|
12842 |
+
"tencent/Hunyuan-1.8B-Instruct @ cc100/zh-Hans": {
|
12843 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tencent/Hunyuan-1.8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Hunyuan-1.8B-Instruct</a>",
|
12844 |
+
"organization": "Tencent",
|
12845 |
+
"vocab_size": 120818,
|
12846 |
+
"_n_bytes": 2633047,
|
12847 |
+
"_n_tokens": 533924,
|
12848 |
+
"_n_chars": 927311,
|
12849 |
+
"_n_oov_chars": 0,
|
12850 |
+
"oov_ratio": 0.0,
|
12851 |
+
"_oov_charset": "[]",
|
12852 |
+
"lossless": false
|
12853 |
+
},
|
12854 |
+
"zai-org/GLM-4.5 @ cc100/ar": {
|
12855 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12856 |
+
"organization": "Zai",
|
12857 |
+
"vocab_size": 151365,
|
12858 |
+
"_n_bytes": 2813283,
|
12859 |
+
"_n_tokens": 659822,
|
12860 |
+
"_n_chars": 1560987,
|
12861 |
+
"_n_oov_chars": 0,
|
12862 |
+
"oov_ratio": 0.0,
|
12863 |
+
"_oov_charset": "[]",
|
12864 |
+
"lossless": true
|
12865 |
+
},
|
12866 |
+
"zai-org/GLM-4.5 @ cc100/de": {
|
12867 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12868 |
+
"organization": "Zai",
|
12869 |
+
"vocab_size": 151365,
|
12870 |
+
"_n_bytes": 1814876,
|
12871 |
+
"_n_tokens": 457973,
|
12872 |
+
"_n_chars": 1784021,
|
12873 |
+
"_n_oov_chars": 0,
|
12874 |
+
"oov_ratio": 0.0,
|
12875 |
+
"_oov_charset": "[]",
|
12876 |
+
"lossless": true
|
12877 |
+
},
|
12878 |
+
"zai-org/GLM-4.5 @ cc100/en": {
|
12879 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12880 |
+
"organization": "Zai",
|
12881 |
+
"vocab_size": 151365,
|
12882 |
+
"_n_bytes": 1124813,
|
12883 |
+
"_n_tokens": 255149,
|
12884 |
+
"_n_chars": 1121360,
|
12885 |
+
"_n_oov_chars": 0,
|
12886 |
+
"oov_ratio": 0.0,
|
12887 |
+
"_oov_charset": "[]",
|
12888 |
+
"lossless": true
|
12889 |
+
},
|
12890 |
+
"zai-org/GLM-4.5 @ cc100/es": {
|
12891 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12892 |
+
"organization": "Zai",
|
12893 |
+
"vocab_size": 151365,
|
12894 |
+
"_n_bytes": 1664455,
|
12895 |
+
"_n_tokens": 405519,
|
12896 |
+
"_n_chars": 1630297,
|
12897 |
+
"_n_oov_chars": 0,
|
12898 |
+
"oov_ratio": 0.0,
|
12899 |
+
"_oov_charset": "[]",
|
12900 |
+
"lossless": true
|
12901 |
+
},
|
12902 |
+
"zai-org/GLM-4.5 @ cc100/fa": {
|
12903 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12904 |
+
"organization": "Zai",
|
12905 |
+
"vocab_size": 151365,
|
12906 |
+
"_n_bytes": 2054052,
|
12907 |
+
"_n_tokens": 453136,
|
12908 |
+
"_n_chars": 1145876,
|
12909 |
+
"_n_oov_chars": 0,
|
12910 |
+
"oov_ratio": 0.0,
|
12911 |
+
"_oov_charset": "[]",
|
12912 |
+
"lossless": true
|
12913 |
+
},
|
12914 |
+
"zai-org/GLM-4.5 @ cc100/fr": {
|
12915 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12916 |
+
"organization": "Zai",
|
12917 |
+
"vocab_size": 151365,
|
12918 |
+
"_n_bytes": 1540504,
|
12919 |
+
"_n_tokens": 391279,
|
12920 |
+
"_n_chars": 1484970,
|
12921 |
+
"_n_oov_chars": 0,
|
12922 |
+
"oov_ratio": 0.0,
|
12923 |
+
"_oov_charset": "[]",
|
12924 |
+
"lossless": true
|
12925 |
+
},
|
12926 |
+
"zai-org/GLM-4.5 @ cc100/ja": {
|
12927 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12928 |
+
"organization": "Zai",
|
12929 |
+
"vocab_size": 151365,
|
12930 |
+
"_n_bytes": 1774770,
|
12931 |
+
"_n_tokens": 410014,
|
12932 |
+
"_n_chars": 603065,
|
12933 |
+
"_n_oov_chars": 0,
|
12934 |
+
"oov_ratio": 0.0,
|
12935 |
+
"_oov_charset": "[]",
|
12936 |
+
"lossless": true
|
12937 |
+
},
|
12938 |
+
"zai-org/GLM-4.5 @ cc100/ko": {
|
12939 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12940 |
+
"organization": "Zai",
|
12941 |
+
"vocab_size": 151365,
|
12942 |
+
"_n_bytes": 1524839,
|
12943 |
+
"_n_tokens": 532358,
|
12944 |
+
"_n_chars": 655190,
|
12945 |
+
"_n_oov_chars": 0,
|
12946 |
+
"oov_ratio": 0.0,
|
12947 |
+
"_oov_charset": "[]",
|
12948 |
+
"lossless": true
|
12949 |
+
},
|
12950 |
+
"zai-org/GLM-4.5 @ cc100/zh-Hans": {
|
12951 |
+
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/zai-org/GLM-4.5\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">GLM-4.5</a>",
|
12952 |
+
"organization": "Zai",
|
12953 |
+
"vocab_size": 151365,
|
12954 |
+
"_n_bytes": 2633047,
|
12955 |
+
"_n_tokens": 556219,
|
12956 |
+
"_n_chars": 927311,
|
12957 |
+
"_n_oov_chars": 0,
|
12958 |
+
"oov_ratio": 0.0,
|
12959 |
+
"_oov_charset": "[]",
|
12960 |
+
"lossless": true
|
12961 |
}
|
12962 |
}
|
stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.ar.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.de.diff.json
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "Erstmals erreichte eine Raumsonde die Ceres. Mit den beiden Kameras an Bord erkunden die Wissenschaftler aus dem Max-Planck-Institut für Sonnensystemforschung in Göttingen die dunkle Oberfläche des Zwergplaneten. Wassereis haben sie schon entdeckt. Aber ruht tief unter den Kratern auch noch ein Ozean?",
|
4 |
+
"decoded_text": "Erstmals erreichte eine Raumsonde die Ceres. Mit den beiden Kameras an Bord erkunden die Wissenschaftler aus dem Max-Planck-Institut für Sonnensystemforschung in Göttingen die dunkle Oberfläche des Zwergplaneten. Wassereis haben sie schon entdeckt. Aber ruht tief unter den Kratern auch noch ein Ozean?",
|
5 |
+
"diff": [
|
6 |
+
"replace text[134:136] --> decoded_text[134:135] 'ü' --> 'ü'"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "Der Vergleich mit der bemannten Mondlandung mag ein wenig übertrieben erscheinen, doch zweifellos gehört Rosetta zu den kühnsten Unternehmen der Raumfahrt: Zum ersten Mal in der Geschichte begleitet eine Sonde einen Kometen auf seiner Bahn um die Sonne und soll Mitte November den Lander Philae auf dessen Oberfläche absetzen. Bei der Auswertung der Bilder und Daten von 67P/Churyumov-Gerasimenko, so der Name des Schweifsterns, sitzen Wissenschaftler des Göttinger Max-Planck-Instituts für Sonnensystemforschung in der ersten Reihe.",
|
14 |
+
"decoded_text": "Der Vergleich mit der bemannten Mondlandung mag ein wenig übertrieben erscheinen, doch zweifellos gehört Rosetta zu den kühnsten Unternehmen der Raumfahrt: Zum ersten Mal in der Geschichte begleitet eine Sonde einen Kometen auf seiner Bahn um die Sonne und soll Mitte November den Lander Philae auf dessen Oberfläche absetzen. Bei der Auswertung der Bilder und Daten von 67P/Churyumov-Gerasimenko, so der Name des Schweifsterns, sitzen Wissenschaftler des Göttinger Max-Planck-Instituts für Sonnensystemforschung in der ersten Reihe.",
|
15 |
+
"diff": [
|
16 |
+
"replace text[488:490] --> decoded_text[488:489] 'ü' --> 'ü'"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "Die Sonne ist der wichtigste Energiespender der Erde und Motor des Klimas. Doch sie schickt mal mehr, mal weniger Licht zur Erde. Astronomen um Natalie Krivova erfassen am Max-Planck-Institut für Sonnensystemforschung in Göttingen diese Schwankungen der Sonnenstrahlung in Modellen, um herauszufinden, ob die Veränderungen zur Erderwärmung beitragen oder ob sie ihr entgegenwirken.",
|
24 |
+
"decoded_text": "Die Sonne ist der wichtigste Energiespender der Erde und Motor des Klimas. Doch sie schickt mal mehr, mal weniger Licht zur Erde. Astronomen um Natalie Krivova erfassen am Max-Planck-Institut für Sonnensystemforschung in Göttingen diese Schwankungen der Sonnenstrahlung in Modellen, um herauszufinden, ob die Veränderungen zur Erderwärmung beitragen oder ob sie ihr entgegenwirken.",
|
25 |
+
"diff": [
|
26 |
+
"replace text[193:195] --> decoded_text[193:194] 'ü' --> 'ü'"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "Zum Glückwunsch haben Sie in den nächsten Wochen eine doppelte Gelegenheit:",
|
34 |
+
"decoded_text": "Zum Glückwunsch haben Sie in den nächsten Wochen eine doppelte Gelegenheit:",
|
35 |
+
"diff": [
|
36 |
+
"replace text[6:8] --> decoded_text[6:7] 'ü' --> 'ü'",
|
37 |
+
"replace text[35:37] --> decoded_text[34:35] 'ä' --> 'ä'"
|
38 |
+
],
|
39 |
+
"n_oov_chars": 0,
|
40 |
+
"oov_ratio": 0.0,
|
41 |
+
"oov_charset": "[]"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"text": "Zum einen haben wir im März einen neuen Kirchenvorstand gewählt: Jüngere und Ältere, Erfahrene und Neue mischen sich zu einer Gruppe, die die Verantwortung für die Gemeinde übernimmt. Ich bitte Sie und Euch alle, allen Kandidat*nnen zu danken, sie auf der Straße anzusprechen und zu beglückwünschen: Denn es ist nicht selbstverständlich, für ein Amt von sechs Jahren Dauer zu kandidieren.",
|
45 |
+
"decoded_text": "Zum einen haben wir im März einen neuen Kirchenvorstand gewählt: Jüngere und Ältere, Erfahrene und Neue mischen sich zu einer Gruppe, die die Verantwortung für die Gemeinde übernimmt. Ich bitte Sie und Euch alle, allen Kandidat*nnen zu danken, sie auf der Straße anzusprechen und zu beglückwünschen: Denn es ist nicht selbstverständlich, für ein Amt von sechs Jahren Dauer zu kandidieren.",
|
46 |
+
"diff": [
|
47 |
+
"replace text[24:26] --> decoded_text[24:25] 'ä' --> 'ä'",
|
48 |
+
"replace text[61:63] --> decoded_text[60:61] 'ä' --> 'ä'",
|
49 |
+
"replace text[69:71] --> decoded_text[67:68] 'ü' --> 'ü'",
|
50 |
+
"replace text[81:83] --> decoded_text[78:79] 'Ä' --> 'Ä'",
|
51 |
+
"replace text[162:164] --> decoded_text[158:159] 'ü' --> 'ü'",
|
52 |
+
"replace text[180:182] --> decoded_text[175:176] 'ü' --> 'ü'",
|
53 |
+
"replace text[295:297] --> decoded_text[289:290] 'ü' --> 'ü'",
|
54 |
+
"replace text[301:303] --> decoded_text[294:295] 'ü' --> 'ü'",
|
55 |
+
"replace text[340:342] --> decoded_text[332:333] 'ä' --> 'ä'",
|
56 |
+
"replace text[352:354] --> decoded_text[343:344] 'ü' --> 'ü'"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "Darüber freuen wir uns. Und wenn Sie je manden von denen kennen, freuen die sich, wenn Sie sie ansprechen und darin bestärken.",
|
64 |
+
"decoded_text": "Darüber freuen wir uns. Und wenn Sie je manden von denen kennen, freuen die sich, wenn Sie sie ansprechen und darin bestärken.",
|
65 |
+
"diff": [
|
66 |
+
"replace text[3:5] --> decoded_text[3:4] 'ü' --> 'ü'",
|
67 |
+
"replace text[122:124] --> decoded_text[121:122] 'ä' --> 'ä'"
|
68 |
+
],
|
69 |
+
"n_oov_chars": 0,
|
70 |
+
"oov_ratio": 0.0,
|
71 |
+
"oov_charset": "[]"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"text": "Unsere Kirchengemeinde lebt davon, dass manche eine Aufgabe haben oder eine besondere Zeit erleben – wie den Konfirmandenunterricht. Die anderen aber daran teilhaben und sie unterstützen und ihnen Glück wünschen. Und Sie werden merken, dass es Sie selbst beglücken kann, anderen Glück zu wünschen!",
|
75 |
+
"decoded_text": "Unsere Kirchengemeinde lebt davon, dass manche eine Aufgabe haben oder eine besondere Zeit erleben – wie den Konfirmandenunterricht. Die anderen aber daran teilhaben und sie unterstützen und ihnen Glück wünschen. Und Sie werden merken, dass es Sie selbst beglücken kann, anderen Glück zu wünschen!",
|
76 |
+
"diff": [
|
77 |
+
"replace text[184:186] --> decoded_text[184:185] 'ü' --> 'ü'",
|
78 |
+
"replace text[203:205] --> decoded_text[202:203] 'ü' --> 'ü'",
|
79 |
+
"replace text[209:211] --> decoded_text[207:208] 'ü' --> 'ü'",
|
80 |
+
"replace text[265:267] --> decoded_text[262:263] 'ü' --> 'ü'",
|
81 |
+
"replace text[288:290] --> decoded_text[284:285] 'ü' --> 'ü'",
|
82 |
+
"replace text[297:299] --> decoded_text[292:293] 'ü' --> 'ü'"
|
83 |
+
],
|
84 |
+
"n_oov_chars": 0,
|
85 |
+
"oov_ratio": 0.0,
|
86 |
+
"oov_charset": "[]"
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"text": "‹ Herzlichen Dank für all Ihre Spenden!",
|
90 |
+
"decoded_text": "‹ Herzlichen Dank für all Ihre Spenden!",
|
91 |
+
"diff": [
|
92 |
+
"replace text[19:21] --> decoded_text[19:20] 'ü' --> 'ü'"
|
93 |
+
],
|
94 |
+
"n_oov_chars": 0,
|
95 |
+
"oov_ratio": 0.0,
|
96 |
+
"oov_charset": "[]"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"text": "Bitte beachte: Du kannst die an uns erteile Einwilligung auch jederzeit widerrufen. Nutze dazu einfach unsere Kontaktmöglichkeiten. Durch den Widerruf wird die Rechtmäßigkeit der bis dahin erfolgten Verarbeitung nicht berührt.",
|
100 |
+
"decoded_text": "Bitte beachte: Du kannst die an uns erteile Einwilligung auch jederzeit widerrufen. Nutze dazu einfach unsere Kontaktmöglichkeiten. Durch den Widerruf wird die Rechtmäßigkeit der bis dahin erfolgten Verarbeitung nicht berührt.",
|
101 |
+
"diff": [
|
102 |
+
"replace text[166:168] --> decoded_text[166:167] 'ä' --> 'ä'",
|
103 |
+
"replace text[222:224] --> decoded_text[221:222] 'ü' --> 'ü'"
|
104 |
+
],
|
105 |
+
"n_oov_chars": 0,
|
106 |
+
"oov_ratio": 0.0,
|
107 |
+
"oov_charset": "[]"
|
108 |
+
}
|
109 |
+
]
|
stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.fa.diff.json
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "رئيس حوزه علميه اصفهان گفت: دليل نزول اکثر مصائب رفتار خودِ انسان هاست؛ فقر، بيماري هاي لاعلاج و ناامني هاي اجتماعي، همه و همه نتيجه گناهاني است که اکثر افراد جامعه مرتکب مي شوند. به گزارش رسا، حضرت آيت الله حسين مظاهري در جلسه تفسير قرآن صبح ديروز که در مسجد اميرالمؤمنين(ع) خيابان جي برگزار شد، گفت: هميشه خير و صلاح افراد در بهره برداري بيشتر از دنيا نيست. وي در ادامه تفسير آيه صد و پنجاه و پنجم سوره بقره که مي فرمايد«وَلَنَبْلُوَنَّکُمْ بِشَيْءٍ مِنَ الْخَوْفِ وَالْجُوعِ وَنَقْصٍ مِنَ الأمْوَالِ وَالأنْفُسِ وَالثَّمَرَاتِ وَبَشِّرِ الصَّابِرِينَ» افزود: بلاها و مصائب با اين دنيا عجين شده است و اين بلاها در همه زمينه هاي فردي و اجتماعي جريان دارد و رفتار مردم در برابر اين بلاها و مصائب به دو بخش تقسيم مي شود، عده اي در برابر مصائب جزع و فزع مي کنند و گاهي منکر همه اعتقادات و باورهايشان مي شوند و عده اي در برابر همين بلاها صبر پيشه مي کنند و همه مصائب را از سوي خداوند مي بينند و به فعل خداوند راضي هستند. حضرت آيت الله مظاهري برخي از مصائب را از الطاف خفيه الهي دانست و خاطرنشان کرد: خداوند بر افرادي که در برابر مصائب صبر پيشه",
|
4 |
+
"decoded_text": "رئيس حوزه علميه اصفهان گفت: دليل نزول اکثر مصائب رفتار خودِ انسان هاست؛ فقر، بيماري هاي لاعلاج و ناامني هاي اجتماعي، همه و همه نتيجه گناهاني است که اکثر افراد جامعه مرتکب مي شوند. به گزارش رسا، حضرت آيت الله حسين مظاهري در جلسه تفسير قرآن صبح ديروز که در مسجد اميرالمؤمنين(ع) خيابان جي برگزار شد، گفت: هميشه خير و صلاح افراد در بهره برداري بيشتر از دنيا نيست. وي در ادامه تفسير آيه صد و پنجاه و پنجم سوره بقره که مي فرمايد«وَلَنَبْلُوَنَّکُمْ بِشَيْءٍ مِنَ الْخَوْفِ وَالْجُوعِ وَنَقْصٍ مِنَ الأمْوَالِ وَالأنْفُسِ وَالثَّمَرَاتِ وَبَشِّرِ الصَّابِرِينَ» افزود: بلاها و مصائب با اين دنيا عجين شده است و اين بلاها در همه زمينه هاي فردي و اجتماعي جريان دارد و رفتار مردم در برابر اين بلاها و مصائب به دو بخش تقسيم مي شود، عده اي در برابر مصائب جزع و فزع مي کنند و گاهي منکر همه اعتقادات و باورهايشان مي شوند و عده اي در برابر همين بلاها صبر پيشه مي کنند و همه مصائب را از سوي خداوند مي بينند و به فعل خداوند راضي هستند. حضرت آيت الله مظاهري برخي از مصائب را از الطاف خفيه الهي دانست و خاطرنشان کرد: خداوند بر افرادي که در برابر مصائب صبر پيشه",
|
5 |
+
"diff": [
|
6 |
+
"insert text[436:436] --> decoded_text[436:437] '' --> 'َ'",
|
7 |
+
"delete text[437:438] --> decoded_text[438:438] 'َ' --> ''",
|
8 |
+
"insert text[520:520] --> decoded_text[520:521] '' --> 'َ'",
|
9 |
+
"delete text[521:522] --> decoded_text[522:522] 'َ' --> ''",
|
10 |
+
"insert text[543:543] --> decoded_text[543:544] '' --> 'َ'",
|
11 |
+
"delete text[544:545] --> decoded_text[545:545] 'َ' --> ''"
|
12 |
+
],
|
13 |
+
"n_oov_chars": 0,
|
14 |
+
"oov_ratio": 0.0,
|
15 |
+
"oov_charset": "[]"
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"text": "«رِجَالٌ لا تُلْهِیهِمْ تجَارَةٌ وَ لا بَیْعٌ عَن ذِکْرِ اللهِ وَ إِقَامِ الصَّلَوةِ وَ إِیتَاءِ الزَّکَوةِ ـ مردانی که نه تجارت و نه معاملهای آنان را از یاد خدا و برپاداشتن نماز و ادای زکات غافل نمیکند.»[1]",
|
19 |
+
"decoded_text": "«رِجَالٌ لا تُلْهِیهِمْ تجَارَةٌ وَ لا بَیْعٌ عَن ذِکْرِ اللهِ وَ إِقَامِ الصَّلَوةِ وَ إِیتَاءِ الزَّکَوةِ ـ مردانی که نه تجارت و نه معاملهای آنان را از یاد خدا و برپاداشتن نماز و ادای زکات غافل نمیکند.»[1]",
|
20 |
+
"diff": [
|
21 |
+
"insert text[77:77] --> decoded_text[77:78] '' --> 'َ'",
|
22 |
+
"delete text[78:79] --> decoded_text[79:79] 'َ' --> ''",
|
23 |
+
"insert text[100:100] --> decoded_text[100:101] '' --> 'َ'",
|
24 |
+
"delete text[101:102] --> decoded_text[102:102] 'َ' --> ''"
|
25 |
+
],
|
26 |
+
"n_oov_chars": 0,
|
27 |
+
"oov_ratio": 0.0,
|
28 |
+
"oov_charset": "[]"
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"text": "«وَ إِمَّا یَنْزَغَنَّکَ مِنَ الشَّیْطانِ نَزْغٌ فَاسْتَعِذْ بِاللهِ إِنَّهُ سَمِیعٌ عَلِیمٌ ـ و هر گاه وسوسهای از شیطان به تو رسد، به خدا پناه ببر که او شنوای داناست.»[19]",
|
32 |
+
"decoded_text": "«وَ إِمَّا یَنْزَغَنَّکَ مِنَ الشَّیْطانِ نَزْغٌ فَاسْتَعِذْ بِاللهِ إِنَّهُ سَمِیعٌ عَلِیمٌ ـ و هر گاه وسوسهای از شیطان به تو رسد، به خدا پناه ببر که او شنوای داناست.»[19]",
|
33 |
+
"diff": [
|
34 |
+
"insert text[7:7] --> decoded_text[7:8] '' --> 'َ'",
|
35 |
+
"delete text[8:9] --> decoded_text[9:9] 'َ' --> ''",
|
36 |
+
"insert text[20:20] --> decoded_text[20:21] '' --> 'َ'",
|
37 |
+
"delete text[21:22] --> decoded_text[22:22] 'َ' --> ''",
|
38 |
+
"insert text[33:33] --> decoded_text[33:34] '' --> 'َ'",
|
39 |
+
"delete text[34:35] --> decoded_text[35:35] 'َ' --> ''",
|
40 |
+
"insert text[72:72] --> decoded_text[72:73] '' --> 'َ'",
|
41 |
+
"delete text[73:74] --> decoded_text[74:74] 'َ' --> ''"
|
42 |
+
],
|
43 |
+
"n_oov_chars": 0,
|
44 |
+
"oov_ratio": 0.0,
|
45 |
+
"oov_charset": "[]"
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"text": "«وَ قُلْ رَبِّ أَعُوذُ بِکَ مِنْ هَمَزاتِ الشَّیَاطِینِ* وَ أَعُوذُ بِکَ رَبِّ أَنْ یَحْضُرُونِ ـ بگو ای پروردگار من؛ پناه میبرم به تو از وسوسههای شیاطین* و پناه میبرم به تو ای پروردگار من؛ از اینکه حاضر شوند.»([20]",
|
49 |
+
"decoded_text": "«وَ قُلْ رَبِّ أَعُوذُ بِکَ مِنْ هَمَزاتِ الشَّیَاطِینِ* وَ أَعُوذُ بِکَ رَبِّ أَنْ یَحْضُرُونِ ـ بگو ای پروردگار من؛ پناه میبرم به تو از وسوسههای شیاطین* و پناه میبرم به تو ای پروردگار من؛ از اینکه حاضر شوند.»([20]",
|
50 |
+
"diff": [
|
51 |
+
"insert text[12:12] --> decoded_text[12:13] '' --> 'ِ'",
|
52 |
+
"delete text[13:14] --> decoded_text[14:14] 'ِ' --> ''",
|
53 |
+
"insert text[45:45] --> decoded_text[45:46] '' --> 'َ'",
|
54 |
+
"delete text[46:47] --> decoded_text[47:47] 'َ' --> ''",
|
55 |
+
"insert text[76:76] --> decoded_text[76:77] '' --> 'ِ'",
|
56 |
+
"delete text[77:78] --> decoded_text[78:78] 'ِ' --> ''"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "امیرالمؤمنین علیهالسلام فرمودهاند: «هنگامی که شیطان یکی از شما را وسوسه نمود، باید به خدا پناه ببرد و بگوید: آمَنْتُ بِاللهِ وَ بِرَسُولِهِ مُخْلِصاً لَهُ الدِّینَ»[23]",
|
64 |
+
"decoded_text": "امیرالمؤمنین علیهالسلام فرمودهاند: «هنگامی که شیطان یکی از شما را وسوسه نمود، باید به خدا پناه ببرد و بگوید: آمَنْتُ بِاللهِ وَ بِرَسُولِهِ مُخْلِصاً لَهُ الدِّینَ»[23]",
|
65 |
+
"diff": [
|
66 |
+
"insert text[161:161] --> decoded_text[161:162] '' --> 'ِ'",
|
67 |
+
"delete text[162:163] --> decoded_text[163:163] 'ِ' --> ''"
|
68 |
+
],
|
69 |
+
"n_oov_chars": 0,
|
70 |
+
"oov_ratio": 0.0,
|
71 |
+
"oov_charset": "[]"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"text": "حضرت صادق علیهالسلام فرمود: مردی خدمت رسول خدا آمد و گفت: یا رسول الله؛ از وسوسهای که در نماز به من القا میشود شکایت دارم. حتی اینکه نمیدانم چهقدر نماز کردم از زیاده یا کم. حضرت فرمودند: «وقتی داخل نماز شدی، انگشت سبّابهی دست راستت را به ران پای چپت بزن و سپس بگو: بِسْمِ اللهِ وَ بِاللهِ تَوَکَّلْتُ عَلَی اللهِ أَعُوذُ بِاللهِ السَّمِیعِ الْعَلِیمِ مِنَ الشَّیْطَانِ الرَّجِیمِ ـ پس همانا او را دور و از خود منع و طرد کنی.»[24]",
|
75 |
+
"decoded_text": "حضرت صادق علیهالسلام فرمود: مردی خدمت رسول خدا آمد و گفت: یا رسول الله؛ از وسوسهای که در نماز به من القا میشود شکایت دارم. حتی اینکه نمیدانم چهقدر نماز کردم از زیاده یا کم. حضرت فرمودند: «وقتی داخل نماز شدی، انگشت سبّابهی دست راستت را به ران پای چپت بزن و سپس بگو: بِسْمِ اللهِ وَ بِاللهِ تَوَکَّلْتُ عَلَی اللهِ أَعُوذُ بِاللهِ السَّمِیعِ الْعَلِیمِ مِنَ الشَّیْطَانِ الرَّجِیمِ ـ پس همانا او را دور و از خود منع و طرد کنی.»[24]",
|
76 |
+
"diff": [
|
77 |
+
"insert text[301:301] --> decoded_text[301:302] '' --> 'َ'",
|
78 |
+
"delete text[302:303] --> decoded_text[303:303] 'َ' --> ''",
|
79 |
+
"insert text[339:339] --> decoded_text[339:340] '' --> 'َ'",
|
80 |
+
"delete text[340:341] --> decoded_text[341:341] 'َ' --> ''",
|
81 |
+
"insert text[366:366] --> decoded_text[366:367] '' --> 'َ'",
|
82 |
+
"delete text[367:368] --> decoded_text[368:368] 'َ' --> ''",
|
83 |
+
"insert text[379:379] --> decoded_text[379:380] '' --> 'َ'",
|
84 |
+
"delete text[380:381] --> decoded_text[381:381] 'َ' --> ''"
|
85 |
+
],
|
86 |
+
"n_oov_chars": 0,
|
87 |
+
"oov_ratio": 0.0,
|
88 |
+
"oov_charset": "[]"
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"text": "آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت مى آید. کد خبر: ۷۴۸۰۲. تاریخ: ۱۸ اردیبهشت ۱۳۹۴ - ۰۹:۲۱. رسول خدا صلى الله علیه و آله :. المُعَلِّمونَ خَیرُ النّاسِ کُلَّما أخلَقَ الذِّکرُ جَدَّدوهُ، أعطوهُم ولا تَستَأجِروهُم فَتُحرِجوهُم؛. آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش. مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت. مى آید. الفردوس : ۴ / ۱۹۳ / ۶۵۹۷ . علم و حکمت ج 2، ص 626. امام هادی علیه السلام:.",
|
92 |
+
"decoded_text": "آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت مى آید. کد خبر: ۷۴۸۰۲. تاریخ: ۱۸ اردیبهشت ۱۳۹۴ - ۰۹:۲۱. رسول خدا صلى الله علیه و آله :. المُعَلِّمونَ خَیرُ النّاسِ کُلَّما أخلَقَ الذِّکرُ جَدَّدوهُ، أعطوهُم ولا تَستَأجِروهُم فَتُحرِجوهُم؛. آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش. مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت. مى آید. الفردوس : ۴ / ۱۹۳ / ۶۵۹۷ . علم و حکمت ج 2، ص 626. امام هادی علیه السلام:.",
|
93 |
+
"diff": [
|
94 |
+
"insert text[230:230] --> decoded_text[230:231] '' --> 'ِ'",
|
95 |
+
"insert text[231:231] --> decoded_text[232:249] '' --> 'مونَ خَیرُ النّاس'",
|
96 |
+
"replace text[232:246] --> decoded_text[250:255] 'مونَ خَیرُ الن' --> ' کُلَ'",
|
97 |
+
"replace text[247:249] --> decoded_text[256:269] 'اس' --> 'ما أخلَقَ الذ'",
|
98 |
+
"delete text[250:254] --> decoded_text[270:270] ' کُل' --> ''",
|
99 |
+
"replace text[255:269] --> decoded_text[271:279] 'َما أخلَقَ الذ' --> 'کرُ جَدَ'",
|
100 |
+
"delete text[270:280] --> decoded_text[280:280] 'ِکرُ جَدَّ' --> ''"
|
101 |
+
],
|
102 |
+
"n_oov_chars": 0,
|
103 |
+
"oov_ratio": 0.0,
|
104 |
+
"oov_charset": "[]"
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"text": "آن گاه که در روز قیامت برانگیخته شوم، گناهکاران امّت پیامبر اسلام را شفاعت خواهم کرد. کد خبر: ۷۲۳۰۷. تاریخ: ۲۱ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إذا حُشِرتُ یَومَ القِیامَةِ أشفَعُ عُصاةَ اُمَّةِ النَّبِیِّ صلی الله علیه و آله؛. آن گاه که در روز قیامت برانگیخته شوم، گناهکاران امّت پیامبر اسلام را شفاعت خواهم کرد. إحقاق الحقّ، ج 19، ص 129 ؛ آینه یادها ص 172. امام على علیه السلام:. ضادُّوا التَّوانِیَ بِالعَزمِ؛. از راه تصمیم راسخ گرفتن، با سستى نبرد کنید. عیون الحکم والمواعظ: ص ۳۱۰ ح ۵۴۵۴ / میزان الحکمه: ج10 ص134.",
|
108 |
+
"decoded_text": "آن گاه که در روز قیامت برانگیخته شوم، گن��هکاران امّت پیامبر اسلام را شفاعت خواهم کرد. کد خبر: ۷۲۳۰۷. تاریخ: ۲۱ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إذا حُشِرتُ یَومَ القِیامَةِ أشفَعُ عُصاةَ اُمَّةِ النَّبِیِّ صلی الله علیه و آله؛. آن گاه که در روز قیامت برانگیخته شوم، گناهکاران امّت پیامبر اسلام را شفاعت خواهم کرد. إحقاق الحقّ، ج 19، ص 129 ؛ آینه یادها ص 172. امام على علیه السلام:. ضادُّوا التَّوانِیَ بِالعَزمِ؛. از راه تصمیم راسخ گرفتن، با سستى نبرد کنید. عیون الحکم والمواعظ: ص ۳۱۰ ح ۵۴۵۴ / میزان الحکمه: ج10 ص134.",
|
109 |
+
"diff": [
|
110 |
+
"replace text[207:209] --> decoded_text[207:209] 'َّ' --> 'َّ'",
|
111 |
+
"replace text[215:222] --> decoded_text[215:222] 'َّبِیِّ' --> 'َّبِیِّ'",
|
112 |
+
"delete text[402:403] --> decoded_text[402:402] 'ّ' --> ''",
|
113 |
+
"replace text[404:412] --> decoded_text[403:412] 'وا التَّ' --> 'ّوا التَّ'"
|
114 |
+
],
|
115 |
+
"n_oov_chars": 0,
|
116 |
+
"oov_ratio": 0.0,
|
117 |
+
"oov_charset": "[]"
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"text": "آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد: «اى مردم ! مدّت حکومت جبّاران بر شما، به پایان رسید و بهترین فرد امّت محمّد، حکومت را به دست گرفته است، پس به مکّه بروید». کد خبر: ۷۱۵۹۷. تاریخ: ۱۲ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر صلى الله علیه وآله:. إذا. کانَ عِندَ خُروجِ القائِمِ یُنادی مُنادٍ مِنَ السَّماءِ: أیُّهَا. النّاسُ! قَطَعَ عَنکُم مُدَّةُ الجَبّارینَ ووَلِیَ الأَمرَ خَیرُ اُمَّةِ. مُحَمَّدٍ فَالحَقوا بِمَکَّةَ؛. آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد:. «اى مردم ! مدّت حکومت جبّاران بر شما، به",
|
121 |
+
"decoded_text": "آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد: «اى مردم ! مدّت حکومت جبّاران بر شما، به پایان رسید و بهترین فرد امّت محمّد، حکومت را به دست گرفته است، پس به مکّه بروید». کد خبر: ۷۱۵۹۷. تاریخ: ۱۲ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر صلى الله علیه وآله:. إذا. کانَ عِندَ خُروجِ القائِمِ یُنادی مُنادٍ مِنَ السَّماءِ: أیُّهَا. النّاسُ! قَطَعَ عَنکُم مُدَّةُ الجَبّارینَ ووَلِیَ الأَمرَ خَیرُ اُمَّةِ. مُحَمَّدٍ فَالحَقوا بِمَکَّةَ؛. آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد:. «اى مردم ! مدّت حکومت جبّاران بر شما، به",
|
122 |
+
"diff": [
|
123 |
+
"replace text[321:323] --> decoded_text[321:323] 'َّ' --> 'َّ'",
|
124 |
+
"replace text[331:333] --> decoded_text[331:333] 'ُّ' --> 'ُّ'",
|
125 |
+
"replace text[364:366] --> decoded_text[364:366] 'َّ' --> 'َّ'",
|
126 |
+
"replace text[406:408] --> decoded_text[406:408] 'َّ' --> 'َّ'",
|
127 |
+
"replace text[417:419] --> decoded_text[417:419] 'َّ' --> 'َّ'",
|
128 |
+
"replace text[437:439] --> decoded_text[437:439] 'َّ' --> 'َّ'"
|
129 |
+
],
|
130 |
+
"n_oov_chars": 0,
|
131 |
+
"oov_ratio": 0.0,
|
132 |
+
"oov_charset": "[]"
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"text": "آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. کد خبر: ۷۳۵۳۸. تاریخ: ۰۲ اردیبهشت ۱۳۹۴ - ۰۶:۰۰. امام جواد(سلام الله علیه):. مَن هَجَرَ الْمُداراةَ قَاربَهُ المَکرُوهُ؛. آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. بحارالأنوار، ج 68، ص 341. پیامبر خدا(صلی الله علیه و آله):. لا تَخَفْ فِی اللَّهِ لَومَةَ لائمٍ؛. در راه خدا از ملامت و نکوهش ملامتگران نترس. معانى الأخبار، ص 335.",
|
136 |
+
"decoded_text": "آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. کد خبر: ۷۳۵۳۸. تاریخ: ۰۲ اردیبهشت ۱۳۹۴ - ۰۶:۰۰. امام جواد(سلام الله علیه):. مَن هَجَرَ الْمُداراةَ قَاربَهُ المَکرُوهُ؛. آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. بحارالأنوار، ج 68، ص 341. پیامبر خدا(صلی الله علیه و آله):. لا تَخَفْ فِی اللَّهِ لَومَةَ لائمٍ؛. در راه خدا از ملامت و نکوهش ملامتگران نترس. معانى الأخبار، ص 335.",
|
137 |
+
"diff": [
|
138 |
+
"insert text[310:310] --> decoded_text[310:311] '' --> 'َ'",
|
139 |
+
"delete text[311:312] --> decoded_text[312:312] 'َ' --> ''"
|
140 |
+
],
|
141 |
+
"n_oov_chars": 0,
|
142 |
+
"oov_ratio": 0.0,
|
143 |
+
"oov_charset": "[]"
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"text": "آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. کد خبر: ۶۸۸۵۸. تاریخ: ۲۶ بهمن ۱۳۹۳ - ۰۶:۰۰. امام علی علیه السلام:. دَع ما لا یَعنِیکَ، وَ اشتَغِل بِمُهِمِّکَ الَّذی یُنجِیکَ؛. آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. غرر الحکم: ح ۵۱۳۳/ گزیده غررالحکم و دررالکلم، ص52. امام على علیهالسلام :. عَظِّمُوا أقدارَکُم بِالتَّغافُلِ عَنِ الدَّنِیِّ مِنَ الاُْمُورِ ؛. با بى توجهى به امور پست، بر ارزش خود بیفزایید . تحف العقول ، ص 224.",
|
147 |
+
"decoded_text": "آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. کد خبر: ۶۸۸۵۸. تاریخ: ۲۶ بهمن ۱۳۹۳ - ۰۶:۰۰. امام علی علیه السلام:. دَع ما لا یَعنِیکَ، وَ اشتَغِل بِمُهِمِّکَ الَّذی یُنجِیکَ؛. آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. غرر الحکم: ح ۵۱۳۳/ گزیده غررالحکم و دررالکلم، ص52. امام على علیهالسلام :. عَظِّمُوا أقدارَکُم بِالتَّغافُلِ عَنِ الدَّنِیِّ مِنَ الاُْمُورِ ؛. با بى توجهى به امور پست، بر ارزش خود بیفزایید . تحف العقول ، ص 224.",
|
148 |
+
"diff": [
|
149 |
+
"replace text[174:183] --> decoded_text[174:183] 'ِّکَ الَّ' --> 'ِّکَ الَّ'",
|
150 |
+
"replace text[344:346] --> decoded_text[344:346] 'ِّ' --> 'ِّ'",
|
151 |
+
"replace text[366:368] --> decoded_text[366:368] 'َّ' --> 'َّ'",
|
152 |
+
"replace text[383:399] --> decoded_text[383:400] 'َّنِیِّ مِنَ الا' --> 'َّنِیِّ مِنَ الاُ'",
|
153 |
+
"delete text[400:401] --> decoded_text[401:401] 'ُ' --> ''"
|
154 |
+
],
|
155 |
+
"n_oov_chars": 0,
|
156 |
+
"oov_ratio": 0.0,
|
157 |
+
"oov_charset": "[]"
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"text": "اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. کد خبر: ۷۱۵۲۴. تاریخ: ۰۳ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إنْ کُنتَ تَعمَلُ بِما أمَرناکَ و تَنتَهی عَمّا زَجَرناکَ عَنهُ فَأنتَ مِن شیعَتِنا و إلّا فَلا؛. اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. بحار الأنوار ، ج ۶۸ ، ص ۱۵۵ . امام على سلام الله علیه:. تَبارَکَ. اللّهُ الَّذی . . . أنشَأَ السَّحابَ الثِّقالَ ، فَأَهطَلَ دِیَمَها. وعَدَّدَ قِسَمَها",
|
161 |
+
"decoded_text": "اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. کد خبر: ۷۱۵۲۴. تاریخ: ۰۳ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إنْ کُنتَ تَعمَلُ بِما أمَرناکَ و تَنتَهی عَمّا زَجَرناکَ عَنهُ فَأنتَ مِن شیعَتِنا و إلّا فَلا؛. اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. بحار الأنوار ، ج ۶۸ ، ص ۱۵۵ . امام على سلام الله علیه:. تَبارَکَ. اللّهُ الَّذی . . . أنشَأَ السَّحابَ الثِّقالَ ، فَأَهطَلَ دِیَمَها. وعَدَّدَ قِسَمَها",
|
162 |
+
"diff": [
|
163 |
+
"replace text[470:472] --> decoded_text[470:472] 'َّ' --> 'َّ'",
|
164 |
+
"replace text[491:493] --> decoded_text[491:493] 'َّ' --> 'َّ'",
|
165 |
+
"delete text[501:502] --> decoded_text[501:501] 'ّ' --> ''",
|
166 |
+
"insert text[503:503] --> decoded_text[502:503] '' --> 'ّ'",
|
167 |
+
"replace text[534:536] --> decoded_text[534:536] 'َّ' --> 'َّ'"
|
168 |
+
],
|
169 |
+
"n_oov_chars": 0,
|
170 |
+
"oov_ratio": 0.0,
|
171 |
+
"oov_charset": "[]"
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"text": "اگر مىتوانى، همیشه با وضو باش. کد خبر: ۷۱۵۰۰. تاریخ: ۲۹ اسفند ۱۳۹۳ - ۱۲:۰۷. پیامبر اکرم (صلی الله علیه و آله):. إنِ استَطَعتَ أن تَکونَ أبَداً عَلى وُضوءٍ فَافعَل. اگر مىتوانى، همیشه با وضو باش. حکمت نامه پیامبر اعظم(ص): ج9- ص306 - ح 7205. امام باقر (سلام الله علیه) :. الکَسَلُ یُضِرُّ بِالدِّینِ والدُّنیا. تنبلى به دین و دنیا ضرر مىزند. میزان الحکمة: ج10- ص131- ح 17769.",
|
175 |
+
"decoded_text": "اگر مىتوانى، همیشه با وضو باش. کد خبر: ۷۱۵۰۰. تاریخ: ۲۹ اسفند ۱۳۹۳ - ۱۲:۰۷. پیامبر اکرم (صلی الله علیه و آله):. إنِ استَطَعتَ أن تَکونَ أبَداً عَلى وُضوءٍ فَافعَل. اگر مىتوانى، همیشه با وضو باش. حکمت نامه پیامبر اعظم(ص): ج9- ص306 - ح 7205. امام باقر (سلام الله علیه) :. الکَسَلُ یُضِرُّ بِالدِّینِ والدُّنیا. تنبلى به دین و دنیا ضرر مىزند. میزان الحکمة: ج10- ص131- ح 17769.",
|
176 |
+
"diff": [
|
177 |
+
"insert text[288:288] --> decoded_text[288:289] '' --> 'ُ'",
|
178 |
+
"replace text[289:296] --> decoded_text[290:297] 'ُ بِالد' --> ' بِالدِ'",
|
179 |
+
"replace text[297:306] --> decoded_text[298:307] 'ِینِ والد' --> 'ینِ والدُ'",
|
180 |
+
"delete text[307:308] --> decoded_text[308:308] 'ُ' --> ''"
|
181 |
+
],
|
182 |
+
"n_oov_chars": 0,
|
183 |
+
"oov_ratio": 0.0,
|
184 |
+
"oov_charset": "[]"
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"text": "اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. کد خبر: ۷۱۵۰۱. تاریخ: ۰۱ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر اکرم صلی الله علیه وآله. فَنَیرِزُوا إن قَدَرتُم کُلَّ یَومٍ یَعنی تَهادَوا و تَواصَلُوا فِی اللَّهِ؛. اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. دعائم الإسلام: ج 2، ص 326. امام صادق سلام الله علیه. إنَّ یَومَ النَّیروزِ هُوَ الیَومُ الّذى أخَذَ اللَّهُ فیهِ مَواثیقَ العِبادِ أن یَعبُدوهُ. روز نوروز همان روزى است که خداوند از بند",
|
188 |
+
"decoded_text": "اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. کد خبر: ۷۱۵۰۱. تاریخ: ۰۱ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر اکرم صلی الله علیه وآله. فَنَیرِزُوا إن قَدَرتُم کُلَّ یَومٍ یَعنی تَهادَوا و تَواصَلُوا فِی اللَّهِ؛. اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. دعائم الإسلام: ج 2، ص 326. امام صادق سلام الله علیه. إنَّ یَومَ النَّیروزِ هُوَ الیَومُ الّذى أخَذَ اللَّهُ فیهِ مَواثیقَ العِبادِ أن یَعبُدوهُ. روز نوروز همان روزى است که خداوند از بند",
|
189 |
+
"diff": [
|
190 |
+
"insert text[210:210] --> decoded_text[210:211] '' --> 'َ'",
|
191 |
+
"delete text[211:212] --> decoded_text[212:212] 'َ' --> ''",
|
192 |
+
"insert text[254:254] --> decoded_text[254:255] '' --> 'َ'",
|
193 |
+
"delete text[255:256] --> decoded_text[256:256] 'َ' --> ''",
|
194 |
+
"insert text[420:420] --> decoded_text[420:421] '' --> 'َ'",
|
195 |
+
"replace text[421:432] --> decoded_text[422:433] 'َ یَومَ الن' --> ' یَومَ النَ'",
|
196 |
+
"delete text[433:434] --> decoded_text[434:434] 'َ' --> ''",
|
197 |
+
"insert text[468:468] --> decoded_text[468:469] '' --> 'َ'",
|
198 |
+
"delete text[469:470] --> decoded_text[470:470] 'َ' --> ''"
|
199 |
+
],
|
200 |
+
"n_oov_chars": 0,
|
201 |
+
"oov_ratio": 0.0,
|
202 |
+
"oov_charset": "[]"
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"text": "امام باقر علیه السلام:. أحسِن؛ فَإِنّى لَم أرَ شَیئاً قَطُّ أشَدَّ طَلَباً ولا أسرَعَ دَرکاً مِن حَسَنَةٍ مُحدَثَةٍ لِذَنبٍ قَدیمٍ؛. نیکى کن؛ که بهراستى هرگز چیزى مانند کار نیکِ جدید را ندیدهام. که اینچنین، به تعقیب یک گناه قدیم برآید و با سرعت، خود را به آن برساند. [و آن را محو سازد]. علل الشرائع: ص ۵۹۹ ح ۴۹ / حکمتنامه حضرت عبدالعظیم الحسنی علیه السلام، ص175. امام صادق سلام الله علیه :. کَثرَةُ النَّومِ مَذهَبَةٌ للدِّینِ والدُّنیا؛. پرخوابى، دین و دنیا را از بین مىبرد. کافی : ج 5، ص 84، ح 1 / میزان الحکمة: ج 12 ، ص 493.",
|
206 |
+
"decoded_text": "امام باقر علیه السلام:. أحسِن؛ فَإِنّى لَم أرَ شَیئاً قَطُّ أشَدَّ طَلَباً ولا أسرَعَ دَرکاً مِن حَسَنَةٍ مُحدَثَةٍ لِذَنبٍ قَدیمٍ؛. نیکى کن؛ که بهراستى هرگز چیزى مانند کار نیکِ جدید را ندیدهام. که اینچنین، به تعقیب یک گناه قدیم برآید و با سرعت، خود را به آن برساند. [و آن را محو سازد]. علل الشرائع: ص ۵۹۹ ح ۴۹ / حکمتنامه حضرت عبدالعظیم الحسنی علیه السلام، ص175. امام صادق سلام الله علیه :. کَثرَةُ النَّومِ مَذهَبَةٌ للدِّینِ والدُّنیا؛. پرخوابى، دین و دنیا را از بین مىبرد. کافی : ج 5، ص 84، ح 1 / میزان الحکمة: ج 12 ، ص 493.",
|
207 |
+
"diff": [
|
208 |
+
"insert text[58:58] --> decoded_text[58:59] '' --> 'ُ'",
|
209 |
+
"delete text[59:60] --> decoded_text[60:60] 'ُ' --> ''",
|
210 |
+
"insert text[65:65] --> decoded_text[65:66] '' --> 'َ'",
|
211 |
+
"delete text[66:67] --> decoded_text[67:67] 'َ' --> ''",
|
212 |
+
"insert text[408:408] --> decoded_text[408:409] '' --> 'َ'",
|
213 |
+
"delete text[409:410] --> decoded_text[410:410] 'َ' --> ''",
|
214 |
+
"insert text[427:427] --> decoded_text[427:428] '' --> 'ِ'",
|
215 |
+
"replace text[428:437] --> decoded_text[429:438] 'ِینِ والد' --> 'ینِ والدُ'",
|
216 |
+
"delete text[438:439] --> decoded_text[439:439] 'ُ' --> ''"
|
217 |
+
],
|
218 |
+
"n_oov_chars": 0,
|
219 |
+
"oov_ratio": 0.0,
|
220 |
+
"oov_charset": "[]"
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"text": "امام باقر علیه السلام:. أقرَبُ ما یَکونُ العَبدُ مِنَ اللَّهِ إذا کانَ فِی الصَّلاةِ؛. نزدیکترین حالت بنده به خدا، هنگامى است که در نماز است. دعائم الإسلام: ج ۱ ص ۱۳۴/ شناختنامه نماز: ج1 ص204. پیامبر اکرم صلی الله علیه و آله:. فإنَّ خِیارَکُم خِیارُکُم لِأهلِهِ. براستى بهترین شما کسى است که براى خانواده اش بهتر باشد . بحار الأنوار :ج 5 ، ص 268 ، ح79 / میزان الحکمة : ج 5 ، ص 101.",
|
224 |
+
"decoded_text": "امام باقر علیه السلام:. أقرَبُ ما یَکونُ العَبدُ مِنَ اللَّهِ إذا کانَ فِی الصَّلاةِ؛. نزدیکترین حالت بنده به خدا، هنگامى است که در نماز است. دعائم الإسلام: ج ۱ ص ۱۳۴/ شناختنامه نماز: ج1 ص204. پیامبر اکرم صلی الله علیه و آله:. فإنَّ خِیارَکُم خِیارُکُم لِأهلِهِ. براستى بهترین شما کسى است که براى خانواده اش بهتر باشد . بحار الأنوار :ج 5 ، ص 268 ، ح79 / میزان الحکمة : ج 5 ، ص 101.",
|
225 |
+
"diff": [
|
226 |
+
"insert text[57:57] --> decoded_text[57:58] '' --> 'َ'",
|
227 |
+
"delete text[58:59] --> decoded_text[59:59] 'َ' --> ''",
|
228 |
+
"insert text[78:78] --> decoded_text[78:79] '' --> 'َ'",
|
229 |
+
"delete text[79:80] --> decoded_text[80:80] 'َ' --> ''",
|
230 |
+
"insert text[232:232] --> decoded_text[232:233] '' --> 'َ'",
|
231 |
+
"delete text[233:234] --> decoded_text[234:234] 'َ' --> ''"
|
232 |
+
],
|
233 |
+
"n_oov_chars": 0,
|
234 |
+
"oov_ratio": 0.0,
|
235 |
+
"oov_charset": "[]"
|
236 |
+
},
|
237 |
+
{
|
238 |
+
"text": "قالیباف افزود: یا حدیث معروفِ «مَن اَصبَحَ وَ لَم یَهتَمَّ بِاُمورِ المُسلِمینَ فَلَیسَ بِمُسلِم» و نمونه های مشابه آن، جزو بیّنات اسلام است؛ یعنى اسلام انسان را اینجور خواسته است که مسئول باشد؛ هم نسبت به خود، هم نسبت به نزدیکان خود، هم نسبت به جامعهى خود، هم نسبت به بشریّت.",
|
239 |
+
"decoded_text": "قالیباف افزود: یا حدیث معروفِ «مَن اَصبَحَ وَ لَم یَهتَمَّ بِاُمورِ المُسلِمینَ فَلَیسَ بِمُسلِم» و نمونه های مشابه آن، جزو بیّنات اسلام است؛ یعنى اسلام انسان را اینجور خواسته است که مسئول باشد؛ هم نسبت به خود، هم نسبت به نزدیکان خود، هم نسبت به جامعهى خود، هم نسبت به بشریّت.",
|
240 |
+
"diff": [
|
241 |
+
"insert text[56:56] --> decoded_text[56:57] '' --> 'َ'",
|
242 |
+
"delete text[57:58] --> decoded_text[58:58] 'َ' --> ''"
|
243 |
+
],
|
244 |
+
"n_oov_chars": 0,
|
245 |
+
"oov_ratio": 0.0,
|
246 |
+
"oov_charset": "[]"
|
247 |
+
}
|
248 |
+
]
|
stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.fr.diff.json
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "Une Agence conseil en Communication et Coopération Décentralisée.",
|
4 |
+
"decoded_text": "Une Agence conseil en Communication et Coopération Décentralisée.",
|
5 |
+
"diff": [
|
6 |
+
"replace text[43:45] --> decoded_text[43:44] 'é' --> 'é'",
|
7 |
+
"replace text[53:55] --> decoded_text[52:53] 'é' --> 'é'",
|
8 |
+
"replace text[64:66] --> decoded_text[62:63] 'é' --> 'é'"
|
9 |
+
],
|
10 |
+
"n_oov_chars": 0,
|
11 |
+
"oov_ratio": 0.0,
|
12 |
+
"oov_charset": "[]"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"text": "Une équipe de consultants expérimentés pour accompagner, conseiller tout type de coopération économique, sociale, culturelle et durable, en lien avec l’Afrique.",
|
16 |
+
"decoded_text": "Une équipe de consultants expérimentés pour accompagner, conseiller tout type de coopération économique, sociale, culturelle et durable, en lien avec l’Afrique.",
|
17 |
+
"diff": [
|
18 |
+
"replace text[4:6] --> decoded_text[4:5] 'é' --> 'é'",
|
19 |
+
"replace text[30:32] --> decoded_text[29:30] 'é' --> 'é'",
|
20 |
+
"replace text[38:40] --> decoded_text[36:37] 'é' --> 'é'",
|
21 |
+
"replace text[88:90] --> decoded_text[85:86] 'é' --> 'é'",
|
22 |
+
"replace text[97:99] --> decoded_text[93:94] 'é' --> 'é'"
|
23 |
+
],
|
24 |
+
"n_oov_chars": 0,
|
25 |
+
"oov_ratio": 0.0,
|
26 |
+
"oov_charset": "[]"
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"text": "Des compétences confirmées dans les domaines des médias et de la communication publique.",
|
30 |
+
"decoded_text": "Des compétences confirmées dans les domaines des médias et de la communication publique.",
|
31 |
+
"diff": [
|
32 |
+
"replace text[8:10] --> decoded_text[8:9] 'é' --> 'é'",
|
33 |
+
"replace text[24:26] --> decoded_text[23:24] 'é' --> 'é'",
|
34 |
+
"replace text[52:54] --> decoded_text[50:51] 'é' --> 'é'"
|
35 |
+
],
|
36 |
+
"n_oov_chars": 0,
|
37 |
+
"oov_ratio": 0.0,
|
38 |
+
"oov_charset": "[]"
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"text": "Une bonne connaissance du réseau des décideurs publics et privés, en France et ailleurs dans le monde, principalement pour l’Afrique.",
|
42 |
+
"decoded_text": "Une bonne connaissance du réseau des décideurs publics et privés, en France et ailleurs dans le monde, principalement pour l’Afrique.",
|
43 |
+
"diff": [
|
44 |
+
"replace text[27:29] --> decoded_text[27:28] 'é' --> 'é'",
|
45 |
+
"replace text[39:41] --> decoded_text[38:39] 'é' --> 'é'",
|
46 |
+
"replace text[64:66] --> decoded_text[62:63] 'é' --> 'é'"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "La tribune internationale pour parler des atouts de la Côte d’Ivoire et lui offrir les meilleures conditions des approches nouvelles des politiques de coopération française et européenne.",
|
54 |
+
"decoded_text": "La tribune internationale pour parler des atouts de la Côte d’Ivoire et lui offrir les meilleures conditions des approches nouvelles des politiques de coopération française et européenne.",
|
55 |
+
"diff": [
|
56 |
+
"replace text[56:58] --> decoded_text[56:57] 'ô' --> 'ô'",
|
57 |
+
"replace text[156:158] --> decoded_text[155:156] 'é' --> 'é'",
|
58 |
+
"replace text[169:171] --> decoded_text[167:168] 'ç' --> 'ç'",
|
59 |
+
"replace text[184:186] --> decoded_text[181:182] 'é' --> 'é'"
|
60 |
+
],
|
61 |
+
"n_oov_chars": 0,
|
62 |
+
"oov_ratio": 0.0,
|
63 |
+
"oov_charset": "[]"
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"text": "La rencontre entre les pouvoirs publics et décideurs français, les autorités ivoiriennes, les hommes d'affaires, entrepreneurs et investisseurs, venant de tous horizons.",
|
67 |
+
"decoded_text": "La rencontre entre les pouvoirs publics et décideurs français, les autorités ivoiriennes, les hommes d'affaires, entrepreneurs et investisseurs, venant de tous horizons.",
|
68 |
+
"diff": [
|
69 |
+
"replace text[44:46] --> decoded_text[44:45] 'é' --> 'é'",
|
70 |
+
"replace text[58:60] --> decoded_text[57:58] 'ç' --> 'ç'",
|
71 |
+
"replace text[76:78] --> decoded_text[74:75] 'é' --> 'é'"
|
72 |
+
],
|
73 |
+
"n_oov_chars": 0,
|
74 |
+
"oov_ratio": 0.0,
|
75 |
+
"oov_charset": "[]"
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"text": "Parler des opportunités de la relation privilégiée de la coopération française et européenne avec la Côte d’Ivoire.",
|
79 |
+
"decoded_text": "Parler des opportunités de la relation privilégiée de la coopération française et européenne avec la Côte d’Ivoire.",
|
80 |
+
"diff": [
|
81 |
+
"replace text[21:23] --> decoded_text[21:22] 'é' --> 'é'",
|
82 |
+
"replace text[46:48] --> decoded_text[45:46] 'é' --> 'é'",
|
83 |
+
"replace text[50:52] --> decoded_text[48:49] 'é' --> 'é'",
|
84 |
+
"replace text[64:66] --> decoded_text[61:62] 'é' --> 'é'",
|
85 |
+
"replace text[77:79] --> decoded_text[73:74] 'ç' --> 'ç'",
|
86 |
+
"replace text[92:94] --> decoded_text[87:88] 'é' --> 'é'",
|
87 |
+
"replace text[108:110] --> decoded_text[102:103] 'ô' --> 'ô'"
|
88 |
+
],
|
89 |
+
"n_oov_chars": 0,
|
90 |
+
"oov_ratio": 0.0,
|
91 |
+
"oov_charset": "[]"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"text": "Faciliter les discussions entre les acteurs de la coopération décentralisée.",
|
95 |
+
"decoded_text": "Faciliter les discussions entre les acteurs de la coopération décentralisée.",
|
96 |
+
"diff": [
|
97 |
+
"replace text[54:56] --> decoded_text[54:55] 'é' --> 'é'",
|
98 |
+
"replace text[64:66] --> decoded_text[63:64] 'é' --> 'é'",
|
99 |
+
"replace text[75:77] --> decoded_text[73:74] 'é' --> 'é'"
|
100 |
+
],
|
101 |
+
"n_oov_chars": 0,
|
102 |
+
"oov_ratio": 0.0,
|
103 |
+
"oov_charset": "[]"
|
104 |
+
}
|
105 |
+
]
|
stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.ja.diff.json
ADDED
@@ -0,0 +1,1046 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "点を取れる部分をメインに勉強を行いました。とても勉強になったので、チャレンジして良かったです。仕事で活かせるように今後も勉強を続けて行きたいです。",
|
4 |
+
"decoded_text": "点を取れる部分をメインに勉強を行いました。とても勉強になったので、チャレンジして良かったです。仕事で活かせるように今後も勉強を続けて行きたいです。",
|
5 |
+
"diff": [
|
6 |
+
"replace text[31:33] --> decoded_text[31:32] 'で' --> 'で'",
|
7 |
+
"replace text[38:40] --> decoded_text[37:38] 'ジ' --> 'ジ'",
|
8 |
+
"replace text[46:48] --> decoded_text[44:45] 'で' --> 'で'",
|
9 |
+
"replace text[52:54] --> decoded_text[49:50] 'で' --> 'で'",
|
10 |
+
"replace text[74:76] --> decoded_text[70:71] 'で' --> 'で'"
|
11 |
+
],
|
12 |
+
"n_oov_chars": 0,
|
13 |
+
"oov_ratio": 0.0,
|
14 |
+
"oov_charset": "[]"
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"text": "ドローンは今後、土木・農業など様々な分野での利用が見込まれるので、ぜひチャレンジしてみてください。",
|
18 |
+
"decoded_text": "ドローンは今後、土木・農業など様々な分野での利用が見込まれるので、ぜひチャレンジしてみてください。",
|
19 |
+
"diff": [
|
20 |
+
"replace text[0:2] --> decoded_text[0:1] 'ド' --> 'ド'",
|
21 |
+
"replace text[15:17] --> decoded_text[14:15] 'ど' --> 'ど'",
|
22 |
+
"replace text[22:24] --> decoded_text[20:21] 'で' --> 'で'",
|
23 |
+
"replace text[27:29] --> decoded_text[24:25] 'が' --> 'が'",
|
24 |
+
"replace text[35:37] --> decoded_text[31:32] 'で' --> 'で'",
|
25 |
+
"replace text[38:40] --> decoded_text[33:34] 'ぜ' --> 'ぜ'",
|
26 |
+
"replace text[45:47] --> decoded_text[39:40] 'ジ' --> 'ジ'",
|
27 |
+
"replace text[52:54] --> decoded_text[45:46] 'だ' --> 'だ'"
|
28 |
+
],
|
29 |
+
"n_oov_chars": 0,
|
30 |
+
"oov_ratio": 0.0,
|
31 |
+
"oov_charset": "[]"
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"text": "覚える範囲が多かったので、要点を絞って取り組みました。合格して良かったです。内定先で今後使う話が出てきたら率先して取り組んで行きたいです。",
|
35 |
+
"decoded_text": "覚える範囲が多かったので、要点を絞って取り組みました。合格して良かったです。内定先で今後使う話が出てきたら率先して取り組んで行きたいです。",
|
36 |
+
"diff": [
|
37 |
+
"replace text[5:7] --> decoded_text[5:6] 'が' --> 'が'",
|
38 |
+
"replace text[12:14] --> decoded_text[11:12] 'で' --> 'で'",
|
39 |
+
"replace text[37:39] --> decoded_text[35:36] 'で' --> 'で'",
|
40 |
+
"replace text[44:46] --> decoded_text[41:42] 'で' --> 'で'",
|
41 |
+
"replace text[51:53] --> decoded_text[47:48] 'が' --> 'が'",
|
42 |
+
"replace text[66:68] --> decoded_text[61:62] 'で' --> 'で'",
|
43 |
+
"replace text[72:74] --> decoded_text[66:67] 'で' --> 'で'"
|
44 |
+
],
|
45 |
+
"n_oov_chars": 0,
|
46 |
+
"oov_ratio": 0.0,
|
47 |
+
"oov_charset": "[]"
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"text": "先生が全面的にサポートして下さるので安心して勉強できます。分からない時は先生になんでも質問してください。",
|
51 |
+
"decoded_text": "先生が全面的にサポートして下さるので安心して勉強できます。分からない時は先生になんでも質問してください。",
|
52 |
+
"diff": [
|
53 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
54 |
+
"replace text[9:11] --> decoded_text[8:9] 'ポ' --> 'ポ'",
|
55 |
+
"replace text[19:21] --> decoded_text[17:18] 'で' --> 'で'",
|
56 |
+
"replace text[27:29] --> decoded_text[24:25] 'で' --> 'で'",
|
57 |
+
"replace text[45:47] --> decoded_text[41:42] 'で' --> 'で'",
|
58 |
+
"replace text[53:55] --> decoded_text[48:49] 'だ' --> 'だ'"
|
59 |
+
],
|
60 |
+
"n_oov_chars": 0,
|
61 |
+
"oov_ratio": 0.0,
|
62 |
+
"oov_charset": "[]"
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"text": "過去問題をたくさん解くことで問題の傾向を分析しました。しっかり勉強して臨んだので、合格する自信がありました。この資格を活かし、工事測量などで、正確な値を出すことで適切な指示を出せるようにしていきたいです。",
|
66 |
+
"decoded_text": "過去問題をたくさん解くことで問題の傾向を分析しました。しっかり勉強して臨んだので、合格する自信がありました。この資格を活かし、工事測量などで、正確な値を出すことで適切な指示を出せるようにしていきたいです。",
|
67 |
+
"diff": [
|
68 |
+
"replace text[13:15] --> decoded_text[13:14] 'で' --> 'で'",
|
69 |
+
"replace text[38:40] --> decoded_text[37:38] 'だ' --> 'だ'",
|
70 |
+
"replace text[41:43] --> decoded_text[39:40] 'で' --> 'で'",
|
71 |
+
"replace text[50:52] --> decoded_text[47:48] 'が' --> 'が'",
|
72 |
+
"replace text[72:76] --> decoded_text[68:70] 'どで' --> 'どで'",
|
73 |
+
"replace text[86:88] --> decoded_text[80:81] 'で' --> 'で'",
|
74 |
+
"replace text[106:108] --> decoded_text[99:100] 'で' --> 'で'"
|
75 |
+
],
|
76 |
+
"n_oov_chars": 0,
|
77 |
+
"oov_ratio": 0.0,
|
78 |
+
"oov_charset": "[]"
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"text": "しっかり勉強したら必ず自分のためになりますよ。将来の夢を持ち、それを実現できるように今できる事を頑張ってください。",
|
82 |
+
"decoded_text": "しっかり勉強したら必ず自分のためになりますよ。将来の夢を持ち、それを実現できるように今できる事を頑張ってください。",
|
83 |
+
"diff": [
|
84 |
+
"replace text[10:12] --> decoded_text[10:11] 'ず' --> 'ず'",
|
85 |
+
"replace text[37:39] --> decoded_text[36:37] 'で' --> 'で'",
|
86 |
+
"replace text[45:47] --> decoded_text[43:44] 'で' --> 'で'",
|
87 |
+
"replace text[56:58] --> decoded_text[53:54] 'だ' --> 'だ'"
|
88 |
+
],
|
89 |
+
"n_oov_chars": 0,
|
90 |
+
"oov_ratio": 0.0,
|
91 |
+
"oov_charset": "[]"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"text": "まずは、自分が何をしたいのか考え、そしてそこから、自分の目標に向かって資格取得を目指してください。せっかくの取得チャンスを無駄にしないでください。",
|
95 |
+
"decoded_text": "まずは、自分が何をしたいのか考え、そしてそこから、自分の目標に向かって資格取得を目指してください。せっかくの取得チャンスを無駄にしないでください。",
|
96 |
+
"diff": [
|
97 |
+
"replace text[1:3] --> decoded_text[1:2] 'ず' --> 'ず'",
|
98 |
+
"replace text[7:9] --> decoded_text[6:7] 'が' --> 'が'",
|
99 |
+
"replace text[47:49] --> decoded_text[45:46] 'だ' --> 'だ'",
|
100 |
+
"replace text[70:72] --> decoded_text[67:68] 'で' --> 'で'",
|
101 |
+
"replace text[73:75] --> decoded_text[69:70] 'だ' --> 'だ'"
|
102 |
+
],
|
103 |
+
"n_oov_chars": 0,
|
104 |
+
"oov_ratio": 0.0,
|
105 |
+
"oov_charset": "[]"
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"text": "過去問題を解き、答え合わせをした後、自分がどこを間違っていたか、なぜ間違えたか等、徹底的に勉強しました。次は測量士補や、ドローン検定等の資格を目指し、頑張りたいと思います。",
|
109 |
+
"decoded_text": "過去問題を解き、答え合わせをした後、自分がどこを間違っていたか、なぜ間違えたか等、徹底的に勉強しました。次は測量士補や、ドローン検定等の資格を目指し、頑張りたいと思います。",
|
110 |
+
"diff": [
|
111 |
+
"replace text[20:24] --> decoded_text[20:22] 'がど' --> 'がど'",
|
112 |
+
"replace text[35:37] --> decoded_text[33:34] 'ぜ' --> 'ぜ'",
|
113 |
+
"replace text[63:65] --> decoded_text[60:61] 'ド' --> 'ド'"
|
114 |
+
],
|
115 |
+
"n_oov_chars": 0,
|
116 |
+
"oov_ratio": 0.0,
|
117 |
+
"oov_charset": "[]"
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"text": "細かい事でも、疑問に思った事は必ず先生に質問してください。",
|
121 |
+
"decoded_text": "細かい事でも、疑問に思った事は必ず先生に質問してください。",
|
122 |
+
"diff": [
|
123 |
+
"replace text[4:6] --> decoded_text[4:5] 'で' --> 'で'",
|
124 |
+
"replace text[17:19] --> decoded_text[16:17] 'ず' --> 'ず'",
|
125 |
+
"replace text[27:29] --> decoded_text[25:26] 'だ' --> 'だ'"
|
126 |
+
],
|
127 |
+
"n_oov_chars": 0,
|
128 |
+
"oov_ratio": 0.0,
|
129 |
+
"oov_charset": "[]"
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"text": "過去問を解き続け、頭に知識と問題の傾向を叩き込みました。合格して本当に良かったです。 試験を受けるチャンスがあり、思い切ってチャレンジして良かったと思います。",
|
133 |
+
"decoded_text": "過去問を解き続け、頭に知識と問題の傾向を叩き込みました。合格して本当に良かったです。 試験を受けるチャンスがあり、思い切ってチャレンジして良かったと思います。",
|
134 |
+
"diff": [
|
135 |
+
"replace text[39:41] --> decoded_text[39:40] 'で' --> 'で'",
|
136 |
+
"replace text[54:56] --> decoded_text[53:54] 'が' --> 'が'"
|
137 |
+
],
|
138 |
+
"n_oov_chars": 0,
|
139 |
+
"oov_ratio": 0.0,
|
140 |
+
"oov_charset": "[]"
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"text": "何事もまずは挑戦してみてください。分からない事は先生が教えてくださいます。安心して勉強に取り組んでください。",
|
144 |
+
"decoded_text": "何事もまずは挑戦してみてください。分からない事は先生が教えてくださいます。安心して勉強に取り組んでください。",
|
145 |
+
"diff": [
|
146 |
+
"replace text[4:6] --> decoded_text[4:5] 'ず' --> 'ず'",
|
147 |
+
"replace text[14:16] --> decoded_text[13:14] 'だ' --> 'だ'",
|
148 |
+
"replace text[28:30] --> decoded_text[26:27] 'が' --> 'が'",
|
149 |
+
"replace text[34:36] --> decoded_text[31:32] 'だ' --> 'だ'",
|
150 |
+
"replace text[52:54] --> decoded_text[48:49] 'で' --> 'で'",
|
151 |
+
"replace text[55:57] --> decoded_text[50:51] 'だ' --> 'だ'"
|
152 |
+
],
|
153 |
+
"n_oov_chars": 0,
|
154 |
+
"oov_ratio": 0.0,
|
155 |
+
"oov_charset": "[]"
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"text": "日々の勉強を大切にし、試験当日は集中して受験しました。試験一か月前までは、ひたすら過去問を解いて自分の弱いところを集中的に勉強しました。この学校に入学して1番の目標だったので取得できて嬉しいです。",
|
159 |
+
"decoded_text": "日々の勉強を大切にし、試験当日は集中して受験しました。試験一か月前までは、ひたすら過去問を解いて自分の弱いところを集中的に勉強しました。この学校に入学して1番の目標だったので取得できて嬉しいです。",
|
160 |
+
"diff": [
|
161 |
+
"replace text[34:36] --> decoded_text[34:35] 'で' --> 'で'",
|
162 |
+
"replace text[83:85] --> decoded_text[82:83] 'だ' --> 'だ'",
|
163 |
+
"replace text[88:90] --> decoded_text[86:87] 'で' --> 'で'",
|
164 |
+
"replace text[92:94] --> decoded_text[89:90] 'で' --> 'で'",
|
165 |
+
"replace text[99:101] --> decoded_text[95:96] 'で' --> 'で'"
|
166 |
+
],
|
167 |
+
"n_oov_chars": 0,
|
168 |
+
"oov_ratio": 0.0,
|
169 |
+
"oov_charset": "[]"
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"text": "何回も過去問を解き、傾向を掴みました。合格して嬉しいです。さらに上の資格を目指し頑張りたいと思います。",
|
173 |
+
"decoded_text": "何回も過去問を解き、傾向を掴みました。合格して嬉しいです。さらに上の資格を目指し頑張りたいと思います。",
|
174 |
+
"diff": [
|
175 |
+
"replace text[26:28] --> decoded_text[26:27] 'で' --> 'で'"
|
176 |
+
],
|
177 |
+
"n_oov_chars": 0,
|
178 |
+
"oov_ratio": 0.0,
|
179 |
+
"oov_charset": "[]"
|
180 |
+
},
|
181 |
+
{
|
182 |
+
"text": "受けるからには合格するほうが良いので積極的に勉強してください。そして後悔しないように頑張ってください。",
|
183 |
+
"decoded_text": "受けるからには合格するほうが良いので積極的に勉強してください。そして後悔しないように頑張ってください。",
|
184 |
+
"diff": [
|
185 |
+
"replace text[13:15] --> decoded_text[13:14] 'が' --> 'が'",
|
186 |
+
"replace text[18:20] --> decoded_text[17:18] 'で' --> 'で'",
|
187 |
+
"replace text[29:31] --> decoded_text[27:28] 'だ' --> 'だ'",
|
188 |
+
"replace text[50:52] --> decoded_text[47:48] 'だ' --> 'だ'"
|
189 |
+
],
|
190 |
+
"n_oov_chars": 0,
|
191 |
+
"oov_ratio": 0.0,
|
192 |
+
"oov_charset": "[]"
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"text": "分野ごとに勉強を行いました。土木業界ではこの資格がないと仕事の範囲が狭くなるので、合格して嬉しいです。実地試験に合格したら次は1級を目指し、頑張ろうと思います。",
|
196 |
+
"decoded_text": "分野ごとに勉強を行いました。土木業界ではこの資格がないと仕事の範囲が狭くなるので、合格して嬉しいです。実地試験に合格したら次は1級を目指し、頑張ろうと思います。",
|
197 |
+
"diff": [
|
198 |
+
"replace text[2:4] --> decoded_text[2:3] 'ご' --> 'ご'",
|
199 |
+
"replace text[19:21] --> decoded_text[18:19] 'で' --> 'で'",
|
200 |
+
"replace text[26:28] --> decoded_text[24:25] 'が' --> 'が'",
|
201 |
+
"replace text[36:38] --> decoded_text[33:34] 'が' --> 'が'",
|
202 |
+
"replace text[43:45] --> decoded_text[39:40] 'で' --> 'で'",
|
203 |
+
"replace text[53:55] --> decoded_text[48:49] 'で' --> 'で'"
|
204 |
+
],
|
205 |
+
"n_oov_chars": 0,
|
206 |
+
"oov_ratio": 0.0,
|
207 |
+
"oov_charset": "[]"
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"text": "難しい試験ですが、勉強を続けているとコツがつかめてきます。諦めずに頑張ってください。",
|
211 |
+
"decoded_text": "難しい試験ですが、勉強を続けているとコツがつかめてきます。諦めずに頑張ってください。",
|
212 |
+
"diff": [
|
213 |
+
"replace text[5:7] --> decoded_text[5:6] 'で' --> 'で'",
|
214 |
+
"replace text[8:10] --> decoded_text[7:8] 'が' --> 'が'",
|
215 |
+
"replace text[22:24] --> decoded_text[20:21] 'が' --> 'が'",
|
216 |
+
"replace text[34:36] --> decoded_text[31:32] 'ず' --> 'ず'",
|
217 |
+
"replace text[42:44] --> decoded_text[38:39] 'だ' --> 'だ'"
|
218 |
+
],
|
219 |
+
"n_oov_chars": 0,
|
220 |
+
"oov_ratio": 0.0,
|
221 |
+
"oov_charset": "[]"
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"text": "過去問を解くことで対策をする事ができました。学科試験に合格したので、次は実地試験に合格して、現場を任せてもらえるようになりたいです。",
|
225 |
+
"decoded_text": "過去問を解くことで対策をする事ができました。学科試験に合格したので、次は実地試験に合格して、現場を任せてもらえるようになりたい��す。",
|
226 |
+
"diff": [
|
227 |
+
"replace text[8:10] --> decoded_text[8:9] 'で' --> 'で'",
|
228 |
+
"replace text[16:20] --> decoded_text[15:17] 'がで' --> 'がで'",
|
229 |
+
"replace text[35:37] --> decoded_text[32:33] 'で' --> 'で'",
|
230 |
+
"replace text[67:69] --> decoded_text[63:64] 'で' --> 'で'"
|
231 |
+
],
|
232 |
+
"n_oov_chars": 0,
|
233 |
+
"oov_ratio": 0.0,
|
234 |
+
"oov_charset": "[]"
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"text": "取りたい資格は積極的に勉強し、自信を持って頑張ってください。わからない事は必ず先生が教えてくださいます。",
|
238 |
+
"decoded_text": "取りたい資格は積極的に勉強し、自信を持って頑張ってください。わからない事は必ず先生が教えてくださいます。",
|
239 |
+
"diff": [
|
240 |
+
"replace text[26:28] --> decoded_text[26:27] 'だ' --> 'だ'",
|
241 |
+
"replace text[39:41] --> decoded_text[38:39] 'ず' --> 'ず'",
|
242 |
+
"replace text[43:45] --> decoded_text[41:42] 'が' --> 'が'",
|
243 |
+
"replace text[49:51] --> decoded_text[46:47] 'だ' --> 'だ'"
|
244 |
+
],
|
245 |
+
"n_oov_chars": 0,
|
246 |
+
"oov_ratio": 0.0,
|
247 |
+
"oov_charset": "[]"
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"text": "放課後の勉強会を頑張りました。2級土木施工管理技術検定は、土木技術者になるための第一歩だと思います。合格して本当に嬉しいです。",
|
251 |
+
"decoded_text": "放課後の勉強会を頑張りました。2級土木施工管理技術検定は、土木技術者になるための第一歩だと思います。合格して本当に嬉しいです。",
|
252 |
+
"diff": [
|
253 |
+
"replace text[43:45] --> decoded_text[43:44] 'だ' --> 'だ'",
|
254 |
+
"replace text[61:63] --> decoded_text[60:61] 'で' --> 'で'"
|
255 |
+
],
|
256 |
+
"n_oov_chars": 0,
|
257 |
+
"oov_ratio": 0.0,
|
258 |
+
"oov_charset": "[]"
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"text": "遊び、バイト、 勉強、いろいろありますが、区切りを付けて頑張ってください。",
|
262 |
+
"decoded_text": "遊び、バイト、 勉強、いろいろありますが、区切りを付けて頑張ってください。",
|
263 |
+
"diff": [
|
264 |
+
"replace text[1:3] --> decoded_text[1:2] 'び' --> 'び'",
|
265 |
+
"replace text[4:6] --> decoded_text[3:4] 'バ' --> 'バ'",
|
266 |
+
"replace text[35:37] --> decoded_text[33:34] 'だ' --> 'だ'"
|
267 |
+
],
|
268 |
+
"n_oov_chars": 0,
|
269 |
+
"oov_ratio": 0.0,
|
270 |
+
"oov_charset": "[]"
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"text": "とにかく過去問題をやり、自分の苦手な分野をできるようにして行きました。最近の土木現場では、環境への影響を考えて工事をしなければならないので、この資格を取得しようと思いました。合格して本当に良かったです。",
|
274 |
+
"decoded_text": "とにかく過去問題をやり、自分の苦手な分野をできるようにして行きました。最近の土木現場では、環境への影響を考えて工事をしなければならないので、この資格を取得しようと思いました。合格して本当に良かったです。",
|
275 |
+
"diff": [
|
276 |
+
"replace text[21:23] --> decoded_text[21:22] 'で' --> 'で'",
|
277 |
+
"replace text[43:45] --> decoded_text[42:43] 'で' --> 'で'",
|
278 |
+
"replace text[64:66] --> decoded_text[62:63] 'ば' --> 'ば'",
|
279 |
+
"replace text[71:73] --> decoded_text[68:69] 'で' --> 'で'",
|
280 |
+
"replace text[102:104] --> decoded_text[98:99] 'で' --> 'で'"
|
281 |
+
],
|
282 |
+
"n_oov_chars": 0,
|
283 |
+
"oov_ratio": 0.0,
|
284 |
+
"oov_charset": "[]"
|
285 |
+
},
|
286 |
+
{
|
287 |
+
"text": "勉強する時と、遊ぶときの切り替えをうまくやり、合格へ向けて頑張ってください。",
|
288 |
+
"decoded_text": "勉強する時と、遊ぶときの切り替えをうまくやり、合格へ向けて頑張ってください。",
|
289 |
+
"diff": [
|
290 |
+
"replace text[8:10] --> decoded_text[8:9] 'ぶ' --> 'ぶ'",
|
291 |
+
"replace text[35:37] --> decoded_text[34:35] 'だ' --> 'だ'"
|
292 |
+
],
|
293 |
+
"n_oov_chars": 0,
|
294 |
+
"oov_ratio": 0.0,
|
295 |
+
"oov_charset": "[]"
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"text": "過去問を何度もやりました。頑張ってきたことが成果に出て嬉しいです。この調子で、測量士補、2級土木施工管理技術検定の学科試験にも合格できるように頑張り たいと思います。",
|
299 |
+
"decoded_text": "過去問を何度もやりました。頑張ってきたことが成果に出て嬉しいです。この調子で、測量士補、2級土木施工管理技術検定の学科試験にも合格できるように頑張り たいと思います。",
|
300 |
+
"diff": [
|
301 |
+
"replace text[21:23] --> decoded_text[21:22] 'が' --> 'が'",
|
302 |
+
"replace text[31:33] --> decoded_text[30:31] 'で' --> 'で'",
|
303 |
+
"replace text[39:41] --> decoded_text[37:38] 'で' --> 'で'",
|
304 |
+
"replace text[68:70] --> decoded_text[65:66] 'で' --> 'で'"
|
305 |
+
],
|
306 |
+
"n_oov_chars": 0,
|
307 |
+
"oov_ratio": 0.0,
|
308 |
+
"oov_charset": "[]"
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"text": "何度も何度も繰り返し過去問を解くことが合格への近道です。頑張ってください。",
|
312 |
+
"decoded_text": "何度も何度も繰り返し過去問を解くことが合格への近道です。頑張ってください。",
|
313 |
+
"diff": [
|
314 |
+
"replace text[18:20] --> decoded_text[18:19] 'が' --> 'が'",
|
315 |
+
"replace text[26:28] --> decoded_text[25:26] 'で' --> 'で'",
|
316 |
+
"replace text[35:37] --> decoded_text[33:34] 'だ' --> 'だ'"
|
317 |
+
],
|
318 |
+
"n_oov_chars": 0,
|
319 |
+
"oov_ratio": 0.0,
|
320 |
+
"oov_charset": "[]"
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"text": "放課後も残って勉強しました。その中で法律と施工管理を重点的に勉強しました。さらに作文も頑張りました。来年には2級ビオトープ計画管理士も受験して合格したいです。将来に必要な資格なので、持っていて損はないと思います。合格して本当に嬉しいです。",
|
324 |
+
"decoded_text": "放課後も残って勉強しました。その中で法律と施工管理を重点的に勉強しました。さらに作文も頑張りました。来年には2級ビオトープ計画管理士も受験して合格したいです。将来に必要な資格なので、持っていて損はないと思います。合格して本当に嬉しいです。",
|
325 |
+
"diff": [
|
326 |
+
"replace text[17:19] --> decoded_text[17:18] 'で' --> 'で'",
|
327 |
+
"replace text[57:59] --> decoded_text[56:57] 'ビ' --> 'ビ'",
|
328 |
+
"replace text[62:64] --> decoded_text[60:61] 'プ' --> 'プ'",
|
329 |
+
"replace text[79:81] --> decoded_text[76:77] 'で' --> 'で'",
|
330 |
+
"replace text[93:95] --> decoded_text[89:90] 'で' --> 'で'",
|
331 |
+
"replace text[121:123] --> decoded_text[116:117] 'で' --> 'で'"
|
332 |
+
],
|
333 |
+
"n_oov_chars": 0,
|
334 |
+
"oov_ratio": 0.0,
|
335 |
+
"oov_charset": "[]"
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"text": "やる気があれば合格できます。専門学校に入学したからには自分で目標を作り、積極的に挑戦して行ってください。",
|
339 |
+
"decoded_text": "やる気があれば合格できます。専門学校に入学したからには自分で目標を作り、積極的に挑戦して行ってください。",
|
340 |
+
"diff": [
|
341 |
+
"replace text[3:5] --> decoded_text[3:4] 'が' --> 'が'",
|
342 |
+
"replace text[10:12] --> decoded_text[9:10] 'で' --> 'で'",
|
343 |
+
"replace text[31:33] --> decoded_text[29:30] 'で' --> 'で'",
|
344 |
+
"replace text[51:53] --> decoded_text[48:49] 'だ' --> 'だ'"
|
345 |
+
],
|
346 |
+
"n_oov_chars": 0,
|
347 |
+
"oov_ratio": 0.0,
|
348 |
+
"oov_charset": "[]"
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"text": "放課後残って勉強しました。まずはどんな形であっても問題に取り組む事が大切だと思います。電気の知識はまだまだですが、学習の方法が定まってきたので、残りの2科目(電力科目・機械科目)、さらには消防設備士試験にも合格したいです。",
|
352 |
+
"decoded_text": "放課後残って勉強しました。まずはどんな形であっても問題に取り組む事が大切だと思います。電気の知識はまだまだですが、学習の方法が定まってきたので、残りの2科目(電力科目・機械科目)、さらには消防設備士試験にも合格したいです。",
|
353 |
+
"diff": [
|
354 |
+
"replace text[14:16] --> decoded_text[14:15] 'ず' --> 'ず'",
|
355 |
+
"replace text[17:19] --> decoded_text[16:17] 'ど' --> 'ど'",
|
356 |
+
"replace text[22:24] --> decoded_text[20:21] 'で' --> 'で'",
|
357 |
+
"replace text[36:38] --> decoded_text[33:34] 'が' --> 'が'",
|
358 |
+
"replace text[40:42] --> decoded_text[36:37] 'だ' --> 'だ'",
|
359 |
+
"replace text[55:57] --> decoded_text[50:51] 'だ' --> 'だ'",
|
360 |
+
"replace text[58:62] --> decoded_text[52:54] 'だで' --> 'だで'",
|
361 |
+
"replace text[63:65] --> decoded_text[55:56] 'が' --> 'が'",
|
362 |
+
"replace text[71:73] --> decoded_text[62:63] 'が' --> 'が'",
|
363 |
+
"replace text[80:82] --> decoded_text[70:71] 'で' --> 'で'",
|
364 |
+
"replace text[119:121] --> decoded_text[108:109] 'で' --> 'で'"
|
365 |
+
],
|
366 |
+
"n_oov_chars": 0,
|
367 |
+
"oov_ratio": 0.0,
|
368 |
+
"oov_charset": "[]"
|
369 |
+
},
|
370 |
+
{
|
371 |
+
"text": "気になる資格は、早め早めに下調べしておくことが大切だと思います。問題を知った上で授業を受けると、理解が、より深められます。",
|
372 |
+
"decoded_text": "気になる資格は、早め早めに下調べしておくことが大切だと思います。問題を知った上で授業を受けると、理解が、より深められます。",
|
373 |
+
"diff": [
|
374 |
+
"replace text[15:17] --> decoded_text[15:16] 'べ' --> 'べ'",
|
375 |
+
"replace text[23:25] --> decoded_text[22:23] 'が' --> 'が'",
|
376 |
+
"replace text[27:29] --> decoded_text[25:26] 'だ' --> 'だ'",
|
377 |
+
"replace text[53:55] --> decoded_text[50:51] 'が' --> 'が'"
|
378 |
+
],
|
379 |
+
"n_oov_chars": 0,
|
380 |
+
"oov_ratio": 0.0,
|
381 |
+
"oov_charset": "[]"
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"text": "問題集を1冊を決めて、とことんやりこみました。何回かやっても理解できない時は、理解できる所から取り組み、自分の得意な分野で確実に点を取るよう勉強しました。残りの3科目を取得できるように、さらに努力しようと思います。",
|
385 |
+
"decoded_text": "問題集を1冊を決めて、とことんやりこみました。何回かやっても理解できない時は、理解できる所から取り組み、自分の得意な分野で確実に点を取るよう勉強しました。残りの3科目を取得できるように、さらに努力しようと思います。",
|
386 |
+
"diff": [
|
387 |
+
"replace text[32:34] --> decoded_text[32:33] 'で' --> 'で'",
|
388 |
+
"replace text[61:63] --> decoded_text[60:61] 'で' --> 'で'",
|
389 |
+
"replace text[88:90] --> decoded_text[86:87] 'で' --> 'で'"
|
390 |
+
],
|
391 |
+
"n_oov_chars": 0,
|
392 |
+
"oov_ratio": 0.0,
|
393 |
+
"oov_charset": "[]"
|
394 |
+
},
|
395 |
+
{
|
396 |
+
"text": "各科目ともに完璧を目指すのではなく、少しでも自分の得意な所を見つけて得点できるようにするのが重要だと思います。",
|
397 |
+
"decoded_text": "各科目ともに完璧を目指すのではなく、少しでも自分の得意な所を見つけて得点できるようにするのが重要だと思います。",
|
398 |
+
"diff": [
|
399 |
+
"replace text[13:15] --> decoded_text[13:14] 'で' --> 'で'",
|
400 |
+
"replace text[21:23] --> decoded_text[20:21] 'で' --> 'で'",
|
401 |
+
"replace text[38:40] --> decoded_text[36:37] 'で' --> 'で'",
|
402 |
+
"replace text[48:50] --> decoded_text[45:46] 'が' --> 'が'",
|
403 |
+
"replace text[52:54] --> decoded_text[48:49] 'だ' --> 'だ'"
|
404 |
+
],
|
405 |
+
"n_oov_chars": 0,
|
406 |
+
"oov_ratio": 0.0,
|
407 |
+
"oov_charset": "[]"
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"text": "電験対策講座を受講し、過去問を何回も繰り返し解きました。また日頃の授業をしっかり聞き自分がわかるように、ノートにまとめました。合格はしましたが、自分では実力はまだついてないと思います。ほかの3科目も取得できるように頑張りたいと思います。",
|
411 |
+
"decoded_text": "電験対策講座を受講し、過去問を何回も繰り返し解きました。また日頃の授業をしっかり聞き自分がわかるように、ノートにまとめました。合格はしましたが、自分では実力はまだついてないと思います。ほかの3科目も取得できるように頑張りたいと思います。",
|
412 |
+
"diff": [
|
413 |
+
"replace text[44:46] --> decoded_text[44:45] 'が' --> 'が'",
|
414 |
+
"replace text[71:73] --> decoded_text[70:71] 'が' --> 'が'",
|
415 |
+
"replace text[76:78] --> decoded_text[74:75] 'で' --> 'で'",
|
416 |
+
"replace text[83:85] --> decoded_text[80:81] 'だ' --> 'だ'",
|
417 |
+
"replace text[105:107] --> decoded_text[101:102] 'で' --> 'で'"
|
418 |
+
],
|
419 |
+
"n_oov_chars": 0,
|
420 |
+
"oov_ratio": 0.0,
|
421 |
+
"oov_charset": "[]"
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"text": "普通科出身でも、毎日の授業を真剣に聞いていれば、自分に自信がつくので頑張ってください。",
|
425 |
+
"decoded_text": "普通科出身でも、毎日の授業を真剣に聞いていれば、自分に自信がつくので頑張ってください。",
|
426 |
+
"diff": [
|
427 |
+
"replace text[5:7] --> decoded_text[5:6] 'で' --> 'で'",
|
428 |
+
"replace text[23:25] --> decoded_text[22:23] 'ば' --> 'ば'",
|
429 |
+
"replace text[31:33] --> decoded_text[29:30] 'が' --> 'が'",
|
430 |
+
"replace text[36:38] --> decoded_text[33:34] 'で' --> 'で'",
|
431 |
+
"replace text[43:45] --> decoded_text[39:40] 'だ' --> 'だ'"
|
432 |
+
],
|
433 |
+
"n_oov_chars": 0,
|
434 |
+
"oov_ratio": 0.0,
|
435 |
+
"oov_charset": "[]"
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"text": "筆記試験対策では理解できるまで、過去問を繰り返し解きました。実技試験対策では、ミスをしないように、工夫した練習をしました。合格をいただいて、高圧の電気工事に従事する事ができるので嬉しかったです。",
|
439 |
+
"decoded_text": "筆記試験対策では理解できるまで、過去問を繰り返し解きました。実技試験対策では、ミスをしないように、工夫した練習をしました。合格をいただいて、高圧の電気工事に従事する事ができるので嬉しかったです。",
|
440 |
+
"diff": [
|
441 |
+
"replace text[6:8] --> decoded_text[6:7] 'で' --> 'で'",
|
442 |
+
"replace text[11:13] --> decoded_text[10:11] 'で' --> 'で'",
|
443 |
+
"replace text[16:18] --> decoded_text[14:15] 'で' --> 'で'",
|
444 |
+
"replace text[39:41] --> decoded_text[36:37] 'で' --> 'で'",
|
445 |
+
"replace text[70:72] --> decoded_text[66:67] 'だ' --> 'だ'",
|
446 |
+
"replace text[88:92] --> decoded_text[83:85] 'がで' --> 'がで'",
|
447 |
+
"replace text[95:97] --> decoded_text[88:89] 'で' --> 'で'",
|
448 |
+
"replace text[102:104] --> decoded_text[94:95] 'で' --> 'で'"
|
449 |
+
],
|
450 |
+
"n_oov_chars": 0,
|
451 |
+
"oov_ratio": 0.0,
|
452 |
+
"oov_charset": "[]"
|
453 |
+
},
|
454 |
+
{
|
455 |
+
"text": "入学してから受ける国家試験の中で少し難しいですが、頑張れば取れる資格なので、最後まで努力してみてください。",
|
456 |
+
"decoded_text": "入学してから受ける国家試験の中で少し難しいですが、頑張れば取れる資格なので、最後まで努力してみてください。",
|
457 |
+
"diff": [
|
458 |
+
"replace text[15:17] --> decoded_text[15:16] 'で' --> 'で'",
|
459 |
+
"replace text[22:24] --> decoded_text[21:22] 'で' --> 'で'",
|
460 |
+
"replace text[25:27] --> decoded_text[23:24] 'が' --> 'が'",
|
461 |
+
"replace text[31:33] --> decoded_text[28:29] 'ば' --> 'ば'",
|
462 |
+
"replace text[40:42] --> decoded_text[36:37] 'で' --> 'で'",
|
463 |
+
"replace text[46:48] --> decoded_text[41:42] 'で' --> 'で'",
|
464 |
+
"replace text[55:57] --> decoded_text[49:50] 'だ' --> 'だ'"
|
465 |
+
],
|
466 |
+
"n_oov_chars": 0,
|
467 |
+
"oov_ratio": 0.0,
|
468 |
+
"oov_charset": "[]"
|
469 |
+
},
|
470 |
+
{
|
471 |
+
"text": "先生が丁寧に教えてくださるので日々の授業を大切にし、分からない所はその日に先生に聞きに行きました。就職に有利になるように取得した資格を就職活動のため、また社会にでて仕事に活かしていきたいです。",
|
472 |
+
"decoded_text": "先生が丁寧に教えてくださるので日々の授業を大切にし、分からない所はその日に先生に聞きに行きました。就職に有利になるように取得した資格を就職活動のため、また社会にでて仕事に活かしていきたいです。",
|
473 |
+
"diff": [
|
474 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
475 |
+
"replace text[11:13] --> decoded_text[10:11] 'だ' --> 'だ'",
|
476 |
+
"replace text[16:18] --> decoded_text[14:15] 'で' --> 'で'",
|
477 |
+
"replace text[83:85] --> decoded_text[80:81] 'で' --> 'で'",
|
478 |
+
"replace text[97:99] --> decoded_text[93:94] 'で' --> 'で'"
|
479 |
+
],
|
480 |
+
"n_oov_chars": 0,
|
481 |
+
"oov_ratio": 0.0,
|
482 |
+
"oov_charset": "[]"
|
483 |
+
},
|
484 |
+
{
|
485 |
+
"text": "入学した頃は電気に関して分からなかった私でも取得することができました。皆さんも自信を持って頑張ってください。",
|
486 |
+
"decoded_text": "入学した頃は電気に関して分からなかった私でも取得することができました。皆さんも自信を持って頑張ってください。",
|
487 |
+
"diff": [
|
488 |
+
"replace text[20:22] --> decoded_text[20:21] 'で' --> 'で'",
|
489 |
+
"replace text[29:33] --> decoded_text[28:30] 'がで' --> 'がで'",
|
490 |
+
"replace text[53:55] --> decoded_text[50:51] 'だ' --> 'だ'"
|
491 |
+
],
|
492 |
+
"n_oov_chars": 0,
|
493 |
+
"oov_ratio": 0.0,
|
494 |
+
"oov_charset": "[]"
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"text": "とにかく過去問を何回も解き、疑問があれば先生に質問して分かるまで毎日の勉強を頑張りました。合格できて本当に良かったです。今後は第三種電気主任技術者の資格取得に向けて頑張ります。",
|
498 |
+
"decoded_text": "とにかく過去問を何回も解き、疑問があれば先生に質問して分かるまで毎日の勉強を頑張りました。合格できて本当に良かったです。今後は第三種電気主任技術者の資格取得に向けて頑張ります。",
|
499 |
+
"diff": [
|
500 |
+
"replace text[16:18] --> decoded_text[16:17] 'が' --> 'が'",
|
501 |
+
"replace text[32:34] --> decoded_text[31:32] 'で' --> 'で'",
|
502 |
+
"replace text[49:51] --> decoded_text[47:48] 'で' --> 'で'",
|
503 |
+
"replace text[60:62] --> decoded_text[57:58] 'で' --> 'で'"
|
504 |
+
],
|
505 |
+
"n_oov_chars": 0,
|
506 |
+
"oov_ratio": 0.0,
|
507 |
+
"oov_charset": "[]"
|
508 |
+
},
|
509 |
+
{
|
510 |
+
"text": "第一種電気工事士の本の要点をノートにおさえて復習しました。実技では配線図が分からず先生に教えていただき、早い段階で理解 きたのも合格できた要因ではないかと思います。合格して本当に嬉しかったです。これから就職して仕事に活かしていきたいと思います。",
|
511 |
+
"decoded_text": "第一種電気工事士の本の要点をノートにおさえて復習しました。実技では配線図が分からず先生に教えていただき、早い段階で理解 きたのも合格できた要因ではないかと思います。合格して本当に嬉しかったです。これから就職��て仕事に活かしていきたいと思います。",
|
512 |
+
"diff": [
|
513 |
+
"replace text[31:33] --> decoded_text[31:32] 'で' --> 'で'",
|
514 |
+
"replace text[37:39] --> decoded_text[36:37] 'が' --> 'が'",
|
515 |
+
"replace text[42:44] --> decoded_text[40:41] 'ず' --> 'ず'",
|
516 |
+
"replace text[52:54] --> decoded_text[49:50] 'だ' --> 'だ'",
|
517 |
+
"replace text[60:62] --> decoded_text[56:57] 'で' --> 'で'",
|
518 |
+
"replace text[71:73] --> decoded_text[66:67] 'で' --> 'で'",
|
519 |
+
"replace text[77:79] --> decoded_text[71:72] 'で' --> 'で'",
|
520 |
+
"replace text[101:103] --> decoded_text[94:95] 'で' --> 'で'"
|
521 |
+
],
|
522 |
+
"n_oov_chars": 0,
|
523 |
+
"oov_ratio": 0.0,
|
524 |
+
"oov_charset": "[]"
|
525 |
+
},
|
526 |
+
{
|
527 |
+
"text": "分からない場合は先生に聞けば、すぐ教えて下さるので早めに聞いたら良いと思います。",
|
528 |
+
"decoded_text": "分からない場合は先生に聞けば、すぐ教えて下さるので早めに聞いたら良いと思います。",
|
529 |
+
"diff": [
|
530 |
+
"replace text[13:15] --> decoded_text[13:14] 'ば' --> 'ば'",
|
531 |
+
"replace text[17:19] --> decoded_text[16:17] 'ぐ' --> 'ぐ'",
|
532 |
+
"replace text[26:28] --> decoded_text[24:25] 'で' --> 'で'"
|
533 |
+
],
|
534 |
+
"n_oov_chars": 0,
|
535 |
+
"oov_ratio": 0.0,
|
536 |
+
"oov_charset": "[]"
|
537 |
+
},
|
538 |
+
{
|
539 |
+
"text": "夏休みも学校へ行き、先生に教えていただきました。試験が終わってからは合格しているようにと祈っていました。合格と聞いて、とても嬉しかったです。これからは取得して終わりではなく、取得した資格をさらに活かせれるように勉強していきたいと思います。",
|
540 |
+
"decoded_text": "夏休みも学校へ行き、先生に教えていただきました。試験が終わってからは合格しているようにと祈っていました。合格と聞いて、とても嬉しかったです。これからは取得して終わりではなく、取得した資格をさらに活かせれるように勉強していきたいと思います。",
|
541 |
+
"diff": [
|
542 |
+
"replace text[18:20] --> decoded_text[18:19] 'だ' --> 'だ'",
|
543 |
+
"replace text[27:29] --> decoded_text[26:27] 'が' --> 'が'",
|
544 |
+
"replace text[69:71] --> decoded_text[67:68] 'で' --> 'で'",
|
545 |
+
"replace text[85:87] --> decoded_text[82:83] 'で' --> 'で'"
|
546 |
+
],
|
547 |
+
"n_oov_chars": 0,
|
548 |
+
"oov_ratio": 0.0,
|
549 |
+
"oov_charset": "[]"
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"text": "自分の苦手な問題を1つでも無くし、どの問題が出題されても対応できるように勉強すれば大丈夫です!",
|
553 |
+
"decoded_text": "自分の苦手な問題を1つでも無くし、どの問題が出題されても対応できるように勉強すれば大丈夫です!",
|
554 |
+
"diff": [
|
555 |
+
"replace text[11:13] --> decoded_text[11:12] 'で' --> 'で'",
|
556 |
+
"replace text[18:20] --> decoded_text[17:18] 'ど' --> 'ど'",
|
557 |
+
"replace text[23:25] --> decoded_text[21:22] 'が' --> 'が'",
|
558 |
+
"replace text[33:35] --> decoded_text[30:31] 'で' --> 'で'",
|
559 |
+
"replace text[44:46] --> decoded_text[40:41] 'ば' --> 'ば'",
|
560 |
+
"replace text[49:51] --> decoded_text[44:45] 'で' --> 'で'"
|
561 |
+
],
|
562 |
+
"n_oov_chars": 0,
|
563 |
+
"oov_ratio": 0.0,
|
564 |
+
"oov_charset": "[]"
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"text": "私は計算が苦手なので、マンツーマンで先生に教えていただき、頑張りました。資格取得を目標に専門学校に入学したので合格通知が届いたときは本当に嬉しかったです。さらに次の資格取得に向け頑張りたいと思います。",
|
568 |
+
"decoded_text": "私は計算が苦手なので、マンツーマンで先生に教えていただき、頑張りました。資格取得を目標に専門学校に入学したので合格通知が届いたときは本当に嬉しかったです。さらに次の資格取得に向け頑張りたいと思います。",
|
569 |
+
"diff": [
|
570 |
+
"replace text[4:6] --> decoded_text[4:5] 'が' --> 'が'",
|
571 |
+
"replace text[10:12] --> decoded_text[9:10] 'で' --> 'で'",
|
572 |
+
"replace text[19:21] --> decoded_text[17:18] 'で' --> 'で'",
|
573 |
+
"replace text[29:31] --> decoded_text[26:27] 'だ' --> 'だ'",
|
574 |
+
"replace text[58:60] --> decoded_text[54:55] 'で' --> 'で'",
|
575 |
+
"replace text[64:66] --> decoded_text[59:60] 'が' --> 'が'",
|
576 |
+
"replace text[80:82] --> decoded_text[74:75] 'で' --> 'で'"
|
577 |
+
],
|
578 |
+
"n_oov_chars": 0,
|
579 |
+
"oov_ratio": 0.0,
|
580 |
+
"oov_charset": "[]"
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"text": "得意分野より苦手分野を克服することで合格に近づけると思います。最後まで諦めずに頑張ってください。",
|
584 |
+
"decoded_text": "得意分野より苦手分野を克服することで合格に近づけると���います。最後まで諦めずに頑張ってください。",
|
585 |
+
"diff": [
|
586 |
+
"replace text[17:19] --> decoded_text[17:18] 'で' --> 'で'",
|
587 |
+
"replace text[23:25] --> decoded_text[22:23] 'づ' --> 'づ'",
|
588 |
+
"replace text[36:38] --> decoded_text[34:35] 'で' --> 'で'",
|
589 |
+
"replace text[40:42] --> decoded_text[37:38] 'ず' --> 'ず'",
|
590 |
+
"replace text[48:50] --> decoded_text[44:45] 'だ' --> 'だ'"
|
591 |
+
],
|
592 |
+
"n_oov_chars": 0,
|
593 |
+
"oov_ratio": 0.0,
|
594 |
+
"oov_charset": "[]"
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"text": "実技が苦手だったので、学校に放課後も残って頑張りました。復習も必ずしました。この資格を仕事で活かせるように頑張りたいと思います。、これからも、まだまだ他の資格にも挑戦して行きます。",
|
598 |
+
"decoded_text": "実技が苦手だったので、学校に放課後も残って頑張りました。復習も必ずしました。この資格を仕事で活かせるように頑張りたいと思います。、これからも、まだまだ他の資格にも挑戦して行きます。",
|
599 |
+
"diff": [
|
600 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
601 |
+
"replace text[6:8] --> decoded_text[5:6] 'だ' --> 'だ'",
|
602 |
+
"replace text[11:13] --> decoded_text[9:10] 'で' --> 'で'",
|
603 |
+
"replace text[35:37] --> decoded_text[32:33] 'ず' --> 'ず'",
|
604 |
+
"replace text[49:51] --> decoded_text[45:46] 'で' --> 'で'",
|
605 |
+
"replace text[77:79] --> decoded_text[72:73] 'だ' --> 'だ'",
|
606 |
+
"replace text[80:82] --> decoded_text[74:75] 'だ' --> 'だ'"
|
607 |
+
],
|
608 |
+
"n_oov_chars": 0,
|
609 |
+
"oov_ratio": 0.0,
|
610 |
+
"oov_charset": "[]"
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"text": "自分の苦手なことで壁にぶつかっても、諦めずに頑張ってください。後悔だけはしないように、やるからには全力で挑んでください。",
|
614 |
+
"decoded_text": "自分の苦手なことで壁にぶつかっても、諦めずに頑張ってください。後悔だけはしないように、やるからには全力で挑んでください。",
|
615 |
+
"diff": [
|
616 |
+
"replace text[8:10] --> decoded_text[8:9] 'で' --> 'で'",
|
617 |
+
"replace text[12:14] --> decoded_text[11:12] 'ぶ' --> 'ぶ'",
|
618 |
+
"replace text[22:24] --> decoded_text[20:21] 'ず' --> 'ず'",
|
619 |
+
"replace text[30:32] --> decoded_text[27:28] 'だ' --> 'だ'",
|
620 |
+
"replace text[37:39] --> decoded_text[33:34] 'だ' --> 'だ'",
|
621 |
+
"replace text[56:58] --> decoded_text[51:52] 'で' --> 'で'",
|
622 |
+
"replace text[60:62] --> decoded_text[54:55] 'で' --> 'で'",
|
623 |
+
"replace text[63:65] --> decoded_text[56:57] 'だ' --> 'だ'"
|
624 |
+
],
|
625 |
+
"n_oov_chars": 0,
|
626 |
+
"oov_ratio": 0.0,
|
627 |
+
"oov_charset": "[]"
|
628 |
+
},
|
629 |
+
{
|
630 |
+
"text": "前回正解率の低かったアルゴリズムや表計算を重点的に復習しました。本番では諦めずに問題を読み続けました。大学編入しても、情報処理技術者試験の勉強は続けていき、次は応用情報技術者試験にもチャレンジしていきたいです。",
|
631 |
+
"decoded_text": "前回正解率の低かったアルゴリズムや表計算を重点的に復習しました。本番では諦めずに問題を読み続けました。大学編入しても、情報処理技術者試験の勉強は続けていき、次は応用情報技術者試験にもチャレンジしていきたいです。",
|
632 |
+
"diff": [
|
633 |
+
"replace text[12:14] --> decoded_text[12:13] 'ゴ' --> 'ゴ'",
|
634 |
+
"replace text[15:17] --> decoded_text[14:15] 'ズ' --> 'ズ'",
|
635 |
+
"replace text[36:38] --> decoded_text[34:35] 'で' --> 'で'",
|
636 |
+
"replace text[41:43] --> decoded_text[38:39] 'ず' --> 'ず'",
|
637 |
+
"replace text[99:101] --> decoded_text[95:96] 'ジ' --> 'ジ'",
|
638 |
+
"replace text[107:109] --> decoded_text[102:103] 'で' --> 'で'"
|
639 |
+
],
|
640 |
+
"n_oov_chars": 0,
|
641 |
+
"oov_ratio": 0.0,
|
642 |
+
"oov_charset": "[]"
|
643 |
+
},
|
644 |
+
{
|
645 |
+
"text": "普段の授業をしっかり聞くことと、試験中は諦めない気持ちがあれば大丈夫です。",
|
646 |
+
"decoded_text": "普段の授業をしっかり聞くことと、試験中は諦めない気持ちがあれば大丈夫です。",
|
647 |
+
"diff": [
|
648 |
+
"replace text[27:29] --> decoded_text[27:28] 'が' --> 'が'",
|
649 |
+
"replace text[31:33] --> decoded_text[30:31] 'ば' --> 'ば'",
|
650 |
+
"replace text[36:38] --> decoded_text[34:35] 'で' --> 'で'"
|
651 |
+
],
|
652 |
+
"n_oov_chars": 0,
|
653 |
+
"oov_ratio": 0.0,
|
654 |
+
"oov_charset": "[]"
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"text": "この資格が関係する、舞台現場でアルバイトをしていました。結果にはあまり自信がなかったのですが、合格通知が届いた時は何回も見直し、とても嬉しかったて��す。",
|
658 |
+
"decoded_text": "この資格が関係する、舞台現場でアルバイトをしていました。結果にはあまり自信がなかったのですが、合格通知が届いた時は何回も見直し、とても嬉しかったです。",
|
659 |
+
"diff": [
|
660 |
+
"replace text[4:6] --> decoded_text[4:5] 'が' --> 'が'",
|
661 |
+
"replace text[15:17] --> decoded_text[14:15] 'で' --> 'で'",
|
662 |
+
"replace text[19:21] --> decoded_text[17:18] 'バ' --> 'バ'",
|
663 |
+
"replace text[40:42] --> decoded_text[37:38] 'が' --> 'が'",
|
664 |
+
"replace text[47:49] --> decoded_text[43:44] 'で' --> 'で'",
|
665 |
+
"replace text[50:52] --> decoded_text[45:46] 'が' --> 'が'",
|
666 |
+
"replace text[57:59] --> decoded_text[51:52] 'が' --> 'が'",
|
667 |
+
"replace text[79:81] --> decoded_text[72:73] 'で' --> 'で'"
|
668 |
+
],
|
669 |
+
"n_oov_chars": 0,
|
670 |
+
"oov_ratio": 0.0,
|
671 |
+
"oov_charset": "[]"
|
672 |
+
},
|
673 |
+
{
|
674 |
+
"text": "資格対策授業で分からないところは積極的に質問しました。また復習はかかさずしました。資格を取得したからといって、勉強したことを忘れたら意味がありませんので、資格に恥じないよう に今後も頑張りたいと思います。",
|
675 |
+
"decoded_text": "資格対策授業で分からないところは積極的に質問しました。また復習はかかさずしました。資格を取得したからといって、勉強したことを忘れたら意味がありませんので、資格に恥じないよう に今後も頑張りたいと思います。",
|
676 |
+
"diff": [
|
677 |
+
"replace text[6:8] --> decoded_text[6:7] 'で' --> 'で'",
|
678 |
+
"replace text[36:38] --> decoded_text[35:36] 'ず' --> 'ず'",
|
679 |
+
"replace text[70:72] --> decoded_text[68:69] 'が' --> 'が'",
|
680 |
+
"replace text[78:80] --> decoded_text[75:76] 'で' --> 'で'",
|
681 |
+
"replace text[85:87] --> decoded_text[81:82] 'じ' --> 'じ'"
|
682 |
+
],
|
683 |
+
"n_oov_chars": 0,
|
684 |
+
"oov_ratio": 0.0,
|
685 |
+
"oov_charset": "[]"
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"text": "試験を想定して取り組みました。同じ試験を受けるクラスメイトに分からない所を聞きました。国家資格なので、持っていて損はないと思います。",
|
689 |
+
"decoded_text": "試験を想定して取り組みました。同じ試験を受けるクラスメイトに分からない所を聞きました。国家資格なので、持っていて損はないと思います。",
|
690 |
+
"diff": [
|
691 |
+
"replace text[16:18] --> decoded_text[16:17] 'じ' --> 'じ'",
|
692 |
+
"replace text[50:52] --> decoded_text[49:50] 'で' --> 'で'"
|
693 |
+
],
|
694 |
+
"n_oov_chars": 0,
|
695 |
+
"oov_ratio": 0.0,
|
696 |
+
"oov_charset": "[]"
|
697 |
+
},
|
698 |
+
{
|
699 |
+
"text": "何度も、学科、実技、判断(旧要素)の対策をして合格を目指して頑張ってください。",
|
700 |
+
"decoded_text": "何度も、学科、実技、判断(旧要素)の対策をして合格を目指して頑張ってください。",
|
701 |
+
"diff": [
|
702 |
+
"replace text[35:37] --> decoded_text[35:36] 'だ' --> 'だ'"
|
703 |
+
],
|
704 |
+
"n_oov_chars": 0,
|
705 |
+
"oov_ratio": 0.0,
|
706 |
+
"oov_charset": "[]"
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"text": "自分の力を信じて落ち着いて望んでください。練習が実を結ぶはずです。",
|
710 |
+
"decoded_text": "自分の力を信じて落ち着いて望んでください。練習が実を結ぶはずです。",
|
711 |
+
"diff": [
|
712 |
+
"replace text[6:8] --> decoded_text[6:7] 'じ' --> 'じ'",
|
713 |
+
"replace text[16:18] --> decoded_text[15:16] 'で' --> 'で'",
|
714 |
+
"replace text[19:21] --> decoded_text[17:18] 'だ' --> 'だ'",
|
715 |
+
"replace text[26:28] --> decoded_text[23:24] 'が' --> 'が'",
|
716 |
+
"replace text[31:33] --> decoded_text[27:28] 'ぶ' --> 'ぶ'",
|
717 |
+
"replace text[34:38] --> decoded_text[29:31] 'ずで' --> 'ずで'"
|
718 |
+
],
|
719 |
+
"n_oov_chars": 0,
|
720 |
+
"oov_ratio": 0.0,
|
721 |
+
"oov_charset": "[]"
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"text": "分からない所は、先生に質問して勉強すれば大丈夫です。私は納得するまで解説してくださった先生に感謝しています。",
|
725 |
+
"decoded_text": "分からない所は、先生に質問して勉強すれば大丈夫です。私は納得するまで解説してくださった先生に感謝しています。",
|
726 |
+
"diff": [
|
727 |
+
"replace text[19:21] --> decoded_text[19:20] 'ば' --> 'ば'",
|
728 |
+
"replace text[24:26] --> decoded_text[23:24] 'で' --> 'で'",
|
729 |
+
"replace text[35:37] --> decoded_text[33:34] 'で' --> 'で'",
|
730 |
+
"replace text[42:44] --> decoded_text[39:40] 'だ' --> 'だ'"
|
731 |
+
],
|
732 |
+
"n_oov_chars": 0,
|
733 |
+
"oov_ratio": 0.0,
|
734 |
+
"oov_charset": "[]"
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"text": "授業で分からない所は先生や友達に聞きました。問題はまず自分で解いて分からない用語は、本やネ��トで調べました。試験には合格しましたが、まだ分からないことが多いので、資格で勉強した知識をもっと深めていきたいです。",
|
738 |
+
"decoded_text": "授業で分からない所は先生や友達に聞きました。問題はまず自分で解いて分からない用語は、本やネットで調べました。試験には合格しましたが、まだ分からないことが多いので、資格で勉強した知識をもっと深めていきたいです。",
|
739 |
+
"diff": [
|
740 |
+
"replace text[2:4] --> decoded_text[2:3] 'で' --> 'で'",
|
741 |
+
"replace text[27:29] --> decoded_text[26:27] 'ず' --> 'ず'",
|
742 |
+
"replace text[31:33] --> decoded_text[29:30] 'で' --> 'で'",
|
743 |
+
"replace text[50:52] --> decoded_text[47:48] 'で' --> 'で'",
|
744 |
+
"replace text[53:55] --> decoded_text[49:50] 'べ' --> 'べ'",
|
745 |
+
"replace text[69:71] --> decoded_text[64:65] 'が' --> 'が'",
|
746 |
+
"replace text[73:75] --> decoded_text[67:68] 'だ' --> 'だ'",
|
747 |
+
"replace text[82:84] --> decoded_text[75:76] 'が' --> 'が'",
|
748 |
+
"replace text[87:89] --> decoded_text[79:80] 'で' --> 'で'",
|
749 |
+
"replace text[92:94] --> decoded_text[83:84] 'で' --> 'で'",
|
750 |
+
"replace text[111:113] --> decoded_text[101:102] 'で' --> 'で'"
|
751 |
+
],
|
752 |
+
"n_oov_chars": 0,
|
753 |
+
"oov_ratio": 0.0,
|
754 |
+
"oov_charset": "[]"
|
755 |
+
},
|
756 |
+
{
|
757 |
+
"text": "私は勉強が苦手でしたが、毎日勉強してAランクを取得できました。諦めないでください。",
|
758 |
+
"decoded_text": "私は勉強が苦手でしたが、毎日勉強してAランクを取得できました。諦めないでください。",
|
759 |
+
"diff": [
|
760 |
+
"replace text[4:6] --> decoded_text[4:5] 'が' --> 'が'",
|
761 |
+
"replace text[8:10] --> decoded_text[7:8] 'で' --> 'で'",
|
762 |
+
"replace text[12:14] --> decoded_text[10:11] 'が' --> 'が'",
|
763 |
+
"replace text[38:40] --> decoded_text[35:36] 'で' --> 'で'",
|
764 |
+
"replace text[41:43] --> decoded_text[37:38] 'だ' --> 'だ'"
|
765 |
+
],
|
766 |
+
"n_oov_chars": 0,
|
767 |
+
"oov_ratio": 0.0,
|
768 |
+
"oov_charset": "[]"
|
769 |
+
},
|
770 |
+
{
|
771 |
+
"text": "受かりたい気持ちがあれば、自然と頑張れます! 最後まで諦めずに頑張ってください!",
|
772 |
+
"decoded_text": "受かりたい気持ちがあれば、自然と頑張れます! 最後まで諦めずに頑張ってください!",
|
773 |
+
"diff": [
|
774 |
+
"replace text[8:10] --> decoded_text[8:9] 'が' --> 'が'",
|
775 |
+
"replace text[12:14] --> decoded_text[11:12] 'ば' --> 'ば'",
|
776 |
+
"replace text[28:30] --> decoded_text[26:27] 'で' --> 'で'",
|
777 |
+
"replace text[32:34] --> decoded_text[29:30] 'ず' --> 'ず'",
|
778 |
+
"replace text[40:42] --> decoded_text[36:37] 'だ' --> 'だ'"
|
779 |
+
],
|
780 |
+
"n_oov_chars": 0,
|
781 |
+
"oov_ratio": 0.0,
|
782 |
+
"oov_charset": "[]"
|
783 |
+
},
|
784 |
+
{
|
785 |
+
"text": "対策授業を真面目に受けました。分からない所は、先生に質問し自分が納得するまで勉強しました。合格をいただいて、本当に嬉しかったです。",
|
786 |
+
"decoded_text": "対策授業を真面目に受けました。分からない所は、先生に質問し自分が納得するまで勉強しました。合格をいただいて、本当に嬉しかったです。",
|
787 |
+
"diff": [
|
788 |
+
"replace text[31:33] --> decoded_text[31:32] 'が' --> 'が'",
|
789 |
+
"replace text[38:40] --> decoded_text[37:38] 'で' --> 'で'",
|
790 |
+
"replace text[52:54] --> decoded_text[50:51] 'だ' --> 'だ'",
|
791 |
+
"replace text[65:67] --> decoded_text[62:63] 'で' --> 'で'"
|
792 |
+
],
|
793 |
+
"n_oov_chars": 0,
|
794 |
+
"oov_ratio": 0.0,
|
795 |
+
"oov_charset": "[]"
|
796 |
+
},
|
797 |
+
{
|
798 |
+
"text": "授業を真面目に聞いていたら、必ず合格できます! ここには専門の先生がいるので一人一人に丁寧に教えてくれますよ。",
|
799 |
+
"decoded_text": "授業を真面目に聞いていたら、必ず合格できます! ここには専門の先生がいるので一人一人に丁寧に教えてくれますよ。",
|
800 |
+
"diff": [
|
801 |
+
"replace text[15:17] --> decoded_text[15:16] 'ず' --> 'ず'",
|
802 |
+
"replace text[19:21] --> decoded_text[18:19] 'で' --> 'で'",
|
803 |
+
"replace text[35:37] --> decoded_text[33:34] 'が' --> 'が'",
|
804 |
+
"replace text[40:42] --> decoded_text[37:38] 'で' --> 'で'"
|
805 |
+
],
|
806 |
+
"n_oov_chars": 0,
|
807 |
+
"oov_ratio": 0.0,
|
808 |
+
"oov_charset": "[]"
|
809 |
+
},
|
810 |
+
{
|
811 |
+
"text": "弱点を無くすために、間違えたところは、何回も復習をしました。合格して本当に嬉しかったです。この資格を就職でも活かしたいと思います。",
|
812 |
+
"decoded_text": "弱点を無くすために、間違えたところは、何回も復習をしました。合格して本当に嬉しかったです。���の資格を就職でも活かしたいと思います。",
|
813 |
+
"diff": [
|
814 |
+
"replace text[42:44] --> decoded_text[42:43] 'で' --> 'で'",
|
815 |
+
"replace text[53:55] --> decoded_text[52:53] 'で' --> 'で'"
|
816 |
+
],
|
817 |
+
"n_oov_chars": 0,
|
818 |
+
"oov_ratio": 0.0,
|
819 |
+
"oov_charset": "[]"
|
820 |
+
},
|
821 |
+
{
|
822 |
+
"text": "受かりたい気持ちがあればきっと合格できます! 分からない所は、諦めずに先生に質問して克服してください!",
|
823 |
+
"decoded_text": "受かりたい気持ちがあればきっと合格できます! 分からない所は、諦めずに先生に質問して克服してください!",
|
824 |
+
"diff": [
|
825 |
+
"replace text[8:10] --> decoded_text[8:9] 'が' --> 'が'",
|
826 |
+
"replace text[12:14] --> decoded_text[11:12] 'ば' --> 'ば'",
|
827 |
+
"replace text[19:21] --> decoded_text[17:18] 'で' --> 'で'",
|
828 |
+
"replace text[36:38] --> decoded_text[33:34] 'ず' --> 'ず'",
|
829 |
+
"replace text[51:53] --> decoded_text[47:48] 'だ' --> 'だ'"
|
830 |
+
],
|
831 |
+
"n_oov_chars": 0,
|
832 |
+
"oov_ratio": 0.0,
|
833 |
+
"oov_charset": "[]"
|
834 |
+
},
|
835 |
+
{
|
836 |
+
"text": "問題集を何回も解くことが一番大切です。また電車通学なので通学中にも勉強しました。資格を取得できたので、今後の就職活動に役立てたいと思います。また将来の夢のために、この資格を有効に活用したいと思います。",
|
837 |
+
"decoded_text": "問題集を何回も解くことが一番大切です。また電車通学なので通学中にも勉強しました。資格を取得できたので、今後の就職活動に役立てたいと思います。また将来の夢のために、この資格を有効に活用したいと思います。",
|
838 |
+
"diff": [
|
839 |
+
"replace text[11:13] --> decoded_text[11:12] 'が' --> 'が'",
|
840 |
+
"replace text[17:19] --> decoded_text[16:17] 'で' --> 'で'",
|
841 |
+
"replace text[29:31] --> decoded_text[27:28] 'で' --> 'で'",
|
842 |
+
"replace text[48:50] --> decoded_text[45:46] 'で' --> 'で'",
|
843 |
+
"replace text[53:55] --> decoded_text[49:50] 'で' --> 'で'"
|
844 |
+
],
|
845 |
+
"n_oov_chars": 0,
|
846 |
+
"oov_ratio": 0.0,
|
847 |
+
"oov_charset": "[]"
|
848 |
+
},
|
849 |
+
{
|
850 |
+
"text": "授業で手厚い資格対策をしてくださったので、それを元に復習しました。資格を取得することも、もちろんですが、その資格で勉強した事をきちんと身につけ、今後に役立てていきたいです。",
|
851 |
+
"decoded_text": "授業で手厚い資格対策をしてくださったので、それを元に復習しました。資格を取得することも、もちろんですが、その資格で勉強した事をきちんと身につけ、今後に役立てていきたいです。",
|
852 |
+
"diff": [
|
853 |
+
"replace text[2:4] --> decoded_text[2:3] 'で' --> 'で'",
|
854 |
+
"replace text[15:17] --> decoded_text[14:15] 'だ' --> 'だ'",
|
855 |
+
"replace text[21:23] --> decoded_text[19:20] 'で' --> 'で'",
|
856 |
+
"replace text[51:53] --> decoded_text[48:49] 'で' --> 'で'",
|
857 |
+
"replace text[54:56] --> decoded_text[50:51] 'が' --> 'が'",
|
858 |
+
"replace text[61:63] --> decoded_text[56:57] 'で' --> 'で'",
|
859 |
+
"replace text[89:91] --> decoded_text[83:84] 'で' --> 'で'"
|
860 |
+
],
|
861 |
+
"n_oov_chars": 0,
|
862 |
+
"oov_ratio": 0.0,
|
863 |
+
"oov_charset": "[]"
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"text": "自分が勉強をした分だけ結果はついてきます。自分の夢を後押ししてくれる資格は絶対持ってるべきです。自分の夢の為に頑張りましょう。",
|
867 |
+
"decoded_text": "自分が勉強をした分だけ結果はついてきます。自分の夢を後押ししてくれる資格は絶対持ってるべきです。自分の夢の為に頑張りましょう。",
|
868 |
+
"diff": [
|
869 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
870 |
+
"replace text[10:12] --> decoded_text[9:10] 'だ' --> 'だ'",
|
871 |
+
"replace text[45:47] --> decoded_text[43:44] 'べ' --> 'べ'",
|
872 |
+
"replace text[48:50] --> decoded_text[45:46] 'で' --> 'で'"
|
873 |
+
],
|
874 |
+
"n_oov_chars": 0,
|
875 |
+
"oov_ratio": 0.0,
|
876 |
+
"oov_charset": "[]"
|
877 |
+
},
|
878 |
+
{
|
879 |
+
"text": "対策授業には積極的に参加し、真面目に勉強しました。将来の事を考えたとき、この資格は絶対取得すべきだと思い、受験しました。資格試験の勉強は、自分にプラスになったと思います。",
|
880 |
+
"decoded_text": "対策授業には積極的に参加し、真面目に勉強しました。将来の事を考えたとき、この資格は絶対取得すべきだと思い、受験しました。資格試験の勉強は、自分にプラスになったと思います。",
|
881 |
+
"diff": [
|
882 |
+
"replace text[46:48] --> decoded_text[46:47] 'べ' --> 'べ'",
|
883 |
+
"replace text[49:51] --> decoded_text[48:49] 'だ' --> 'だ'",
|
884 |
+
"replace text[74:76] --> decoded_text[72:73] 'プ' --> 'プ'"
|
885 |
+
],
|
886 |
+
"n_oov_chars": 0,
|
887 |
+
"oov_ratio": 0.0,
|
888 |
+
"oov_charset": "[]"
|
889 |
+
},
|
890 |
+
{
|
891 |
+
"text": "対策用の教科書を繰り返し解いて覚えました。合格をいただいて本当に嬉しかったです。頑張った結果だったので自分に自信がつきました。",
|
892 |
+
"decoded_text": "対策用の教科書を繰り返し解いて覚えました。合格をいただいて本当に嬉しかったです。頑張った結果だったので自分に自信がつきました。",
|
893 |
+
"diff": [
|
894 |
+
"replace text[26:28] --> decoded_text[26:27] 'だ' --> 'だ'",
|
895 |
+
"replace text[38:40] --> decoded_text[37:38] 'で' --> 'で'",
|
896 |
+
"replace text[48:50] --> decoded_text[46:47] 'だ' --> 'だ'",
|
897 |
+
"replace text[53:55] --> decoded_text[50:51] 'で' --> 'で'",
|
898 |
+
"replace text[60:62] --> decoded_text[56:57] 'が' --> 'が'"
|
899 |
+
],
|
900 |
+
"n_oov_chars": 0,
|
901 |
+
"oov_ratio": 0.0,
|
902 |
+
"oov_charset": "[]"
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"text": "この試験に限らず、どの資格を勉強するにもまず、繰り返し問題を解いて覚えることが大事です。そうする事で問題にも慣れ解けるようになります。",
|
906 |
+
"decoded_text": "この試験に限らず、どの資格を勉強するにもまず、繰り返し問題を解いて覚えることが大事です。そうする事で問題にも慣れ解けるようになります。",
|
907 |
+
"diff": [
|
908 |
+
"replace text[9:11] --> decoded_text[9:10] 'ど' --> 'ど'",
|
909 |
+
"replace text[22:24] --> decoded_text[21:22] 'ず' --> 'ず'",
|
910 |
+
"replace text[40:42] --> decoded_text[38:39] 'が' --> 'が'",
|
911 |
+
"replace text[44:46] --> decoded_text[41:42] 'で' --> 'で'",
|
912 |
+
"replace text[53:55] --> decoded_text[49:50] 'で' --> 'で'"
|
913 |
+
],
|
914 |
+
"n_oov_chars": 0,
|
915 |
+
"oov_ratio": 0.0,
|
916 |
+
"oov_charset": "[]"
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"text": "練習で間違えた問題は2回、3回と繰り返し解きました。やっぱり一番の目的は就職です。就職する際に資格を持っていると有利になるので、合格して本当に良かったです。",
|
920 |
+
"decoded_text": "練習で間違えた問題は2回、3回と繰り返し解きました。やっぱり一番の目的は就職です。就職する際に資格を持っていると有利になるので、合格して本当に良かったです。",
|
921 |
+
"diff": [
|
922 |
+
"replace text[28:30] --> decoded_text[28:29] 'ぱ' --> 'ぱ'",
|
923 |
+
"replace text[39:41] --> decoded_text[38:39] 'で' --> 'で'",
|
924 |
+
"replace text[64:66] --> decoded_text[62:63] 'で' --> 'で'",
|
925 |
+
"replace text[78:80] --> decoded_text[75:76] 'で' --> 'で'"
|
926 |
+
],
|
927 |
+
"n_oov_chars": 0,
|
928 |
+
"oov_ratio": 0.0,
|
929 |
+
"oov_charset": "[]"
|
930 |
+
},
|
931 |
+
{
|
932 |
+
"text": "やはり資格がないと就職は難しいと思います。取得できるかどうか不安はあると思いますが、絶対受かっているという意思を持って頑張ってください。また、対策授業には積極的に参加してください。",
|
933 |
+
"decoded_text": "やはり資格がないと就職は難しいと思います。取得できるかどうか不安はあると思いますが、絶対受かっているという意思を持って頑張ってください。また、対策授業には積極的に参加してください。",
|
934 |
+
"diff": [
|
935 |
+
"replace text[5:7] --> decoded_text[5:6] 'が' --> 'が'",
|
936 |
+
"replace text[24:26] --> decoded_text[23:24] 'で' --> 'で'",
|
937 |
+
"replace text[29:31] --> decoded_text[27:28] 'ど' --> 'ど'",
|
938 |
+
"replace text[67:69] --> decoded_text[64:65] 'だ' --> 'だ'",
|
939 |
+
"replace text[90:92] --> decoded_text[86:87] 'だ' --> 'だ'"
|
940 |
+
],
|
941 |
+
"n_oov_chars": 0,
|
942 |
+
"oov_ratio": 0.0,
|
943 |
+
"oov_charset": "[]"
|
944 |
+
},
|
945 |
+
{
|
946 |
+
"text": "試験範囲は広いですが、出題は同じようなパターンが多いので、繰り返し過去問を解いていれば合格できますよ。",
|
947 |
+
"decoded_text": "試験範囲は広いですが、出題は同じようなパターンが多いので、繰り返し過去問を解いていれば合格できますよ。",
|
948 |
+
"diff": [
|
949 |
+
"replace text[7:9] --> decoded_text[7:8] 'で' --> 'で'",
|
950 |
+
"replace text[10:12] --> decoded_text[9:10] 'が' --> 'が'",
|
951 |
+
"replace text[44:46] --> decoded_text[42:43] 'ば' --> 'ば'",
|
952 |
+
"replace text[48:50] --> decoded_text[45:46] 'で' --> 'で'"
|
953 |
+
],
|
954 |
+
"n_oov_chars": 0,
|
955 |
+
"oov_ratio": 0.0,
|
956 |
+
"oov_charset": "[]"
|
957 |
+
},
|
958 |
+
{
|
959 |
+
"text": "通学時の電車の中でひたすら参考書を読み解き、最後の最後まで一生懸命取り組みました。また今までの授業をしっかり聞いていたので、分からないところも��く、無事合格できました。",
|
960 |
+
"decoded_text": "通学時の電車の中でひたすら参考書を読み解き、最後の最後まで一生懸命取り組みました。また今までの授業をしっかり聞いていたので、分からないところも無く、無事合格できました。",
|
961 |
+
"diff": [
|
962 |
+
"replace text[8:10] --> decoded_text[8:9] 'で' --> 'で'",
|
963 |
+
"replace text[29:31] --> decoded_text[28:29] 'で' --> 'で'",
|
964 |
+
"replace text[47:49] --> decoded_text[45:46] 'で' --> 'で'",
|
965 |
+
"replace text[63:65] --> decoded_text[60:61] 'で' --> 'で'",
|
966 |
+
"replace text[82:84] --> decoded_text[78:79] 'で' --> 'で'"
|
967 |
+
],
|
968 |
+
"n_oov_chars": 0,
|
969 |
+
"oov_ratio": 0.0,
|
970 |
+
"oov_charset": "[]"
|
971 |
+
},
|
972 |
+
{
|
973 |
+
"text": "数多くの資格に挑戦し、合格した時の達成感を自分で感じ取ってもらえればと思います。",
|
974 |
+
"decoded_text": "数多くの資格に挑戦し、合格した時の達成感を自分で感じ取ってもらえればと思います。",
|
975 |
+
"diff": [
|
976 |
+
"replace text[23:25] --> decoded_text[23:24] 'で' --> 'で'",
|
977 |
+
"replace text[26:28] --> decoded_text[25:26] 'じ' --> 'じ'",
|
978 |
+
"replace text[35:37] --> decoded_text[33:34] 'ば' --> 'ば'"
|
979 |
+
],
|
980 |
+
"n_oov_chars": 0,
|
981 |
+
"oov_ratio": 0.0,
|
982 |
+
"oov_charset": "[]"
|
983 |
+
},
|
984 |
+
{
|
985 |
+
"text": "学校の授業だけではなく、家でもプリントや過去問を何度も解いて勉強しました。資格を取ったらそこで終わりではなく、就職してからも役に立つ知識を身につける事ができたので、今後も活かしていこうと思います。",
|
986 |
+
"decoded_text": "学校の授業だけではなく、家でもプリントや過去問を何度も解いて勉強しました。資格を取ったらそこで終わりではなく、就職してからも役に立つ知識を身につける事ができたので、今後も活かしていこうと思います。",
|
987 |
+
"diff": [
|
988 |
+
"replace text[5:7] --> decoded_text[5:6] 'だ' --> 'だ'",
|
989 |
+
"replace text[8:10] --> decoded_text[7:8] 'で' --> 'で'",
|
990 |
+
"replace text[15:17] --> decoded_text[13:14] 'で' --> 'で'",
|
991 |
+
"replace text[18:20] --> decoded_text[15:16] 'プ' --> 'プ'",
|
992 |
+
"replace text[50:52] --> decoded_text[46:47] 'で' --> 'で'",
|
993 |
+
"replace text[55:57] --> decoded_text[50:51] 'で' --> 'で'",
|
994 |
+
"replace text[81:85] --> decoded_text[75:77] 'がで' --> 'がで'",
|
995 |
+
"replace text[88:90] --> decoded_text[80:81] 'で' --> 'で'"
|
996 |
+
],
|
997 |
+
"n_oov_chars": 0,
|
998 |
+
"oov_ratio": 0.0,
|
999 |
+
"oov_charset": "[]"
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"text": "頑張ったら合格できるので機会があれば、ぜひ資格はとっておくべきだと思います。",
|
1003 |
+
"decoded_text": "頑張ったら合格できるので機会があれば、ぜひ資格はとっておくべきだと思います。",
|
1004 |
+
"diff": [
|
1005 |
+
"replace text[7:9] --> decoded_text[7:8] 'で' --> 'で'",
|
1006 |
+
"replace text[12:14] --> decoded_text[11:12] 'で' --> 'で'",
|
1007 |
+
"replace text[16:18] --> decoded_text[14:15] 'が' --> 'が'",
|
1008 |
+
"replace text[20:22] --> decoded_text[17:18] 'ば' --> 'ば'",
|
1009 |
+
"replace text[23:25] --> decoded_text[19:20] 'ぜ' --> 'ぜ'",
|
1010 |
+
"replace text[34:36] --> decoded_text[29:30] 'べ' --> 'べ'",
|
1011 |
+
"replace text[37:39] --> decoded_text[31:32] 'だ' --> 'だ'"
|
1012 |
+
],
|
1013 |
+
"n_oov_chars": 0,
|
1014 |
+
"oov_ratio": 0.0,
|
1015 |
+
"oov_charset": "[]"
|
1016 |
+
},
|
1017 |
+
{
|
1018 |
+
"text": "この資格は音響関係の仕事で、どこに行っても活かしていけるので取得しました。 過去問は大事です。たくさんの過去問を解き、数式も出てくるので暗記だけではなく、公式も覚えるようにしました。",
|
1019 |
+
"decoded_text": "この資格は音響関係の仕事で、どこに行っても活かしていけるので取得しました。 過去問は大事です。たくさんの過去問を解き、数式も出てくるので暗記だけではなく、公式も覚えるようにしました。",
|
1020 |
+
"diff": [
|
1021 |
+
"replace text[12:14] --> decoded_text[12:13] 'で' --> 'で'",
|
1022 |
+
"replace text[15:17] --> decoded_text[14:15] 'ど' --> 'ど'",
|
1023 |
+
"replace text[31:33] --> decoded_text[29:30] 'で' --> 'で'",
|
1024 |
+
"replace text[47:49] --> decoded_text[44:45] 'で' --> 'で'",
|
1025 |
+
"replace text[71:73] --> decoded_text[67:68] 'で' --> 'で'",
|
1026 |
+
"replace text[75:77] --> decoded_text[70:71] 'だ' --> 'だ'",
|
1027 |
+
"replace text[78:80] --> decoded_text[72:73] 'で' --> 'で'"
|
1028 |
+
],
|
1029 |
+
"n_oov_chars": 0,
|
1030 |
+
"oov_ratio": 0.0,
|
1031 |
+
"oov_charset": "[]"
|
1032 |
+
},
|
1033 |
+
{
|
1034 |
+
"text": "資格を取りたいという気持ちがあれば��丈夫です。頑張ってください。",
|
1035 |
+
"decoded_text": "資格を取りたいという気持ちがあれば大丈夫です。頑張ってください。",
|
1036 |
+
"diff": [
|
1037 |
+
"replace text[13:15] --> decoded_text[13:14] 'が' --> 'が'",
|
1038 |
+
"replace text[17:19] --> decoded_text[16:17] 'ば' --> 'ば'",
|
1039 |
+
"replace text[22:24] --> decoded_text[20:21] 'で' --> 'で'",
|
1040 |
+
"replace text[31:33] --> decoded_text[28:29] 'だ' --> 'だ'"
|
1041 |
+
],
|
1042 |
+
"n_oov_chars": 0,
|
1043 |
+
"oov_ratio": 0.0,
|
1044 |
+
"oov_charset": "[]"
|
1045 |
+
}
|
1046 |
+
]
|
stats/compression_rate/Qwen.Qwen3-4B-Instruct-2507 @ cc100.ko.diff.json
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "낙천적(樂天的) : 세상과 인생을 즐겁고 좋은 것으로 여기는. 또는 그런 것. 인생을 즐길 줄 안다는 건 정말 행복한 일 아닐까요? 심리적으로 자신감과 낙천적인 태도를 갖게 하며, 새로운 아이디어를 얻도록 도움을 주는 노란색의 예쁜벽을 오디오로 채우신 고객님댁을 소개드립니다. ..more",
|
4 |
+
"decoded_text": "낙천적(樂天的) : 세상과 인생을 즐겁고 좋은 것으로 여기는. 또는 그런 것. 인생을 즐길 줄 안다는 건 정말 행복한 일 아닐까요? 심리적으로 자신감과 낙천적인 태도를 갖게 하며, 새로운 아이디어를 얻도록 도움을 주는 노란색의 예쁜벽을 오디오로 채우신 고객님댁을 소개드립니다. ..more",
|
5 |
+
"diff": [
|
6 |
+
"replace text[4:5] --> decoded_text[4:5] '樂' --> '樂'"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 1,
|
9 |
+
"oov_ratio": 0.006211180124223602,
|
10 |
+
"oov_charset": "[\"樂\"]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "명 태조는 포의(布衣)에서 몸을 일으켜 천하를 평정하였다. 천하 평정뒤에는 관리 등용시험에 경의(敬意)를 주로 하였으므로 문교(文敎)가 융성하게 되어 많은 학자를 배출시켰다. 그러나 당시의 학자는 대개 정주학(程朱學)을 신봉한 사람들 뿐이라 명대(明代)의 특색이 아직 나타나고 있지 않다. 그러므로 영락(永樂) 12년 호(胡)에게 명하여 「사서대전」,「오경대전」 을 찬술케 하였을때 어느 경(經)이나 모두 주자의 주(主)에 기본을 두고 있다. 이것에 의해서만도 명초(明初)의 학문이 주자학을 그대로 계승하였을뿐 독창성이 없었다는 것을 상상할 수 있다. 본론에서는 오강재(吳康齋) 사상에 대해 논하고 순수한 주자학자의 설선에 대한 내용을 먼저 언급한 뒤에 다시 오강재의 문인인 호거인(胡居仁), 진헌장(陳獻章), 누량(累諒)에 대해 알아보고자 한다.",
|
14 |
+
"decoded_text": "명 태조는 포의(布衣)에서 몸을 일으켜 천하를 평정하였다. 천하 평정뒤에는 관리 등용시험에 경의(敬意)를 주로 하였으므로 문교(文敎)가 융성하게 되어 많은 학자를 배출시켰다. 그러나 당시의 학자는 대개 정주학(程朱學)을 신봉한 사람들 뿐이라 명대(明代)의 특색이 아직 나타나고 있지 않다. 그러므로 영락(永樂) 12년 호(胡)에게 명하여 「사서대전」,「오경대전」 을 찬술케 하였을때 어느 경(經)이나 모두 주자의 주(主)에 기본을 두고 있다. 이것에 의해서만도 명초(明初)의 학문이 주자학을 그대로 계승하였을뿐 독창성이 없었다는 것을 상상할 수 있다. 본론에서는 오강재(吳康齋) 사상에 대해 논하고 순수한 주자학자의 설선에 대한 내용을 먼저 언급한 뒤에 다시 오강재의 문인인 호거인(胡居仁), 진헌장(陳獻章), 누량(累諒)에 대해 알아보고자 한다.",
|
15 |
+
"diff": [
|
16 |
+
"replace text[171:172] --> decoded_text[171:172] '樂' --> '樂'",
|
17 |
+
"replace text[401:402] --> decoded_text[401:402] '累' --> '累'"
|
18 |
+
],
|
19 |
+
"n_oov_chars": 2,
|
20 |
+
"oov_ratio": 0.004784688995215311,
|
21 |
+
"oov_charset": "[\"樂\", \"累\"]"
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"text": "역할을 수행해갔는지를 살펴보도록 하겠다. 1. 양명학 이전의 주자학 ... 과 육학(陸學) 1) 송대 주자학의 성립 데니스 트위쳇이『케임브리지 중국사 ... 독재적인 명 초기의 황제들{ 특히 홍무제의 경우 주자학을 체제교학으로",
|
25 |
+
"decoded_text": "역할을 수행해갔는지를 살펴보도록 하겠다. 1. 양명학 이전의 주자학 ... 과 육학(陸學) 1) 송대 주자학의 성립 데니스 트위쳇이『케임브리지 중국사 ... 독재적인 명 초기의 황제들{ 특히 홍무제의 경우 주자학을 체제교학으로",
|
26 |
+
"diff": [
|
27 |
+
"replace text[47:48] --> decoded_text[47:48] '陸' --> '陸'"
|
28 |
+
],
|
29 |
+
"n_oov_chars": 1,
|
30 |
+
"oov_ratio": 0.007936507936507936,
|
31 |
+
"oov_charset": "[\"陸\"]"
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"text": "확고한 지반을 굳혔다. 주자학이 명대에 와서 국가의 지도이념으로 확립되어 ... 양명학의 성립과 전개 1. 서론 朱子學(주자학)이 宋代(송대)의 학술 ... 기점으로 知行合一說(지행합일설), 致良知說(치양지설)을 속속 제출하여 주자학",
|
35 |
+
"decoded_text": "확고한 지반을 굳혔다. 주자학이 명대에 와서 국가의 지도이념으로 확립되어 ... 양명학의 성립과 전개 1. 서론 朱子學(주자학)이 宋代(송대)의 학술 ... 기점으로 知行合一說(지행합일설), 致良知說(치양지설)을 속속 제출하여 주자학",
|
36 |
+
"diff": [
|
37 |
+
"replace text[108:109] --> decoded_text[108:109] '良' --> '良'"
|
38 |
+
],
|
39 |
+
"n_oov_chars": 1,
|
40 |
+
"oov_ratio": 0.007692307692307693,
|
41 |
+
"oov_charset": "[\"良\"]"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"text": "▷주자:이(理)�� 만물의 근원이 되는 이치이자, 기(氣)의 활동 근거인 반면 기(氣)는 만물을 구성하는 재료로서 사물을 낳는 도구이다",
|
45 |
+
"decoded_text": "▷주자:이(理)란 만물의 근원이 되는 이치이자, 기(氣)의 활동 근거인 반면 기(氣)는 만물을 구성하는 재료로서 사물을 낳는 도구이다",
|
46 |
+
"diff": [
|
47 |
+
"replace text[6:7] --> decoded_text[6:7] '理' --> '理'"
|
48 |
+
],
|
49 |
+
"n_oov_chars": 1,
|
50 |
+
"oov_ratio": 0.013513513513513514,
|
51 |
+
"oov_charset": "[\"理\"]"
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"text": "예술이라는 한자(漢字)에서 ‘예(藝)’에는 본디 ‘심는다(種 ·樹)’는 뜻이 있으며, 따라서 그것은‘기능(機能)’‘기술(技術)’을 의미하며 고대 동양에서 사대부가 필수적으로 갖추어야 했다. 육예(六藝:禮 ·樂 ·射 ·御 ·書 ·數)에서의 ‘예’는 인간적 결실을 얻기 위해 필요한 기초 교양의 씨를 뿌리고 인격의 꽃을 피우는 수단으로 여겼던 만큼 거기에는 인격도야의 의의도 있다고 하겠다.",
|
55 |
+
"decoded_text": "예술이라는 한자(漢字)에서 ‘예(藝)’에는 본디 ‘심는다(種 ·樹)’는 뜻이 있으며, 따라서 그것은‘기능(機能)’‘기술(技術)’을 의미하며 고대 동양에서 사대부가 필수적으로 갖추어야 했다. 육예(六藝:禮 ·樂 ·射 ·御 ·書 ·數)에서의 ‘예’는 인간적 결실을 얻기 위해 필요한 기초 교양의 씨를 뿌리고 인격의 꽃을 피우는 수단으로 여겼던 만큼 거기에는 인격도야의 의의도 있다고 하겠다.",
|
56 |
+
"diff": [
|
57 |
+
"replace text[115:116] --> decoded_text[115:116] '樂' --> '樂'"
|
58 |
+
],
|
59 |
+
"n_oov_chars": 1,
|
60 |
+
"oov_ratio": 0.004629629629629629,
|
61 |
+
"oov_charset": "[\"樂\"]"
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"text": "입지(立志)를 강조해 자경문, 성학집요, 격몽요결, 학교모범에서 항상 ... 이루어진다. 순언은 율곡이 도덕경 81장 중에서 유교 경전의 내용과 일치하며 ... 사상가 연구 - 율곡 이이 수강 과목 : 담당 교수 : 교수님 제출",
|
65 |
+
"decoded_text": "입지(立志)를 강조해 자경문, 성학집요, 격몽요결, 학교모범에서 항상 ... 이루어진다. 순언은 율곡이 도덕경 81장 중에서 유교 경전의 내용과 일치하며 ... 사상가 연구 - 율곡 이이 수강 과목 : 담당 교수 : 교수님 제출",
|
66 |
+
"diff": [
|
67 |
+
"replace text[3:4] --> decoded_text[3:4] '立' --> '立'"
|
68 |
+
],
|
69 |
+
"n_oov_chars": 1,
|
70 |
+
"oov_ratio": 0.007874015748031496,
|
71 |
+
"oov_charset": "[\"立\"]"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"text": "Ⅰ. 김용과 중국 무협문학 1. 작가 소개 김용(金用)은 1924년 ... 것이다. 그러므로 무협소설 작가 김용(金用)은 언론인이자 정치가인 차량융의 ... 출간하기 시작하였다. 이렇게 보면 무협소설 작가인 김용(金用)은 언론인",
|
75 |
+
"decoded_text": "Ⅰ. 김용과 중국 무협문학 1. 작가 소개 김용(金用)은 1924년 ... 것이다. 그러므로 무협소설 작가 김용(金用)은 언론인이자 정치가인 차량융의 ... 출간하기 시작하였다. 이렇게 보면 무협소설 작가인 김용(金用)은 언론인",
|
76 |
+
"diff": [
|
77 |
+
"replace text[27:28] --> decoded_text[27:28] '金' --> '金'",
|
78 |
+
"replace text[63:64] --> decoded_text[63:64] '金' --> '金'",
|
79 |
+
"replace text[119:120] --> decoded_text[119:120] '金' --> '金'"
|
80 |
+
],
|
81 |
+
"n_oov_chars": 3,
|
82 |
+
"oov_ratio": 0.023622047244094488,
|
83 |
+
"oov_charset": "[\"金\"]"
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"text": "3 이 때 상제님께서 미처 말씀을 마치지 아니하셨는데 면장 양 모(梁某)와 이장이 세금을 받으러 오거늘",
|
87 |
+
"decoded_text": "3 이 때 상제님께서 미처 말씀을 마치지 아니하셨는데 면장 양 모(梁某)와 이장이 세금을 받으러 오거늘",
|
88 |
+
"diff": [
|
89 |
+
"replace text[37:38] --> decoded_text[37:38] '梁' --> '梁'"
|
90 |
+
],
|
91 |
+
"n_oov_chars": 1,
|
92 |
+
"oov_ratio": 0.017543859649122806,
|
93 |
+
"oov_charset": "[\"梁\"]"
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"text": "2 26일 새벽이 되자 백낙두(白樂斗)를 비롯하여 무장한 순검 수십 명이 공신의 집을 에워싸고 형렬과 자현 등 여러 사람을 결박한 뒤에 상제님의 처소를 묻거늘",
|
97 |
+
"decoded_text": "2 26일 새벽이 되자 백낙두(白樂斗)를 비롯하여 무장한 순검 수십 명이 공신의 집을 에워싸고 형렬과 자현 등 여러 사람을 결박한 뒤에 상제님의 처소를 묻거늘",
|
98 |
+
"diff": [
|
99 |
+
"replace text[18:19] --> decoded_text[18:19] '樂' --> '樂'"
|
100 |
+
],
|
101 |
+
"n_oov_chars": 1,
|
102 |
+
"oov_ratio": 0.011363636363636364,
|
103 |
+
"oov_charset": "[\"樂\"]"
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"text": "상제님이 진주공사에서 원래는 33명에게 새 세상 일꾼 기운을 붙여서 공사를 보시려고 했는데 12명이 변심을 해서 그 자리에서 빼 버렸다. 그래서 신경수 성도 집과 문공신 성도 집에 있던 21명의 성도들이 잡혀서 왔다. 지금 여기 다내(月乃)가 상제님이 수명소 공사 주인 신경수 성도 집에서 일본 경찰들한테 체포돼서 고부경찰서까지 잡혀 가는 40리 길의 딱 중간이야. 상제님이 오신 길이 그러면 어디냐? 정토칠봉의 끝자리 수금리水金里에서 배를 타고 정읍천을 건너서 이쪽 용두龍頭마을 방향으로 오신 거다. 여기 삼거리에 주막이 있었다. 여기서 상제님이 21명의 성도들과 순검들에게 “너희도 배가 고플 테니까 가져온 음식과 고기를 여기서 배불리 먹고 가자.” 하셨다. 그래서 상제님이 여기서 술 한 잔을 드시고 나서 갑자기 일어나셔서 외치셨다.",
|
107 |
+
"decoded_text": "상제님이 진주공사에서 원래는 33명에게 새 세상 일꾼 기운을 붙여서 공사를 보시려고 했는데 12명이 변심을 해서 그 자리에서 빼 버렸다. 그래서 신경수 성도 집과 문공신 성도 집에 있던 21명의 성도들이 잡혀서 왔다. 지금 여기 다내(月乃)가 상제님이 수명소 공사 주인 신경수 성도 집에서 일본 경찰들한테 체포돼서 고부경찰서까지 잡혀 가는 40리 길의 딱 중간이야. 상제님이 오신 길이 그러면 어디냐? 정토칠봉의 끝자리 수금리水金里에서 배를 타고 정읍천을 건너서 이쪽 용두龍頭마을 방향으로 오신 거다. 여기 삼거리에 주막이 있었다. 여기서 상제님이 21명의 성도들과 순검들에게 “너희도 배가 고플 테니까 가져온 음식과 고기를 여기서 배불리 먹고 가자.” 하셨다. 그래서 상제님이 여기서 술 한 잔을 드시고 나서 갑자기 일어나셔서 외치셨다.",
|
108 |
+
"diff": [
|
109 |
+
"replace text[239:240] --> decoded_text[239:240] '金' --> '金'"
|
110 |
+
],
|
111 |
+
"n_oov_chars": 1,
|
112 |
+
"oov_ratio": 0.002421307506053269,
|
113 |
+
"oov_charset": "[\"金\"]"
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"text": "이광수의 조혼을 다룬 희곡 <규한>보다는 진일보를 보인 작품. 3)金祐鎭 ... ) 그러나, 희곡이 무대상연을 전제로 하는 문학이라면, <不孝天罰 ... 의미로서의 창작 희곡은 아님. -대부분의 작품이 일본 신파 연극을 번역",
|
117 |
+
"decoded_text": "이광수의 조혼을 다룬 희곡 <규한>보다는 진일보를 보인 작품. 3)金祐鎭 ... ) 그러나, 희곡이 무대상연을 전제로 하는 문학이라면, <不孝天罰 ... 의미로서의 창작 희곡은 아님. -대부분의 작품이 일본 신파 연극을 번역",
|
118 |
+
"diff": [
|
119 |
+
"replace text[77:78] --> decoded_text[77:78] '不' --> '不'"
|
120 |
+
],
|
121 |
+
"n_oov_chars": 1,
|
122 |
+
"oov_ratio": 0.008,
|
123 |
+
"oov_charset": "[\"不\"]"
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"text": "{{ 중국의 고리대금업 { 중국(中國)의 고리대금업(高利貸金業) 차 ... 아니라, 국가에서 대출을 해주는 시스템도 존재하지 않았다. 고리대금업은 전통 ... 례 서론 본론 상인(商人)의 유래 대금업의 탄생 - 위진남북조 시대의",
|
127 |
+
"decoded_text": "{{ 중국의 고리대금업 { 중국(中國)의 고리대금업(高利貸金業) 차 ... 아니라, 국가에서 대출을 해주는 시스템도 존재하지 않았다. 고리대금업은 전통 ... 례 서론 본론 상인(商人)의 유래 대금업의 탄생 - 위진남북조 시대의",
|
128 |
+
"diff": [
|
129 |
+
"replace text[32:33] --> decoded_text[32:33] '金' --> '金'"
|
130 |
+
],
|
131 |
+
"n_oov_chars": 1,
|
132 |
+
"oov_ratio": 0.007874015748031496,
|
133 |
+
"oov_charset": "[\"金\"]"
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"text": "..PAGE:1 종교 속의 성차별 ..PAGE:2 목 차 카톨릭 속의 ... 성차별 토론 ..PAGE:3 대부분 종교가 사제직 '여성 不可' 한국천주교 ... 대한 부정적 시각 ..PAGE:4 카톨릭 속의 성차별 사제는 남성이어야",
|
137 |
+
"decoded_text": "..PAGE:1 종교 속의 성차별 ..PAGE:2 목 차 카톨릭 속의 ... 성차별 토론 ..PAGE:3 대부분 종교가 사제직 '여성 不可' 한국천주교 ... 대한 부정적 시각 ..PAGE:4 카톨릭 속의 성차별 사제는 남성이어야",
|
138 |
+
"diff": [
|
139 |
+
"replace text[75:76] --> decoded_text[75:76] '不' --> '不'"
|
140 |
+
],
|
141 |
+
"n_oov_chars": 1,
|
142 |
+
"oov_ratio": 0.0078125,
|
143 |
+
"oov_charset": "[\"不\"]"
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"text": "不壞)의 몸이기 때문이다. 거기에 음독금련사는 더 했다. 그의 입에서 나오는 액은 모든 것을 녹아버리",
|
147 |
+
"decoded_text": "不壞)의 몸이기 때문이다. 거기에 음독금련사는 더 했다. 그의 입에서 나오는 액은 모든 것을 녹아버리",
|
148 |
+
"diff": [
|
149 |
+
"replace text[0:1] --> decoded_text[0:1] '不' --> '不'"
|
150 |
+
],
|
151 |
+
"n_oov_chars": 1,
|
152 |
+
"oov_ratio": 0.017857142857142856,
|
153 |
+
"oov_charset": "[\"不\"]"
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"text": "백리무극 옆에 서 있는 인물은 바로 그의 첫제자 용구찬(龍九燦)이 서 있었다. 그는 천황의 지시하는 일",
|
157 |
+
"decoded_text": "백리무극 옆에 서 있는 인물은 바로 그의 첫제자 용구찬(龍九燦)이 서 있���다. 그는 천황의 지시하는 일",
|
158 |
+
"diff": [
|
159 |
+
"replace text[31:32] --> decoded_text[31:32] '龍' --> '龍'"
|
160 |
+
],
|
161 |
+
"n_oov_chars": 1,
|
162 |
+
"oov_ratio": 0.017543859649122806,
|
163 |
+
"oov_charset": "[\"龍\"]"
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"text": "면 정액이 고갈(枯渴)되어 죽고 말았지만 지금 북리천과 유나영은 용(龍)의 내단을 먹어 삼일까지 이렇",
|
167 |
+
"decoded_text": "면 정액이 고갈(枯渴)되어 죽고 말았지만 지금 북리천과 유나영은 용(龍)의 내단을 먹어 삼일까지 이렇",
|
168 |
+
"diff": [
|
169 |
+
"replace text[38:39] --> decoded_text[38:39] '龍' --> '龍'"
|
170 |
+
],
|
171 |
+
"n_oov_chars": 1,
|
172 |
+
"oov_ratio": 0.017857142857142856,
|
173 |
+
"oov_charset": "[\"龍\"]"
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"text": "라 영약(靈藥)뿐이였다.",
|
177 |
+
"decoded_text": "라 영약(靈藥)뿐이였다.",
|
178 |
+
"diff": [
|
179 |
+
"replace text[5:6] --> decoded_text[5:6] '靈' --> '靈'"
|
180 |
+
],
|
181 |
+
"n_oov_chars": 1,
|
182 |
+
"oov_ratio": 0.07692307692307693,
|
183 |
+
"oov_charset": "[\"靈\"]"
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"text": "5 '선릉역 사건' 가해女, 피해女 만남 전 흉기 챙긴 이유는? 친구 데려온다는 말에",
|
187 |
+
"decoded_text": "5 '선릉역 사건' 가해女, 피해女 만남 전 흉기 챙긴 이유는? 친구 데려온다는 말에",
|
188 |
+
"diff": [
|
189 |
+
"replace text[13:14] --> decoded_text[13:14] '女' --> '女'",
|
190 |
+
"replace text[18:19] --> decoded_text[18:19] '女' --> '女'"
|
191 |
+
],
|
192 |
+
"n_oov_chars": 2,
|
193 |
+
"oov_ratio": 0.0425531914893617,
|
194 |
+
"oov_charset": "[\"女\"]"
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"text": "알프레드 노스 화이트헤드(Alfred North Whitehead)는 20세기의 대표적인 철학자 가운데 한 사람이다. 영국 케임브리지의 트리니티 칼리지에서 수학을 전공하였고, 그 후에 동 대학의 특별연구원(Fellow)과 수석 강사(1885~1911), 런던대학의 임페리얼 칼리지 응용수학교수(1914~1924), 그리고 미국 하버드대학 철학교수(1924~1937)를 역임했다. 수학자였지만 고전에도 정통했으며, 새로운 물리학의 의미를 정확히 인식하고 있었을 뿐만 아니라 전통적인 철학을 오랫동안 깊이 연구해 왔다. 수제자 버트런드 러셀과의 공저 『수학 원리』(전 3권, 1910~1913)와 같은 수리논리학 분야에서 획기적인 업적을 남긴 수학자, 논리학자로서도 높이 평가된다. 또 한편으로는 특히 아인슈타인의 상대성 원리 등 현대 자연과학의 발전을 계기로, 현대 과학설을 철학에 도입시켜 철학 사상사에 새로운 국면을 전개한 과학철학자 그리고 “유기체 철학”(philosophy of organism)의 철학자로서도 높이 평가된다. 화이트헤드는 신중한 사람이었다. “진리를 그 가장 깊은 뿌리에서 부터 탐구”(본문 제2장 중에서) 하는 작업을 평생 멈추지 않았던 사상가였으며, 오랫동안 수학의 전문가였다. 그의 최초의 철학적 저작인 『과학과 근대세계』(1925)는 그가 63세 때, 대표작 『과정과 실재』(1929)는 68세 때에, 그로부터 4년 후에는 『관념의 모험』(1933)이 출간되었다. 그 당시 사람들은 사멸된 것으로 알았던 형이상학이 우주에 관한 상상적 사유라는 형태로 당당하게 부활하는 데 놀랐다. 그의 형이상학 체계는 사물의 유동(流動)을 둘러싸고 전개되는 체계라는 형태의 우주론으로서, 어디까지나 개방된 체계였다. 형이상학을 싫어했던 존 듀이도 화이트헤드의 “유기체 철학”에 대하여 “철학에의 혁명적 공헌” 이라는 찬사를 보냈으며, 영국의 시인이자 문학평론가·철학자였던 허버트 리드는 화이트헤드를 “20세기의 데카르트”라 평하기도 했다. 현대 프랑스의 포스트모더니즘 철학의 기수로 불리는 질 들뢰즈 같은 이는 화이트헤드를 가리켜 “영미권의 마지막 위대한 철학자”로 평하였다. 지은 책으로는 『화이트헤드의 수학이란 무엇인가』『교육의 목적』『화이트헤드와의 대화』등이 있다.",
|
198 |
+
"decoded_text": "알프레드 노스 화이트헤드(Alfred North Whitehead)는 20세기의 대표적인 철학자 가운데 한 사람이다. 영국 케임브리지의 트리니티 칼리지에서 수학을 전공하였고, 그 후에 동 대학의 특별연구원(Fellow)과 수석 강사(1885~1911), 런던대학의 임페리얼 칼리지 응용수학교수(1914~1924), 그리고 미국 하버드대학 철학교수(1924~1937)를 역임했다. 수학자였지만 고전에도 정통했으며, 새로운 물리학의 의미를 정확히 인식하고 있었을 뿐만 아니라 전통적인 철학을 오랫동안 깊이 연구해 왔다. 수제자 버트런드 러셀과의 공저 『수학 원리』(전 3권, 1910~1913)와 같은 수리논리학 분야에서 획기적인 업적을 남긴 수학자, 논리학자로서도 높이 평가된다. 또 한편으로는 특히 아인슈타인의 상���성 원리 등 현대 자연과학의 발전을 계기로, 현대 과학설을 철학에 도입시켜 철학 사상사에 새로운 국면을 전개한 과학철학자 그리고 “유기체 철학”(philosophy of organism)의 철학자로서도 높이 평가된다. 화이트헤드는 신중한 사람이었다. “진리를 그 가장 깊은 뿌리에서 부터 탐구”(본문 제2장 중에서) 하는 작업을 평생 멈추지 않았던 사상가였으며, 오랫동안 수학의 전문가였다. 그의 최초의 철학적 저작인 『과학과 근대세계』(1925)는 그가 63세 때, 대표작 『과정과 실재』(1929)는 68세 때에, 그로부터 4년 후에는 『관념의 모험』(1933)이 출간되었다. 그 당시 사람들은 사멸된 것으로 알았던 형이상학이 우주에 관한 상상적 사유라는 형태로 당당하게 부활하는 데 놀랐다. 그의 형이상학 체계는 사물의 유동(流動)을 둘러싸고 전개되는 체계라는 형태의 우주론으로서, 어디까지나 개방된 체계였다. 형이상학을 싫어했던 존 듀이도 화이트헤드의 “유기체 철학”에 대하여 “철학에의 혁명적 공헌” 이라는 찬사를 보냈으며, 영국의 시인이자 문학평론가·철학자였던 허버트 리드는 화이트헤드를 “20세기의 데카르트”라 평하기도 했다. 현대 프랑스의 포스트모더니즘 철학의 기수로 불리는 질 들뢰즈 같은 이는 화이트헤드를 가리켜 “영미권의 마지막 위대한 철학자”로 평하였다. 지은 책으로는 『화이트헤드의 수학이란 무엇인가』『교육의 목적』『화이트헤드와의 대화』등이 있다.",
|
199 |
+
"diff": [
|
200 |
+
"replace text[809:810] --> decoded_text[809:810] '流' --> '流'"
|
201 |
+
],
|
202 |
+
"n_oov_chars": 1,
|
203 |
+
"oov_ratio": 0.0009000900090009,
|
204 |
+
"oov_charset": "[\"流\"]"
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"text": "<부가설명> 오늘 청지기의 표지(標識, 표식은 잘못된 읽기이다.) 열거한, 신실함, 신뢰할 만하다는 말은 전부 헬라어 ‘피스티스, 피스토스, 피스튜오’에서 나온 말이다. 깨끗한 양심과 순종만이 다른 단어이다. 청지기는 믿을 수 있는 인격이어야 한다는 말이다. 그런 사람은 깨끗한 양심으로 하나님의 말씀을 수행하는 지체들이 된다는 것을 나타낸다. 순종이라는 단어는 ‘휘포쿠오’ ‘휘포탓소’등인데 ‘휘포’라는 말은 ‘…아래’라는 뜻이다. 순종은 누군가가 순종하는 사람 위에 있다는 것을 나타낸다. 위에서 시키는 사람의 말을 듣는 것이 ‘휘포쿠오’이고 위에서 시키는 대로 정돈하는 것이 ‘휘포탓소’이다. 청지기는 위에 주인이 계신다. 주인이 시키는 말을 잘 듣고 시키는 말대로 하는 것이고, 시킨 대로 정돈하는 것이다. 그리스도인 청지기는 예수께서 주인이시기 때문에 예수님의 말씀을 잘 듣고 그대로 수행해야 하고 그대로 정리해야 한다. 그렇게 할 때 주인의 신임을 얻고 하늘나라를 상속받게 된다. 하나님이 주신 것을 하나님의 뜻대로 관리하지 못할 것을 아시면 하나님께서 관리할 것을 맡기겠는가?",
|
208 |
+
"decoded_text": "<부가설명> 오늘 청지기의 표지(標識, 표식은 잘못된 읽기이다.) 열거한, 신실함, 신뢰할 만하다는 말은 전부 헬라어 ‘피스티스, 피스토스, 피스튜오’에서 나온 말이다. 깨끗한 양심과 순종만이 다른 단어이다. 청지기는 믿을 수 있는 인격이어야 한다는 말이다. 그런 사람은 깨끗한 양심으로 하나님의 말씀을 수행하는 지체들이 된다는 것을 나타낸다. 순종이라는 단어는 ‘휘포쿠오’ ‘휘포탓소’등인데 ‘휘포’라는 말은 ‘…아래’라는 뜻이다. 순종은 누군가가 순종하는 사람 위에 있다는 것을 나타낸다. 위에서 시키는 사람의 말을 듣는 것이 ‘휘포쿠오’이고 위에서 시키는 대로 정돈하는 것이 ‘휘포탓소’이다. 청지기는 위에 주인이 계신다. 주인이 시키는 말을 잘 듣고 시키는 말대로 하는 것이고, 시킨 대로 정돈하는 것이다. 그리스도인 청지기는 예수께서 주인이시기 때문에 예수님의 말씀을 잘 듣고 그대로 수행해야 하고 그대로 정리해야 한다. 그렇게 할 때 주인의 신임을 얻고 하늘나라를 상속받게 된다. 하나님이 주신 것을 하나님의 뜻대로 관리하지 못할 것을 아시면 하나님께서 관리할 것을 맡기겠는가?",
|
209 |
+
"diff": [
|
210 |
+
"replace text[19:20] --> decoded_text[19:20] '識' --> '識'"
|
211 |
+
],
|
212 |
+
"n_oov_chars": 1,
|
213 |
+
"oov_ratio": 0.0018083182640144665,
|
214 |
+
"oov_charset": "[\"識\"]"
|
215 |
+
}
|
216 |
+
]
|
stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.ar.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.de.diff.json
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "Erstmals erreichte eine Raumsonde die Ceres. Mit den beiden Kameras an Bord erkunden die Wissenschaftler aus dem Max-Planck-Institut für Sonnensystemforschung in Göttingen die dunkle Oberfläche des Zwergplaneten. Wassereis haben sie schon entdeckt. Aber ruht tief unter den Kratern auch noch ein Ozean?",
|
4 |
+
"decoded_text": "Erstmals erreichte eine Raumsonde die Ceres. Mit den beiden Kameras an Bord erkunden die Wissenschaftler aus dem Max-Planck-Institut für Sonnensystemforschung in Göttingen die dunkle Oberfläche des Zwergplaneten. Wassereis haben sie schon entdeckt. Aber ruht tief unter den Kratern auch noch ein Ozean?",
|
5 |
+
"diff": [
|
6 |
+
"replace text[134:136] --> decoded_text[134:135] 'ü' --> 'ü'"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "Der Vergleich mit der bemannten Mondlandung mag ein wenig übertrieben erscheinen, doch zweifellos gehört Rosetta zu den kühnsten Unternehmen der Raumfahrt: Zum ersten Mal in der Geschichte begleitet eine Sonde einen Kometen auf seiner Bahn um die Sonne und soll Mitte November den Lander Philae auf dessen Oberfläche absetzen. Bei der Auswertung der Bilder und Daten von 67P/Churyumov-Gerasimenko, so der Name des Schweifsterns, sitzen Wissenschaftler des Göttinger Max-Planck-Instituts für Sonnensystemforschung in der ersten Reihe.",
|
14 |
+
"decoded_text": "Der Vergleich mit der bemannten Mondlandung mag ein wenig übertrieben erscheinen, doch zweifellos gehört Rosetta zu den kühnsten Unternehmen der Raumfahrt: Zum ersten Mal in der Geschichte begleitet eine Sonde einen Kometen auf seiner Bahn um die Sonne und soll Mitte November den Lander Philae auf dessen Oberfläche absetzen. Bei der Auswertung der Bilder und Daten von 67P/Churyumov-Gerasimenko, so der Name des Schweifsterns, sitzen Wissenschaftler des Göttinger Max-Planck-Instituts für Sonnensystemforschung in der ersten Reihe.",
|
15 |
+
"diff": [
|
16 |
+
"replace text[488:490] --> decoded_text[488:489] 'ü' --> 'ü'"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "Die Sonne ist der wichtigste Energiespender der Erde und Motor des Klimas. Doch sie schickt mal mehr, mal weniger Licht zur Erde. Astronomen um Natalie Krivova erfassen am Max-Planck-Institut für Sonnensystemforschung in Göttingen diese Schwankungen der Sonnenstrahlung in Modellen, um herauszufinden, ob die Veränderungen zur Erderwärmung beitragen oder ob sie ihr entgegenwirken.",
|
24 |
+
"decoded_text": "Die Sonne ist der wichtigste Energiespender der Erde und Motor des Klimas. Doch sie schickt mal mehr, mal weniger Licht zur Erde. Astronomen um Natalie Krivova erfassen am Max-Planck-Institut für Sonnensystemforschung in Göttingen diese Schwankungen der Sonnenstrahlung in Modellen, um herauszufinden, ob die Veränderungen zur Erderwärmung beitragen oder ob sie ihr entgegenwirken.",
|
25 |
+
"diff": [
|
26 |
+
"replace text[193:195] --> decoded_text[193:194] 'ü' --> 'ü'"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "Zum Glückwunsch haben Sie in den nächsten Wochen eine doppelte Gelegenheit:",
|
34 |
+
"decoded_text": "Zum Glückwunsch haben Sie in den nächsten Wochen eine doppelte Gelegenheit:",
|
35 |
+
"diff": [
|
36 |
+
"replace text[6:8] --> decoded_text[6:7] 'ü' --> 'ü'",
|
37 |
+
"replace text[35:37] --> decoded_text[34:35] 'ä' --> 'ä'"
|
38 |
+
],
|
39 |
+
"n_oov_chars": 0,
|
40 |
+
"oov_ratio": 0.0,
|
41 |
+
"oov_charset": "[]"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"text": "Zum einen haben wir im März einen neuen Kirchenvorstand gewählt: Jüngere und Ältere, Erfahrene und Neue mischen sich zu einer Gruppe, die die Verantwortung für die Gemeinde übernimmt. Ich bitte Sie und Euch alle, allen Kandidat*nnen zu danken, sie auf der Straße anzusprechen und zu beglückwünschen: Denn es ist nicht selbstverständlich, für ein Amt von sechs Jahren Dauer zu kandidieren.",
|
45 |
+
"decoded_text": "Zum einen haben wir im März einen neuen Kirchenvorstand gewählt: Jüngere und Ältere, Erfahrene und Neue mischen sich zu einer Gruppe, die die Verantwortung für die Gemeinde übernimmt. Ich bitte Sie und Euch alle, allen Kandidat*nnen zu danken, sie auf der Straße anzusprechen und zu beglückwünschen: Denn es ist nicht selbstverständlich, für ein Amt von sechs Jahren Dauer zu kandidieren.",
|
46 |
+
"diff": [
|
47 |
+
"replace text[24:26] --> decoded_text[24:25] 'ä' --> 'ä'",
|
48 |
+
"replace text[61:63] --> decoded_text[60:61] 'ä' --> 'ä'",
|
49 |
+
"replace text[69:71] --> decoded_text[67:68] 'ü' --> 'ü'",
|
50 |
+
"replace text[81:83] --> decoded_text[78:79] 'Ä' --> 'Ä'",
|
51 |
+
"replace text[162:164] --> decoded_text[158:159] 'ü' --> 'ü'",
|
52 |
+
"replace text[180:182] --> decoded_text[175:176] 'ü' --> 'ü'",
|
53 |
+
"replace text[295:297] --> decoded_text[289:290] 'ü' --> 'ü'",
|
54 |
+
"replace text[301:303] --> decoded_text[294:295] 'ü' --> 'ü'",
|
55 |
+
"replace text[340:342] --> decoded_text[332:333] 'ä' --> 'ä'",
|
56 |
+
"replace text[352:354] --> decoded_text[343:344] 'ü' --> 'ü'"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "Darüber freuen wir uns. Und wenn Sie je manden von denen kennen, freuen die sich, wenn Sie sie ansprechen und darin bestärken.",
|
64 |
+
"decoded_text": "Darüber freuen wir uns. Und wenn Sie je manden von denen kennen, freuen die sich, wenn Sie sie ansprechen und darin bestärken.",
|
65 |
+
"diff": [
|
66 |
+
"replace text[3:5] --> decoded_text[3:4] 'ü' --> 'ü'",
|
67 |
+
"replace text[122:124] --> decoded_text[121:122] 'ä' --> 'ä'"
|
68 |
+
],
|
69 |
+
"n_oov_chars": 0,
|
70 |
+
"oov_ratio": 0.0,
|
71 |
+
"oov_charset": "[]"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"text": "Unsere Kirchengemeinde lebt davon, dass manche eine Aufgabe haben oder eine besondere Zeit erleben – wie den Konfirmandenunterricht. Die anderen aber daran teilhaben und sie unterstützen und ihnen Glück wünschen. Und Sie werden merken, dass es Sie selbst beglücken kann, anderen Glück zu wünschen!",
|
75 |
+
"decoded_text": "Unsere Kirchengemeinde lebt davon, dass manche eine Aufgabe haben oder eine besondere Zeit erleben – wie den Konfirmandenunterricht. Die anderen aber daran teilhaben und sie unterstützen und ihnen Glück wünschen. Und Sie werden merken, dass es Sie selbst beglücken kann, anderen Glück zu wünschen!",
|
76 |
+
"diff": [
|
77 |
+
"replace text[184:186] --> decoded_text[184:185] 'ü' --> 'ü'",
|
78 |
+
"replace text[203:205] --> decoded_text[202:203] 'ü' --> 'ü'",
|
79 |
+
"replace text[209:211] --> decoded_text[207:208] 'ü' --> 'ü'",
|
80 |
+
"replace text[265:267] --> decoded_text[262:263] 'ü' --> 'ü'",
|
81 |
+
"replace text[288:290] --> decoded_text[284:285] 'ü' --> 'ü'",
|
82 |
+
"replace text[297:299] --> decoded_text[292:293] 'ü' --> 'ü'"
|
83 |
+
],
|
84 |
+
"n_oov_chars": 0,
|
85 |
+
"oov_ratio": 0.0,
|
86 |
+
"oov_charset": "[]"
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"text": "‹ Herzlichen Dank für all Ihre Spenden!",
|
90 |
+
"decoded_text": "‹ Herzlichen Dank für all Ihre Spenden!",
|
91 |
+
"diff": [
|
92 |
+
"replace text[19:21] --> decoded_text[19:20] 'ü' --> 'ü'"
|
93 |
+
],
|
94 |
+
"n_oov_chars": 0,
|
95 |
+
"oov_ratio": 0.0,
|
96 |
+
"oov_charset": "[]"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"text": "Bitte beachte: Du kannst die an uns erteile Einwilligung auch jederzeit widerrufen. Nutze dazu einfach unsere Kontaktmöglichkeiten. Durch den Widerruf wird die Rechtmäßigkeit der bis dahin erfolgten Verarbeitung nicht berührt.",
|
100 |
+
"decoded_text": "Bitte beachte: Du kannst die an uns erteile Einwilligung auch jederzeit widerrufen. Nutze dazu einfach unsere Kontaktmöglichkeiten. Durch den Widerruf wird die Rechtmäßigkeit der bis dahin erfolgten Verarbeitung nicht berührt.",
|
101 |
+
"diff": [
|
102 |
+
"replace text[166:168] --> decoded_text[166:167] 'ä' --> 'ä'",
|
103 |
+
"replace text[222:224] --> decoded_text[221:222] 'ü' --> 'ü'"
|
104 |
+
],
|
105 |
+
"n_oov_chars": 0,
|
106 |
+
"oov_ratio": 0.0,
|
107 |
+
"oov_charset": "[]"
|
108 |
+
}
|
109 |
+
]
|
stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.fa.diff.json
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "رئيس حوزه علميه اصفهان گفت: دليل نزول اکثر مصائب رفتار خودِ انسان هاست؛ فقر، بيماري هاي لاعلاج و ناامني هاي اجتماعي، همه و همه نتيجه گناهاني است که اکثر افراد جامعه مرتکب مي شوند. به گزارش رسا، حضرت آيت الله حسين مظاهري در جلسه تفسير قرآن صبح ديروز که در مسجد اميرالمؤمنين(ع) خيابان جي برگزار شد، گفت: هميشه خير و صلاح افراد در بهره برداري بيشتر از دنيا نيست. وي در ادامه تفسير آيه صد و پنجاه و پنجم سوره بقره که مي فرمايد«وَلَنَبْلُوَنَّکُمْ بِشَيْءٍ مِنَ الْخَوْفِ وَالْجُوعِ وَنَقْصٍ مِنَ الأمْوَالِ وَالأنْفُسِ وَالثَّمَرَاتِ وَبَشِّرِ الصَّابِرِينَ» افزود: بلاها و مصائب با اين دنيا عجين شده است و اين بلاها در همه زمينه هاي فردي و اجتماعي جريان دارد و رفتار مردم در برابر اين بلاها و مصائب به دو بخش تقسيم مي شود، عده اي در برابر مصائب جزع و فزع مي کنند و گاهي منکر همه اعتقادات و باورهايشان مي شوند و عده اي در برابر همين بلاها صبر پيشه مي کنند و همه مصائب را از سوي خداوند مي بينند و به فعل خداوند راضي هستند. حضرت آيت الله مظاهري برخي از مصائب را از الطاف خفيه الهي دانست و خاطرنشان کرد: خداوند بر افرادي که در برابر مصائب صبر پيشه",
|
4 |
+
"decoded_text": "رئيس حوزه علميه اصفهان گفت: دليل نزول اکثر مصائب رفتار خودِ انسان هاست؛ فقر، بيماري هاي لاعلاج و ناامني هاي اجتماعي، همه و همه نتيجه گناهاني است که اکثر افراد جامعه مرتکب مي شوند. به گزارش رسا، حضرت آيت الله حسين مظاهري در جلسه تفسير قرآن صبح ديروز که در مسجد اميرالمؤمنين(ع) خيابان جي برگزار شد، گفت: هميشه خير و صلاح افراد در بهره برداري بيشتر از دنيا نيست. وي در ادامه تفسير آيه صد و پنجاه و پنجم سوره بقره که مي فرمايد«وَلَنَبْلُوَنَّکُمْ بِشَيْءٍ مِنَ الْخَوْفِ وَالْجُوعِ وَنَقْصٍ مِنَ الأمْوَالِ وَالأنْفُسِ وَالثَّمَرَاتِ وَبَشِّرِ الصَّابِرِينَ» افزود: بلاها و مصائب با اين دنيا عجين شده است و اين بلاها در همه زمينه هاي فردي و اجتماعي جريان دارد و رفتار مردم در برابر اين بلاها و مصائب به دو بخش تقسيم مي شود، عده اي در برابر مصائب جزع و فزع مي کنند و گاهي منکر همه اعتقادات و باورهايشان مي شوند و عده اي در برابر همين بلاها صبر پيشه مي کنند و همه مصائب را از سوي خداوند مي بينند و به فعل خداوند راضي هستند. حضرت آيت الله مظاهري برخي از مصائب را از الطاف خفيه الهي دانست و خاطرنشان کرد: خداوند بر افرادي که در برابر مصائب صبر پيشه",
|
5 |
+
"diff": [
|
6 |
+
"insert text[436:436] --> decoded_text[436:437] '' --> 'َ'",
|
7 |
+
"delete text[437:438] --> decoded_text[438:438] 'َ' --> ''",
|
8 |
+
"insert text[520:520] --> decoded_text[520:521] '' --> 'َ'",
|
9 |
+
"delete text[521:522] --> decoded_text[522:522] 'َ' --> ''",
|
10 |
+
"insert text[543:543] --> decoded_text[543:544] '' --> 'َ'",
|
11 |
+
"delete text[544:545] --> decoded_text[545:545] 'َ' --> ''"
|
12 |
+
],
|
13 |
+
"n_oov_chars": 0,
|
14 |
+
"oov_ratio": 0.0,
|
15 |
+
"oov_charset": "[]"
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"text": "«رِجَالٌ لا تُلْهِیهِمْ تجَارَةٌ وَ لا بَیْعٌ عَن ذِکْرِ اللهِ وَ إِقَامِ الصَّلَوةِ وَ إِیتَاءِ الزَّکَوةِ ـ مردانی که نه تجارت و نه معاملهای آنان را از یاد خدا و برپاداشتن نماز و ادای زکات غافل نمیکند.»[1]",
|
19 |
+
"decoded_text": "«رِجَالٌ لا تُلْهِیهِمْ تجَارَةٌ وَ لا بَیْعٌ عَن ذِکْرِ اللهِ وَ إِقَامِ الصَّلَوةِ وَ إِیتَاءِ الزَّکَوةِ ـ مردانی که نه تجارت و نه معاملهای آنان را از یاد خدا و برپاداشتن نماز و ادای زکات غافل نمیکند.»[1]",
|
20 |
+
"diff": [
|
21 |
+
"insert text[77:77] --> decoded_text[77:78] '' --> 'َ'",
|
22 |
+
"delete text[78:79] --> decoded_text[79:79] 'َ' --> ''",
|
23 |
+
"insert text[100:100] --> decoded_text[100:101] '' --> 'َ'",
|
24 |
+
"delete text[101:102] --> decoded_text[102:102] 'َ' --> ''"
|
25 |
+
],
|
26 |
+
"n_oov_chars": 0,
|
27 |
+
"oov_ratio": 0.0,
|
28 |
+
"oov_charset": "[]"
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"text": "«وَ إِمَّا یَنْزَغَنَّکَ مِنَ الشَّیْطانِ نَزْغٌ فَاسْتَعِذْ بِاللهِ إِنَّهُ سَمِیعٌ عَلِیمٌ ـ و هر گاه وسوسهای از شیطان به تو رسد، به خدا پناه ببر که او شنوای داناست.»[19]",
|
32 |
+
"decoded_text": "«وَ إِمَّا یَنْزَغَنَّکَ مِنَ الشَّیْطانِ نَزْغٌ فَاسْتَعِذْ بِاللهِ إِنَّهُ سَمِیعٌ عَلِیمٌ ـ و هر گاه وسوسهای از شیطان به تو رسد، به خدا پناه ببر که او شنوای داناست.»[19]",
|
33 |
+
"diff": [
|
34 |
+
"insert text[7:7] --> decoded_text[7:8] '' --> 'َ'",
|
35 |
+
"delete text[8:9] --> decoded_text[9:9] 'َ' --> ''",
|
36 |
+
"insert text[20:20] --> decoded_text[20:21] '' --> 'َ'",
|
37 |
+
"delete text[21:22] --> decoded_text[22:22] 'َ' --> ''",
|
38 |
+
"insert text[33:33] --> decoded_text[33:34] '' --> 'َ'",
|
39 |
+
"delete text[34:35] --> decoded_text[35:35] 'َ' --> ''",
|
40 |
+
"insert text[72:72] --> decoded_text[72:73] '' --> 'َ'",
|
41 |
+
"delete text[73:74] --> decoded_text[74:74] 'َ' --> ''"
|
42 |
+
],
|
43 |
+
"n_oov_chars": 0,
|
44 |
+
"oov_ratio": 0.0,
|
45 |
+
"oov_charset": "[]"
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"text": "«وَ قُلْ رَبِّ أَعُوذُ بِکَ مِنْ هَمَزاتِ الشَّیَاطِینِ* وَ أَعُوذُ بِکَ رَبِّ أَنْ یَحْضُرُونِ ـ بگو ای پروردگار من؛ پناه میبرم به تو از وسوسههای شیاطین* و پناه میبرم به تو ای پروردگار من؛ از اینکه حاضر شوند.»([20]",
|
49 |
+
"decoded_text": "«وَ قُلْ رَبِّ أَعُوذُ بِکَ مِنْ هَمَزاتِ الشَّیَاطِینِ* وَ أَعُوذُ بِکَ رَبِّ أَنْ یَحْضُرُونِ ـ بگو ای پروردگار من؛ پناه میبرم به تو از وسوسههای شیاطین* و پناه میبرم به تو ای پروردگار من؛ از اینکه حاضر شوند.»([20]",
|
50 |
+
"diff": [
|
51 |
+
"insert text[12:12] --> decoded_text[12:13] '' --> 'ِ'",
|
52 |
+
"delete text[13:14] --> decoded_text[14:14] 'ِ' --> ''",
|
53 |
+
"insert text[45:45] --> decoded_text[45:46] '' --> 'َ'",
|
54 |
+
"delete text[46:47] --> decoded_text[47:47] 'َ' --> ''",
|
55 |
+
"insert text[76:76] --> decoded_text[76:77] '' --> 'ِ'",
|
56 |
+
"delete text[77:78] --> decoded_text[78:78] 'ِ' --> ''"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "امیرالمؤمنین علیهالسلام فرمودهاند: «هنگامی که شیطان یکی از شما را وسوسه نمود، باید به خدا پناه ببرد و بگوید: آمَنْتُ بِاللهِ وَ بِرَسُولِهِ مُخْلِصاً لَهُ الدِّینَ»[23]",
|
64 |
+
"decoded_text": "امیرالمؤمنین علیهالسلام فرمودهاند: «هنگامی که شیطان یکی از شما را وسوسه نمود، باید به خدا پناه ببرد و بگوید: آمَنْتُ بِاللهِ وَ بِرَسُولِهِ مُخْلِصاً لَهُ الدِّینَ»[23]",
|
65 |
+
"diff": [
|
66 |
+
"insert text[161:161] --> decoded_text[161:162] '' --> 'ِ'",
|
67 |
+
"delete text[162:163] --> decoded_text[163:163] 'ِ' --> ''"
|
68 |
+
],
|
69 |
+
"n_oov_chars": 0,
|
70 |
+
"oov_ratio": 0.0,
|
71 |
+
"oov_charset": "[]"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"text": "حضرت صادق علیهالسلام فرمود: مردی خدمت رسول خدا آمد و گفت: یا رسول الله؛ از وسوسهای که در نماز به من القا میشود شکایت دارم. حتی اینکه نمیدانم چهقدر نماز کردم از زیاده یا کم. حضرت فرمودند: «وقتی داخل نماز شدی، انگشت سبّابهی دست راستت را به ران پای چپت بزن و سپس بگو: بِسْمِ اللهِ وَ بِاللهِ تَوَکَّلْتُ عَلَی اللهِ أَعُوذُ بِاللهِ السَّمِیعِ الْعَلِیمِ مِنَ الشَّیْطَانِ الرَّجِیمِ ـ پس همانا او را دور و از خود منع و طرد کنی.»[24]",
|
75 |
+
"decoded_text": "حضرت صادق علیهالسلام فرمود: مردی خدمت رسول خدا آمد و گفت: یا رسول الله؛ از وسوسهای که در نماز به من القا میشود شکایت دارم. حتی اینکه نمیدانم چهقدر نماز کردم از زیاده یا کم. حضرت فرمودند: «وقتی داخل نماز شدی، انگشت سبّابهی دست راستت را به ران پای چپت بزن و سپس بگو: بِسْمِ اللهِ وَ بِاللهِ تَوَکَّلْتُ عَلَی اللهِ أَعُوذُ بِاللهِ السَّمِیعِ الْعَلِیمِ مِنَ الشَّیْطَانِ الرَّجِیمِ ـ پس همانا او را دور و از خود منع و طرد کنی.»[24]",
|
76 |
+
"diff": [
|
77 |
+
"insert text[301:301] --> decoded_text[301:302] '' --> 'َ'",
|
78 |
+
"delete text[302:303] --> decoded_text[303:303] 'َ' --> ''",
|
79 |
+
"insert text[339:339] --> decoded_text[339:340] '' --> 'َ'",
|
80 |
+
"delete text[340:341] --> decoded_text[341:341] 'َ' --> ''",
|
81 |
+
"insert text[366:366] --> decoded_text[366:367] '' --> 'َ'",
|
82 |
+
"delete text[367:368] --> decoded_text[368:368] 'َ' --> ''",
|
83 |
+
"insert text[379:379] --> decoded_text[379:380] '' --> 'َ'",
|
84 |
+
"delete text[380:381] --> decoded_text[381:381] 'َ' --> ''"
|
85 |
+
],
|
86 |
+
"n_oov_chars": 0,
|
87 |
+
"oov_ratio": 0.0,
|
88 |
+
"oov_charset": "[]"
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"text": "آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت مى آید. کد خبر: ۷۴۸۰۲. تاریخ: ۱۸ اردیبهشت ۱۳۹۴ - ۰۹:۲۱. رسول خدا صلى الله علیه و آله :. المُعَلِّمونَ خَیرُ النّاسِ کُلَّما أخلَقَ الذِّکرُ جَدَّدوهُ، أعطوهُم ولا تَستَأجِروهُم فَتُحرِجوهُم؛. آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش. مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت. مى آید. الفردوس : ۴ / ۱۹۳ / ۶۵۹۷ . علم و حکمت ج 2، ص 626. امام هادی علیه السلام:.",
|
92 |
+
"decoded_text": "آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت مى آید. کد خبر: ۷۴۸۰۲. تاریخ: ۱۸ اردیبهشت ۱۳۹۴ - ۰۹:۲۱. رسول خدا صلى الله علیه و آله :. المُعَلِّمونَ خَیرُ النّاسِ کُلَّما أخلَقَ الذِّکرُ جَدَّدوهُ، أعطوهُم ولا تَستَأجِروهُم فَتُحرِجوهُم؛. آموزگاران بهترین مردم اند . هرگاه یاد [خدا] کهنه مى شود، تجدیدش. مى کنند . به آنان عطا کنید ولى آنها را به مزدورى نگیرید که بر آنان سخت. مى آید. الفردوس : ۴ / ۱۹۳ / ۶۵۹۷ . علم و حکمت ج 2، ص 626. امام هادی علیه السلام:.",
|
93 |
+
"diff": [
|
94 |
+
"insert text[230:230] --> decoded_text[230:231] '' --> 'ِ'",
|
95 |
+
"insert text[231:231] --> decoded_text[232:249] '' --> 'مونَ خَیرُ النّاس'",
|
96 |
+
"replace text[232:246] --> decoded_text[250:255] 'مونَ خَیرُ الن' --> ' کُلَ'",
|
97 |
+
"replace text[247:249] --> decoded_text[256:269] 'اس' --> 'ما أخلَقَ الذ'",
|
98 |
+
"delete text[250:254] --> decoded_text[270:270] ' کُل' --> ''",
|
99 |
+
"replace text[255:269] --> decoded_text[271:279] 'َما أخلَقَ الذ' --> 'کرُ جَدَ'",
|
100 |
+
"delete text[270:280] --> decoded_text[280:280] 'ِکرُ جَدَّ' --> ''"
|
101 |
+
],
|
102 |
+
"n_oov_chars": 0,
|
103 |
+
"oov_ratio": 0.0,
|
104 |
+
"oov_charset": "[]"
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"text": "آن گاه که در روز قیامت برانگیخته شوم، گناهکاران امّت پیامبر اسلام را شفاعت خواهم کرد. کد خبر: ۷۲۳۰۷. تاریخ: ۲۱ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إذا حُشِرتُ یَومَ القِیامَةِ أشفَعُ عُصاةَ اُمَّةِ النَّبِیِّ صلی الله علیه و آله؛. آن گاه که در روز قیامت برانگیخته شوم، گناهکاران امّت پیامبر اسلام را شفاعت خواهم کرد. إحقاق الحقّ، ج 19، ص 129 ؛ آینه یادها ص 172. امام على علیه السلام:. ضادُّوا التَّوانِیَ بِالعَزمِ؛. از راه تصمیم راسخ گرفتن، با سستى نبرد کنید. عیون الحکم والمواعظ: ص ۳۱۰ ح ۵۴۵۴ / میزان الحکمه: ج10 ص134.",
|
108 |
+
"decoded_text": "آن گاه که در روز قیامت برانگیخته شوم، گن��هکاران امّت پیامبر اسلام را شفاعت خواهم کرد. کد خبر: ۷۲۳۰۷. تاریخ: ۲۱ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إذا حُشِرتُ یَومَ القِیامَةِ أشفَعُ عُصاةَ اُمَّةِ النَّبِیِّ صلی الله علیه و آله؛. آن گاه که در روز قیامت برانگیخته شوم، گناهکاران امّت پیامبر اسلام را شفاعت خواهم کرد. إحقاق الحقّ، ج 19، ص 129 ؛ آینه یادها ص 172. امام على علیه السلام:. ضادُّوا التَّوانِیَ بِالعَزمِ؛. از راه تصمیم راسخ گرفتن، با سستى نبرد کنید. عیون الحکم والمواعظ: ص ۳۱۰ ح ۵۴۵۴ / میزان الحکمه: ج10 ص134.",
|
109 |
+
"diff": [
|
110 |
+
"replace text[207:209] --> decoded_text[207:209] 'َّ' --> 'َّ'",
|
111 |
+
"replace text[215:222] --> decoded_text[215:222] 'َّبِیِّ' --> 'َّبِیِّ'",
|
112 |
+
"delete text[402:403] --> decoded_text[402:402] 'ّ' --> ''",
|
113 |
+
"replace text[404:412] --> decoded_text[403:412] 'وا التَّ' --> 'ّوا التَّ'"
|
114 |
+
],
|
115 |
+
"n_oov_chars": 0,
|
116 |
+
"oov_ratio": 0.0,
|
117 |
+
"oov_charset": "[]"
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"text": "آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد: «اى مردم ! مدّت حکومت جبّاران بر شما، به پایان رسید و بهترین فرد امّت محمّد، حکومت را به دست گرفته است، پس به مکّه بروید». کد خبر: ۷۱۵۹۷. تاریخ: ۱۲ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر صلى الله علیه وآله:. إذا. کانَ عِندَ خُروجِ القائِمِ یُنادی مُنادٍ مِنَ السَّماءِ: أیُّهَا. النّاسُ! قَطَعَ عَنکُم مُدَّةُ الجَبّارینَ ووَلِیَ الأَمرَ خَیرُ اُمَّةِ. مُحَمَّدٍ فَالحَقوا بِمَکَّةَ؛. آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد:. «اى مردم ! مدّت حکومت جبّاران بر شما، به",
|
121 |
+
"decoded_text": "آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد: «اى مردم ! مدّت حکومت جبّاران بر شما، به پایان رسید و بهترین فرد امّت محمّد، حکومت را به دست گرفته است، پس به مکّه بروید». کد خبر: ۷۱۵۹۷. تاریخ: ۱۲ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر صلى الله علیه وآله:. إذا. کانَ عِندَ خُروجِ القائِمِ یُنادی مُنادٍ مِنَ السَّماءِ: أیُّهَا. النّاسُ! قَطَعَ عَنکُم مُدَّةُ الجَبّارینَ ووَلِیَ الأَمرَ خَیرُ اُمَّةِ. مُحَمَّدٍ فَالحَقوا بِمَکَّةَ؛. آن گاه که وقت خروج قائم مىشود، منادىاى از آسمان ندا مىدهد:. «اى مردم ! مدّت حکومت جبّاران بر شما، به",
|
122 |
+
"diff": [
|
123 |
+
"replace text[321:323] --> decoded_text[321:323] 'َّ' --> 'َّ'",
|
124 |
+
"replace text[331:333] --> decoded_text[331:333] 'ُّ' --> 'ُّ'",
|
125 |
+
"replace text[364:366] --> decoded_text[364:366] 'َّ' --> 'َّ'",
|
126 |
+
"replace text[406:408] --> decoded_text[406:408] 'َّ' --> 'َّ'",
|
127 |
+
"replace text[417:419] --> decoded_text[417:419] 'َّ' --> 'َّ'",
|
128 |
+
"replace text[437:439] --> decoded_text[437:439] 'َّ' --> 'َّ'"
|
129 |
+
],
|
130 |
+
"n_oov_chars": 0,
|
131 |
+
"oov_ratio": 0.0,
|
132 |
+
"oov_charset": "[]"
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"text": "آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. کد خبر: ۷۳۵۳۸. تاریخ: ۰۲ اردیبهشت ۱۳۹۴ - ۰۶:۰۰. امام جواد(سلام الله علیه):. مَن هَجَرَ الْمُداراةَ قَاربَهُ المَکرُوهُ؛. آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. بحارالأنوار، ج 68، ص 341. پیامبر خدا(صلی الله علیه و آله):. لا تَخَفْ فِی اللَّهِ لَومَةَ لائمٍ؛. در راه خدا از ملامت و نکوهش ملامتگران نترس. معانى الأخبار، ص 335.",
|
136 |
+
"decoded_text": "آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. کد خبر: ۷۳۵۳۸. تاریخ: ۰۲ اردیبهشت ۱۳۹۴ - ۰۶:۰۰. امام جواد(سلام الله علیه):. مَن هَجَرَ الْمُداراةَ قَاربَهُ المَکرُوهُ؛. آن که سازش و مدارا را ترک کند، ناگوارى به او روى آورَد. بحارالأنوار، ج 68، ص 341. پیامبر خدا(صلی الله علیه و آله):. لا تَخَفْ فِی اللَّهِ لَومَةَ لائمٍ؛. در راه خدا از ملامت و نکوهش ملامتگران نترس. معانى الأخبار، ص 335.",
|
137 |
+
"diff": [
|
138 |
+
"insert text[310:310] --> decoded_text[310:311] '' --> 'َ'",
|
139 |
+
"delete text[311:312] --> decoded_text[312:312] 'َ' --> ''"
|
140 |
+
],
|
141 |
+
"n_oov_chars": 0,
|
142 |
+
"oov_ratio": 0.0,
|
143 |
+
"oov_charset": "[]"
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"text": "آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. کد خبر: ۶۸۸۵۸. تاریخ: ۲۶ بهمن ۱۳۹۳ - ۰۶:۰۰. امام علی علیه السلام:. دَع ما لا یَعنِیکَ، وَ اشتَغِل بِمُهِمِّکَ الَّذی یُنجِیکَ؛. آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. غرر الحکم: ح ۵۱۳۳/ گزیده غررالحکم و دررالکلم، ص52. امام على علیهالسلام :. عَظِّمُوا أقدارَکُم بِالتَّغافُلِ عَنِ الدَّنِیِّ مِنَ الاُْمُورِ ؛. با بى توجهى به امور پست، بر ارزش خود بیفزایید . تحف العقول ، ص 224.",
|
147 |
+
"decoded_text": "آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. کد خبر: ۶۸۸۵۸. تاریخ: ۲۶ بهمن ۱۳۹۳ - ۰۶:۰۰. امام علی علیه السلام:. دَع ما لا یَعنِیکَ، وَ اشتَغِل بِمُهِمِّکَ الَّذی یُنجِیکَ؛. آنچه را به تو ربطی ندارد، رها کن و به کارى که رستگارت کند، مشغول شو. غرر الحکم: ح ۵۱۳۳/ گزیده غررالحکم و دررالکلم، ص52. امام على علیهالسلام :. عَظِّمُوا أقدارَکُم بِالتَّغافُلِ عَنِ الدَّنِیِّ مِنَ الاُْمُورِ ؛. با بى توجهى به امور پست، بر ارزش خود بیفزایید . تحف العقول ، ص 224.",
|
148 |
+
"diff": [
|
149 |
+
"replace text[174:183] --> decoded_text[174:183] 'ِّکَ الَّ' --> 'ِّکَ الَّ'",
|
150 |
+
"replace text[344:346] --> decoded_text[344:346] 'ِّ' --> 'ِّ'",
|
151 |
+
"replace text[366:368] --> decoded_text[366:368] 'َّ' --> 'َّ'",
|
152 |
+
"replace text[383:399] --> decoded_text[383:400] 'َّنِیِّ مِنَ الا' --> 'َّنِیِّ مِنَ الاُ'",
|
153 |
+
"delete text[400:401] --> decoded_text[401:401] 'ُ' --> ''"
|
154 |
+
],
|
155 |
+
"n_oov_chars": 0,
|
156 |
+
"oov_ratio": 0.0,
|
157 |
+
"oov_charset": "[]"
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"text": "اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. کد خبر: ۷۱۵۲۴. تاریخ: ۰۳ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إنْ کُنتَ تَعمَلُ بِما أمَرناکَ و تَنتَهی عَمّا زَجَرناکَ عَنهُ فَأنتَ مِن شیعَتِنا و إلّا فَلا؛. اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. بحار الأنوار ، ج ۶۸ ، ص ۱۵۵ . امام على سلام الله علیه:. تَبارَکَ. اللّهُ الَّذی . . . أنشَأَ السَّحابَ الثِّقالَ ، فَأَهطَلَ دِیَمَها. وعَدَّدَ قِسَمَها",
|
161 |
+
"decoded_text": "اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. کد خبر: ۷۱۵۲۴. تاریخ: ۰۳ فروردین ۱۳۹۴ - ۰۶:۰۰. حضرت فاطمه علیها السلام :. إنْ کُنتَ تَعمَلُ بِما أمَرناکَ و تَنتَهی عَمّا زَجَرناکَ عَنهُ فَأنتَ مِن شیعَتِنا و إلّا فَلا؛. اگر به آنچه تو را به آن فرمان میدهیم عمل کنی و از آنچه برحذر میداریم دوری کنی ، از شیعیان مایی و الّا هرگز. بحار الأنوار ، ج ۶۸ ، ص ۱۵۵ . امام على سلام الله علیه:. تَبارَکَ. اللّهُ الَّذی . . . أنشَأَ السَّحابَ الثِّقالَ ، فَأَهطَلَ دِیَمَها. وعَدَّدَ قِسَمَها",
|
162 |
+
"diff": [
|
163 |
+
"replace text[470:472] --> decoded_text[470:472] 'َّ' --> 'َّ'",
|
164 |
+
"replace text[491:493] --> decoded_text[491:493] 'َّ' --> 'َّ'",
|
165 |
+
"delete text[501:502] --> decoded_text[501:501] 'ّ' --> ''",
|
166 |
+
"insert text[503:503] --> decoded_text[502:503] '' --> 'ّ'",
|
167 |
+
"replace text[534:536] --> decoded_text[534:536] 'َّ' --> 'َّ'"
|
168 |
+
],
|
169 |
+
"n_oov_chars": 0,
|
170 |
+
"oov_ratio": 0.0,
|
171 |
+
"oov_charset": "[]"
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"text": "اگر مىتوانى، همیشه با وضو باش. کد خبر: ۷۱۵۰۰. تاریخ: ۲۹ اسفند ۱۳۹۳ - ۱۲:۰۷. پیامبر اکرم (صلی الله علیه و آله):. إنِ استَطَعتَ أن تَکونَ أبَداً عَلى وُضوءٍ فَافعَل. اگر مىتوانى، همیشه با وضو باش. حکمت نامه پیامبر اعظم(ص): ج9- ص306 - ح 7205. امام باقر (سلام الله علیه) :. الکَسَلُ یُضِرُّ بِالدِّینِ والدُّنیا. تنبلى به دین و دنیا ضرر مىزند. میزان الحکمة: ج10- ص131- ح 17769.",
|
175 |
+
"decoded_text": "اگر مىتوانى، همیشه با وضو باش. کد خبر: ۷۱۵۰۰. تاریخ: ۲۹ اسفند ۱۳۹۳ - ۱۲:۰۷. پیامبر اکرم (صلی الله علیه و آله):. إنِ استَطَعتَ أن تَکونَ أبَداً عَلى وُضوءٍ فَافعَل. اگر مىتوانى، همیشه با وضو باش. حکمت نامه پیامبر اعظم(ص): ج9- ص306 - ح 7205. امام باقر (سلام الله علیه) :. الکَسَلُ یُضِرُّ بِالدِّینِ والدُّنیا. تنبلى به دین و دنیا ضرر مىزند. میزان الحکمة: ج10- ص131- ح 17769.",
|
176 |
+
"diff": [
|
177 |
+
"insert text[288:288] --> decoded_text[288:289] '' --> 'ُ'",
|
178 |
+
"replace text[289:296] --> decoded_text[290:297] 'ُ بِالد' --> ' بِالدِ'",
|
179 |
+
"replace text[297:306] --> decoded_text[298:307] 'ِینِ والد' --> 'ینِ والدُ'",
|
180 |
+
"delete text[307:308] --> decoded_text[308:308] 'ُ' --> ''"
|
181 |
+
],
|
182 |
+
"n_oov_chars": 0,
|
183 |
+
"oov_ratio": 0.0,
|
184 |
+
"oov_charset": "[]"
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"text": "اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. کد خبر: ۷۱۵۰۱. تاریخ: ۰۱ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر اکرم صلی الله علیه وآله. فَنَیرِزُوا إن قَدَرتُم کُلَّ یَومٍ یَعنی تَهادَوا و تَواصَلُوا فِی اللَّهِ؛. اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. دعائم الإسلام: ج 2، ص 326. امام صادق سلام الله علیه. إنَّ یَومَ النَّیروزِ هُوَ الیَومُ الّذى أخَذَ اللَّهُ فیهِ مَواثیقَ العِبادِ أن یَعبُدوهُ. روز نوروز همان روزى است که خداوند از بند",
|
188 |
+
"decoded_text": "اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. کد خبر: ۷۱۵۰۱. تاریخ: ۰۱ فروردین ۱۳۹۴ - ۰۶:۰۰. پیامبر اکرم صلی الله علیه وآله. فَنَیرِزُوا إن قَدَرتُم کُلَّ یَومٍ یَعنی تَهادَوا و تَواصَلُوا فِی اللَّهِ؛. اگر مىتوانید هر روز را نوروز کنید؛ یعنى در راه خدا به یکدیگر هدیه بدهید و با یکدیگر پیوند داشته باشید. دعائم الإسلام: ج 2، ص 326. امام صادق سلام الله علیه. إنَّ یَومَ النَّیروزِ هُوَ الیَومُ الّذى أخَذَ اللَّهُ فیهِ مَواثیقَ العِبادِ أن یَعبُدوهُ. روز نوروز همان روزى است که خداوند از بند",
|
189 |
+
"diff": [
|
190 |
+
"insert text[210:210] --> decoded_text[210:211] '' --> 'َ'",
|
191 |
+
"delete text[211:212] --> decoded_text[212:212] 'َ' --> ''",
|
192 |
+
"insert text[254:254] --> decoded_text[254:255] '' --> 'َ'",
|
193 |
+
"delete text[255:256] --> decoded_text[256:256] 'َ' --> ''",
|
194 |
+
"insert text[420:420] --> decoded_text[420:421] '' --> 'َ'",
|
195 |
+
"replace text[421:432] --> decoded_text[422:433] 'َ یَومَ الن' --> ' یَومَ النَ'",
|
196 |
+
"delete text[433:434] --> decoded_text[434:434] 'َ' --> ''",
|
197 |
+
"insert text[468:468] --> decoded_text[468:469] '' --> 'َ'",
|
198 |
+
"delete text[469:470] --> decoded_text[470:470] 'َ' --> ''"
|
199 |
+
],
|
200 |
+
"n_oov_chars": 0,
|
201 |
+
"oov_ratio": 0.0,
|
202 |
+
"oov_charset": "[]"
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"text": "امام باقر علیه السلام:. أحسِن؛ فَإِنّى لَم أرَ شَیئاً قَطُّ أشَدَّ طَلَباً ولا أسرَعَ دَرکاً مِن حَسَنَةٍ مُحدَثَةٍ لِذَنبٍ قَدیمٍ؛. نیکى کن؛ که بهراستى هرگز چیزى مانند کار نیکِ جدید را ندیدهام. که اینچنین، به تعقیب یک گناه قدیم برآید و با سرعت، خود را به آن برساند. [و آن را محو سازد]. علل الشرائع: ص ۵۹۹ ح ۴۹ / حکمتنامه حضرت عبدالعظیم الحسنی علیه السلام، ص175. امام صادق سلام الله علیه :. کَثرَةُ النَّومِ مَذهَبَةٌ للدِّینِ والدُّنیا؛. پرخوابى، دین و دنیا را از بین مىبرد. کافی : ج 5، ص 84، ح 1 / میزان الحکمة: ج 12 ، ص 493.",
|
206 |
+
"decoded_text": "امام باقر علیه السلام:. أحسِن؛ فَإِنّى لَم أرَ شَیئاً قَطُّ أشَدَّ طَلَباً ولا أسرَعَ دَرکاً مِن حَسَنَةٍ مُحدَثَةٍ لِذَنبٍ قَدیمٍ؛. نیکى کن؛ که بهراستى هرگز چیزى مانند کار نیکِ جدید را ندیدهام. که اینچنین، به تعقیب یک گناه قدیم برآید و با سرعت، خود را به آن برساند. [و آن را محو سازد]. علل الشرائع: ص ۵۹۹ ح ۴۹ / حکمتنامه حضرت عبدالعظیم الحسنی علیه السلام، ص175. امام صادق سلام الله علیه :. کَثرَةُ النَّومِ مَذهَبَةٌ للدِّینِ والدُّنیا؛. پرخوابى، دین و دنیا را از بین مىبرد. کافی : ج 5، ص 84، ح 1 / میزان الحکمة: ج 12 ، ص 493.",
|
207 |
+
"diff": [
|
208 |
+
"insert text[58:58] --> decoded_text[58:59] '' --> 'ُ'",
|
209 |
+
"delete text[59:60] --> decoded_text[60:60] 'ُ' --> ''",
|
210 |
+
"insert text[65:65] --> decoded_text[65:66] '' --> 'َ'",
|
211 |
+
"delete text[66:67] --> decoded_text[67:67] 'َ' --> ''",
|
212 |
+
"insert text[408:408] --> decoded_text[408:409] '' --> 'َ'",
|
213 |
+
"delete text[409:410] --> decoded_text[410:410] 'َ' --> ''",
|
214 |
+
"insert text[427:427] --> decoded_text[427:428] '' --> 'ِ'",
|
215 |
+
"replace text[428:437] --> decoded_text[429:438] 'ِینِ والد' --> 'ینِ والدُ'",
|
216 |
+
"delete text[438:439] --> decoded_text[439:439] 'ُ' --> ''"
|
217 |
+
],
|
218 |
+
"n_oov_chars": 0,
|
219 |
+
"oov_ratio": 0.0,
|
220 |
+
"oov_charset": "[]"
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"text": "امام باقر علیه السلام:. أقرَبُ ما یَکونُ العَبدُ مِنَ اللَّهِ إذا کانَ فِی الصَّلاةِ؛. نزدیکترین حالت بنده به خدا، هنگامى است که در نماز است. دعائم الإسلام: ج ۱ ص ۱۳۴/ شناختنامه نماز: ج1 ص204. پیامبر اکرم صلی الله علیه و آله:. فإنَّ خِیارَکُم خِیارُکُم لِأهلِهِ. براستى بهترین شما کسى است که براى خانواده اش بهتر باشد . بحار الأنوار :ج 5 ، ص 268 ، ح79 / میزان الحکمة : ج 5 ، ص 101.",
|
224 |
+
"decoded_text": "امام باقر علیه السلام:. أقرَبُ ما یَکونُ العَبدُ مِنَ اللَّهِ إذا کانَ فِی الصَّلاةِ؛. نزدیکترین حالت بنده به خدا، هنگامى است که در نماز است. دعائم الإسلام: ج ۱ ص ۱۳۴/ شناختنامه نماز: ج1 ص204. پیامبر اکرم صلی الله علیه و آله:. فإنَّ خِیارَکُم خِیارُکُم لِأهلِهِ. براستى بهترین شما کسى است که براى خانواده اش بهتر باشد . بحار الأنوار :ج 5 ، ص 268 ، ح79 / میزان الحکمة : ج 5 ، ص 101.",
|
225 |
+
"diff": [
|
226 |
+
"insert text[57:57] --> decoded_text[57:58] '' --> 'َ'",
|
227 |
+
"delete text[58:59] --> decoded_text[59:59] 'َ' --> ''",
|
228 |
+
"insert text[78:78] --> decoded_text[78:79] '' --> 'َ'",
|
229 |
+
"delete text[79:80] --> decoded_text[80:80] 'َ' --> ''",
|
230 |
+
"insert text[232:232] --> decoded_text[232:233] '' --> 'َ'",
|
231 |
+
"delete text[233:234] --> decoded_text[234:234] 'َ' --> ''"
|
232 |
+
],
|
233 |
+
"n_oov_chars": 0,
|
234 |
+
"oov_ratio": 0.0,
|
235 |
+
"oov_charset": "[]"
|
236 |
+
},
|
237 |
+
{
|
238 |
+
"text": "قالیباف افزود: یا حدیث معروفِ «مَن اَصبَحَ وَ لَم یَهتَمَّ بِاُمورِ المُسلِمینَ فَلَیسَ بِمُسلِم» و نمونه های مشابه آن، جزو بیّنات اسلام است؛ یعنى اسلام انسان را اینجور خواسته است که مسئول باشد؛ هم نسبت به خود، هم نسبت به نزدیکان خود، هم نسبت به جامعهى خود، هم نسبت به بشریّت.",
|
239 |
+
"decoded_text": "قالیباف افزود: یا حدیث معروفِ «مَن اَصبَحَ وَ لَم یَهتَمَّ بِاُمورِ المُسلِمینَ فَلَیسَ بِمُسلِم» و نمونه های مشابه آن، جزو بیّنات اسلام است؛ یعنى اسلام انسان را اینجور خواسته است که مسئول باشد؛ هم نسبت به خود، هم نسبت به نزدیکان خود، هم نسبت به جامعهى خود، هم نسبت به بشریّت.",
|
240 |
+
"diff": [
|
241 |
+
"insert text[56:56] --> decoded_text[56:57] '' --> 'َ'",
|
242 |
+
"delete text[57:58] --> decoded_text[58:58] 'َ' --> ''"
|
243 |
+
],
|
244 |
+
"n_oov_chars": 0,
|
245 |
+
"oov_ratio": 0.0,
|
246 |
+
"oov_charset": "[]"
|
247 |
+
}
|
248 |
+
]
|
stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.fr.diff.json
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "Une Agence conseil en Communication et Coopération Décentralisée.",
|
4 |
+
"decoded_text": "Une Agence conseil en Communication et Coopération Décentralisée.",
|
5 |
+
"diff": [
|
6 |
+
"replace text[43:45] --> decoded_text[43:44] 'é' --> 'é'",
|
7 |
+
"replace text[53:55] --> decoded_text[52:53] 'é' --> 'é'",
|
8 |
+
"replace text[64:66] --> decoded_text[62:63] 'é' --> 'é'"
|
9 |
+
],
|
10 |
+
"n_oov_chars": 0,
|
11 |
+
"oov_ratio": 0.0,
|
12 |
+
"oov_charset": "[]"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"text": "Une équipe de consultants expérimentés pour accompagner, conseiller tout type de coopération économique, sociale, culturelle et durable, en lien avec l’Afrique.",
|
16 |
+
"decoded_text": "Une équipe de consultants expérimentés pour accompagner, conseiller tout type de coopération économique, sociale, culturelle et durable, en lien avec l’Afrique.",
|
17 |
+
"diff": [
|
18 |
+
"replace text[4:6] --> decoded_text[4:5] 'é' --> 'é'",
|
19 |
+
"replace text[30:32] --> decoded_text[29:30] 'é' --> 'é'",
|
20 |
+
"replace text[38:40] --> decoded_text[36:37] 'é' --> 'é'",
|
21 |
+
"replace text[88:90] --> decoded_text[85:86] 'é' --> 'é'",
|
22 |
+
"replace text[97:99] --> decoded_text[93:94] 'é' --> 'é'"
|
23 |
+
],
|
24 |
+
"n_oov_chars": 0,
|
25 |
+
"oov_ratio": 0.0,
|
26 |
+
"oov_charset": "[]"
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"text": "Des compétences confirmées dans les domaines des médias et de la communication publique.",
|
30 |
+
"decoded_text": "Des compétences confirmées dans les domaines des médias et de la communication publique.",
|
31 |
+
"diff": [
|
32 |
+
"replace text[8:10] --> decoded_text[8:9] 'é' --> 'é'",
|
33 |
+
"replace text[24:26] --> decoded_text[23:24] 'é' --> 'é'",
|
34 |
+
"replace text[52:54] --> decoded_text[50:51] 'é' --> 'é'"
|
35 |
+
],
|
36 |
+
"n_oov_chars": 0,
|
37 |
+
"oov_ratio": 0.0,
|
38 |
+
"oov_charset": "[]"
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"text": "Une bonne connaissance du réseau des décideurs publics et privés, en France et ailleurs dans le monde, principalement pour l’Afrique.",
|
42 |
+
"decoded_text": "Une bonne connaissance du réseau des décideurs publics et privés, en France et ailleurs dans le monde, principalement pour l’Afrique.",
|
43 |
+
"diff": [
|
44 |
+
"replace text[27:29] --> decoded_text[27:28] 'é' --> 'é'",
|
45 |
+
"replace text[39:41] --> decoded_text[38:39] 'é' --> 'é'",
|
46 |
+
"replace text[64:66] --> decoded_text[62:63] 'é' --> 'é'"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "La tribune internationale pour parler des atouts de la Côte d’Ivoire et lui offrir les meilleures conditions des approches nouvelles des politiques de coopération française et européenne.",
|
54 |
+
"decoded_text": "La tribune internationale pour parler des atouts de la Côte d’Ivoire et lui offrir les meilleures conditions des approches nouvelles des politiques de coopération française et européenne.",
|
55 |
+
"diff": [
|
56 |
+
"replace text[56:58] --> decoded_text[56:57] 'ô' --> 'ô'",
|
57 |
+
"replace text[156:158] --> decoded_text[155:156] 'é' --> 'é'",
|
58 |
+
"replace text[169:171] --> decoded_text[167:168] 'ç' --> 'ç'",
|
59 |
+
"replace text[184:186] --> decoded_text[181:182] 'é' --> 'é'"
|
60 |
+
],
|
61 |
+
"n_oov_chars": 0,
|
62 |
+
"oov_ratio": 0.0,
|
63 |
+
"oov_charset": "[]"
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"text": "La rencontre entre les pouvoirs publics et décideurs français, les autorités ivoiriennes, les hommes d'affaires, entrepreneurs et investisseurs, venant de tous horizons.",
|
67 |
+
"decoded_text": "La rencontre entre les pouvoirs publics et décideurs français, les autorités ivoiriennes, les hommes d'affaires, entrepreneurs et investisseurs, venant de tous horizons.",
|
68 |
+
"diff": [
|
69 |
+
"replace text[44:46] --> decoded_text[44:45] 'é' --> 'é'",
|
70 |
+
"replace text[58:60] --> decoded_text[57:58] 'ç' --> 'ç'",
|
71 |
+
"replace text[76:78] --> decoded_text[74:75] 'é' --> 'é'"
|
72 |
+
],
|
73 |
+
"n_oov_chars": 0,
|
74 |
+
"oov_ratio": 0.0,
|
75 |
+
"oov_charset": "[]"
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"text": "Parler des opportunités de la relation privilégiée de la coopération française et européenne avec la Côte d’Ivoire.",
|
79 |
+
"decoded_text": "Parler des opportunités de la relation privilégiée de la coopération française et européenne avec la Côte d’Ivoire.",
|
80 |
+
"diff": [
|
81 |
+
"replace text[21:23] --> decoded_text[21:22] 'é' --> 'é'",
|
82 |
+
"replace text[46:48] --> decoded_text[45:46] 'é' --> 'é'",
|
83 |
+
"replace text[50:52] --> decoded_text[48:49] 'é' --> 'é'",
|
84 |
+
"replace text[64:66] --> decoded_text[61:62] 'é' --> 'é'",
|
85 |
+
"replace text[77:79] --> decoded_text[73:74] 'ç' --> 'ç'",
|
86 |
+
"replace text[92:94] --> decoded_text[87:88] 'é' --> 'é'",
|
87 |
+
"replace text[108:110] --> decoded_text[102:103] 'ô' --> 'ô'"
|
88 |
+
],
|
89 |
+
"n_oov_chars": 0,
|
90 |
+
"oov_ratio": 0.0,
|
91 |
+
"oov_charset": "[]"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"text": "Faciliter les discussions entre les acteurs de la coopération décentralisée.",
|
95 |
+
"decoded_text": "Faciliter les discussions entre les acteurs de la coopération décentralisée.",
|
96 |
+
"diff": [
|
97 |
+
"replace text[54:56] --> decoded_text[54:55] 'é' --> 'é'",
|
98 |
+
"replace text[64:66] --> decoded_text[63:64] 'é' --> 'é'",
|
99 |
+
"replace text[75:77] --> decoded_text[73:74] 'é' --> 'é'"
|
100 |
+
],
|
101 |
+
"n_oov_chars": 0,
|
102 |
+
"oov_ratio": 0.0,
|
103 |
+
"oov_charset": "[]"
|
104 |
+
}
|
105 |
+
]
|
stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.ja.diff.json
ADDED
@@ -0,0 +1,1046 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "点を取れる部分をメインに勉強を行いました。とても勉強になったので、チャレンジして良かったです。仕事で活かせるように今後も勉強を続けて行きたいです。",
|
4 |
+
"decoded_text": "点を取れる部分をメインに勉強を行いました。とても勉強になったので、チャレンジして良かったです。仕事で活かせるように今後も勉強を続けて行きたいです。",
|
5 |
+
"diff": [
|
6 |
+
"replace text[31:33] --> decoded_text[31:32] 'で' --> 'で'",
|
7 |
+
"replace text[38:40] --> decoded_text[37:38] 'ジ' --> 'ジ'",
|
8 |
+
"replace text[46:48] --> decoded_text[44:45] 'で' --> 'で'",
|
9 |
+
"replace text[52:54] --> decoded_text[49:50] 'で' --> 'で'",
|
10 |
+
"replace text[74:76] --> decoded_text[70:71] 'で' --> 'で'"
|
11 |
+
],
|
12 |
+
"n_oov_chars": 0,
|
13 |
+
"oov_ratio": 0.0,
|
14 |
+
"oov_charset": "[]"
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"text": "ドローンは今後、土木・農業など様々な分野での利用が見込まれるので、ぜひチャレンジしてみてください。",
|
18 |
+
"decoded_text": "ドローンは今後、土木・農業など様々な分野での利用が見込まれるので、ぜひチャレンジしてみてください。",
|
19 |
+
"diff": [
|
20 |
+
"replace text[0:2] --> decoded_text[0:1] 'ド' --> 'ド'",
|
21 |
+
"replace text[15:17] --> decoded_text[14:15] 'ど' --> 'ど'",
|
22 |
+
"replace text[22:24] --> decoded_text[20:21] 'で' --> 'で'",
|
23 |
+
"replace text[27:29] --> decoded_text[24:25] 'が' --> 'が'",
|
24 |
+
"replace text[35:37] --> decoded_text[31:32] 'で' --> 'で'",
|
25 |
+
"replace text[38:40] --> decoded_text[33:34] 'ぜ' --> 'ぜ'",
|
26 |
+
"replace text[45:47] --> decoded_text[39:40] 'ジ' --> 'ジ'",
|
27 |
+
"replace text[52:54] --> decoded_text[45:46] 'だ' --> 'だ'"
|
28 |
+
],
|
29 |
+
"n_oov_chars": 0,
|
30 |
+
"oov_ratio": 0.0,
|
31 |
+
"oov_charset": "[]"
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"text": "覚える範囲が多かったので、要点を絞って取り組みました。合格して良かったです。内定先で今後使う話が出てきたら率先して取り組んで行きたいです。",
|
35 |
+
"decoded_text": "覚える範囲が多かったので、要点を絞って取り組みました。合格して良かったです。内定先で今後使う話が出てきたら率先して取り組んで行きたいです。",
|
36 |
+
"diff": [
|
37 |
+
"replace text[5:7] --> decoded_text[5:6] 'が' --> 'が'",
|
38 |
+
"replace text[12:14] --> decoded_text[11:12] 'で' --> 'で'",
|
39 |
+
"replace text[37:39] --> decoded_text[35:36] 'で' --> 'で'",
|
40 |
+
"replace text[44:46] --> decoded_text[41:42] 'で' --> 'で'",
|
41 |
+
"replace text[51:53] --> decoded_text[47:48] 'が' --> 'が'",
|
42 |
+
"replace text[66:68] --> decoded_text[61:62] 'で' --> 'で'",
|
43 |
+
"replace text[72:74] --> decoded_text[66:67] 'で' --> 'で'"
|
44 |
+
],
|
45 |
+
"n_oov_chars": 0,
|
46 |
+
"oov_ratio": 0.0,
|
47 |
+
"oov_charset": "[]"
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"text": "先生が全面的にサポートして下さるので安心して勉強できます。分からない時は先生になんでも質問してください。",
|
51 |
+
"decoded_text": "先生が全面的にサポートして下さるので安心して勉強できます。分からない時は先生になんでも質問してください。",
|
52 |
+
"diff": [
|
53 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
54 |
+
"replace text[9:11] --> decoded_text[8:9] 'ポ' --> 'ポ'",
|
55 |
+
"replace text[19:21] --> decoded_text[17:18] 'で' --> 'で'",
|
56 |
+
"replace text[27:29] --> decoded_text[24:25] 'で' --> 'で'",
|
57 |
+
"replace text[45:47] --> decoded_text[41:42] 'で' --> 'で'",
|
58 |
+
"replace text[53:55] --> decoded_text[48:49] 'だ' --> 'だ'"
|
59 |
+
],
|
60 |
+
"n_oov_chars": 0,
|
61 |
+
"oov_ratio": 0.0,
|
62 |
+
"oov_charset": "[]"
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"text": "過去問題をたくさん解くことで問題の傾向を分析しました。しっかり勉強して臨んだので、合格する自信がありました。この資格を活かし、工事測量などで、正確な値を出すことで適切な指示を出せるようにしていきたいです。",
|
66 |
+
"decoded_text": "過去問題をたくさん解くことで問題の傾向を分析しました。しっかり勉強して臨んだので、合格する自信がありました。この資格を活かし、工事測量などで、正確な値を出すことで適切な指示を出せるようにしていきたいです。",
|
67 |
+
"diff": [
|
68 |
+
"replace text[13:15] --> decoded_text[13:14] 'で' --> 'で'",
|
69 |
+
"replace text[38:40] --> decoded_text[37:38] 'だ' --> 'だ'",
|
70 |
+
"replace text[41:43] --> decoded_text[39:40] 'で' --> 'で'",
|
71 |
+
"replace text[50:52] --> decoded_text[47:48] 'が' --> 'が'",
|
72 |
+
"replace text[72:76] --> decoded_text[68:70] 'どで' --> 'どで'",
|
73 |
+
"replace text[86:88] --> decoded_text[80:81] 'で' --> 'で'",
|
74 |
+
"replace text[106:108] --> decoded_text[99:100] 'で' --> 'で'"
|
75 |
+
],
|
76 |
+
"n_oov_chars": 0,
|
77 |
+
"oov_ratio": 0.0,
|
78 |
+
"oov_charset": "[]"
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"text": "しっかり勉強したら必ず自分のためになりますよ。将来の夢を持ち、それを実現できるように今できる事を頑張ってください。",
|
82 |
+
"decoded_text": "しっかり勉強したら必ず自分のためになりますよ。将来の夢を持ち、それを実現できるように今できる事を頑張ってください。",
|
83 |
+
"diff": [
|
84 |
+
"replace text[10:12] --> decoded_text[10:11] 'ず' --> 'ず'",
|
85 |
+
"replace text[37:39] --> decoded_text[36:37] 'で' --> 'で'",
|
86 |
+
"replace text[45:47] --> decoded_text[43:44] 'で' --> 'で'",
|
87 |
+
"replace text[56:58] --> decoded_text[53:54] 'だ' --> 'だ'"
|
88 |
+
],
|
89 |
+
"n_oov_chars": 0,
|
90 |
+
"oov_ratio": 0.0,
|
91 |
+
"oov_charset": "[]"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"text": "まずは、自分が何をしたいのか考え、そしてそこから、自分の目標に向かって資格取得を目指してください。せっかくの取得チャンスを無駄にしないでください。",
|
95 |
+
"decoded_text": "まずは、自分が何をしたいのか考え、そしてそこから、自分の目標に向かって資格取得を目指してください。せっかくの取得チャンスを無駄にしないでください。",
|
96 |
+
"diff": [
|
97 |
+
"replace text[1:3] --> decoded_text[1:2] 'ず' --> 'ず'",
|
98 |
+
"replace text[7:9] --> decoded_text[6:7] 'が' --> 'が'",
|
99 |
+
"replace text[47:49] --> decoded_text[45:46] 'だ' --> 'だ'",
|
100 |
+
"replace text[70:72] --> decoded_text[67:68] 'で' --> 'で'",
|
101 |
+
"replace text[73:75] --> decoded_text[69:70] 'だ' --> 'だ'"
|
102 |
+
],
|
103 |
+
"n_oov_chars": 0,
|
104 |
+
"oov_ratio": 0.0,
|
105 |
+
"oov_charset": "[]"
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"text": "過去問題を解き、答え合わせをした後、自分がどこを間違っていたか、なぜ間違えたか等、徹底的に勉強しました。次は測量士補や、ドローン検定等の資格を目指し、頑張りたいと思います。",
|
109 |
+
"decoded_text": "過去問題を解き、答え合わせをした後、自分がどこを間違っていたか、なぜ間違えたか等、徹底的に勉強しました。次は測量士補や、ドローン検定等の資格を目指し、頑張りたいと思います。",
|
110 |
+
"diff": [
|
111 |
+
"replace text[20:24] --> decoded_text[20:22] 'がど' --> 'がど'",
|
112 |
+
"replace text[35:37] --> decoded_text[33:34] 'ぜ' --> 'ぜ'",
|
113 |
+
"replace text[63:65] --> decoded_text[60:61] 'ド' --> 'ド'"
|
114 |
+
],
|
115 |
+
"n_oov_chars": 0,
|
116 |
+
"oov_ratio": 0.0,
|
117 |
+
"oov_charset": "[]"
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"text": "細かい事でも、疑問に思った事は必ず先生に質問してください。",
|
121 |
+
"decoded_text": "細かい事でも、疑問に思った事は必ず先生に質問してください。",
|
122 |
+
"diff": [
|
123 |
+
"replace text[4:6] --> decoded_text[4:5] 'で' --> 'で'",
|
124 |
+
"replace text[17:19] --> decoded_text[16:17] 'ず' --> 'ず'",
|
125 |
+
"replace text[27:29] --> decoded_text[25:26] 'だ' --> 'だ'"
|
126 |
+
],
|
127 |
+
"n_oov_chars": 0,
|
128 |
+
"oov_ratio": 0.0,
|
129 |
+
"oov_charset": "[]"
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"text": "過去問を解き続け、頭に知識と問題の傾向を叩き込みました。合格して本当に良かったです。 試験を受けるチャンスがあり、思い切ってチャレンジして良かったと思います。",
|
133 |
+
"decoded_text": "過去問を解き続け、頭に知識と問題の傾向を叩き込みました。合格して本当に良かったです。 試験を受けるチャンスがあり、思い切ってチャレンジして良かったと思います。",
|
134 |
+
"diff": [
|
135 |
+
"replace text[39:41] --> decoded_text[39:40] 'で' --> 'で'",
|
136 |
+
"replace text[54:56] --> decoded_text[53:54] 'が' --> 'が'"
|
137 |
+
],
|
138 |
+
"n_oov_chars": 0,
|
139 |
+
"oov_ratio": 0.0,
|
140 |
+
"oov_charset": "[]"
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"text": "何事もまずは挑戦してみてください。分からない事は先生が教えてくださいます。安心して勉強に取り組んでください。",
|
144 |
+
"decoded_text": "何事もまずは挑戦してみてください。分からない事は先生が教えてくださいます。安心して勉強に取り組んでください。",
|
145 |
+
"diff": [
|
146 |
+
"replace text[4:6] --> decoded_text[4:5] 'ず' --> 'ず'",
|
147 |
+
"replace text[14:16] --> decoded_text[13:14] 'だ' --> 'だ'",
|
148 |
+
"replace text[28:30] --> decoded_text[26:27] 'が' --> 'が'",
|
149 |
+
"replace text[34:36] --> decoded_text[31:32] 'だ' --> 'だ'",
|
150 |
+
"replace text[52:54] --> decoded_text[48:49] 'で' --> 'で'",
|
151 |
+
"replace text[55:57] --> decoded_text[50:51] 'だ' --> 'だ'"
|
152 |
+
],
|
153 |
+
"n_oov_chars": 0,
|
154 |
+
"oov_ratio": 0.0,
|
155 |
+
"oov_charset": "[]"
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"text": "日々の勉強を大切にし、試験当日は集中して受験しました。試験一か月前までは、ひたすら過去問を解いて自分の弱いところを集中的に勉強しました。この学校に入学して1番の目標だったので取得できて嬉しいです。",
|
159 |
+
"decoded_text": "日々の勉強を大切にし、試験当日は集中して受験しました。試験一か月前までは、ひたすら過去問を解いて自分の弱いところを集中的に勉強しました。この学校に入学して1番の目標だったので取得できて嬉しいです。",
|
160 |
+
"diff": [
|
161 |
+
"replace text[34:36] --> decoded_text[34:35] 'で' --> 'で'",
|
162 |
+
"replace text[83:85] --> decoded_text[82:83] 'だ' --> 'だ'",
|
163 |
+
"replace text[88:90] --> decoded_text[86:87] 'で' --> 'で'",
|
164 |
+
"replace text[92:94] --> decoded_text[89:90] 'で' --> 'で'",
|
165 |
+
"replace text[99:101] --> decoded_text[95:96] 'で' --> 'で'"
|
166 |
+
],
|
167 |
+
"n_oov_chars": 0,
|
168 |
+
"oov_ratio": 0.0,
|
169 |
+
"oov_charset": "[]"
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"text": "何回も過去問を解き、傾向を掴みました。合格して嬉しいです。さらに上の資格を目指し頑張りたいと思います。",
|
173 |
+
"decoded_text": "何回も過去問を解き、傾向を掴みました。合格して嬉しいです。さらに上の資格を目指し頑張りたいと思います。",
|
174 |
+
"diff": [
|
175 |
+
"replace text[26:28] --> decoded_text[26:27] 'で' --> 'で'"
|
176 |
+
],
|
177 |
+
"n_oov_chars": 0,
|
178 |
+
"oov_ratio": 0.0,
|
179 |
+
"oov_charset": "[]"
|
180 |
+
},
|
181 |
+
{
|
182 |
+
"text": "受けるからには合格するほうが良いので積極的に勉強してください。そして後悔しないように頑張ってください。",
|
183 |
+
"decoded_text": "受けるからには合格するほうが良いので積極的に勉強してください。そして後悔しないように頑張ってください。",
|
184 |
+
"diff": [
|
185 |
+
"replace text[13:15] --> decoded_text[13:14] 'が' --> 'が'",
|
186 |
+
"replace text[18:20] --> decoded_text[17:18] 'で' --> 'で'",
|
187 |
+
"replace text[29:31] --> decoded_text[27:28] 'だ' --> 'だ'",
|
188 |
+
"replace text[50:52] --> decoded_text[47:48] 'だ' --> 'だ'"
|
189 |
+
],
|
190 |
+
"n_oov_chars": 0,
|
191 |
+
"oov_ratio": 0.0,
|
192 |
+
"oov_charset": "[]"
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"text": "分野ごとに勉強を行いました。土木業界ではこの資格がないと仕事の範囲が狭くなるので、合格して嬉しいです。実地試験に合格したら次は1級を目指し、頑張ろうと思います。",
|
196 |
+
"decoded_text": "分野ごとに勉強を行いました。土木業界ではこの資格がないと仕事の範囲が狭くなるので、合格して嬉しいです。実地試験に合格したら次は1級を目指し、頑張ろうと思います。",
|
197 |
+
"diff": [
|
198 |
+
"replace text[2:4] --> decoded_text[2:3] 'ご' --> 'ご'",
|
199 |
+
"replace text[19:21] --> decoded_text[18:19] 'で' --> 'で'",
|
200 |
+
"replace text[26:28] --> decoded_text[24:25] 'が' --> 'が'",
|
201 |
+
"replace text[36:38] --> decoded_text[33:34] 'が' --> 'が'",
|
202 |
+
"replace text[43:45] --> decoded_text[39:40] 'で' --> 'で'",
|
203 |
+
"replace text[53:55] --> decoded_text[48:49] 'で' --> 'で'"
|
204 |
+
],
|
205 |
+
"n_oov_chars": 0,
|
206 |
+
"oov_ratio": 0.0,
|
207 |
+
"oov_charset": "[]"
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"text": "難しい試験ですが、勉強を続けているとコツがつかめてきます。諦めずに頑張ってください。",
|
211 |
+
"decoded_text": "難しい試験ですが、勉強を続けているとコツがつかめてきます。諦めずに頑張ってください。",
|
212 |
+
"diff": [
|
213 |
+
"replace text[5:7] --> decoded_text[5:6] 'で' --> 'で'",
|
214 |
+
"replace text[8:10] --> decoded_text[7:8] 'が' --> 'が'",
|
215 |
+
"replace text[22:24] --> decoded_text[20:21] 'が' --> 'が'",
|
216 |
+
"replace text[34:36] --> decoded_text[31:32] 'ず' --> 'ず'",
|
217 |
+
"replace text[42:44] --> decoded_text[38:39] 'だ' --> 'だ'"
|
218 |
+
],
|
219 |
+
"n_oov_chars": 0,
|
220 |
+
"oov_ratio": 0.0,
|
221 |
+
"oov_charset": "[]"
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"text": "過去問を解くことで対策をする事ができました。学科試験に合格したので、次は実地試験に合格して、現場を任せてもらえるようになりたいです。",
|
225 |
+
"decoded_text": "過去問を解くことで対策をする事ができました。学科試験に合格したので、次は実地試験に合格して、現場を任せてもらえるようになりたい��す。",
|
226 |
+
"diff": [
|
227 |
+
"replace text[8:10] --> decoded_text[8:9] 'で' --> 'で'",
|
228 |
+
"replace text[16:20] --> decoded_text[15:17] 'がで' --> 'がで'",
|
229 |
+
"replace text[35:37] --> decoded_text[32:33] 'で' --> 'で'",
|
230 |
+
"replace text[67:69] --> decoded_text[63:64] 'で' --> 'で'"
|
231 |
+
],
|
232 |
+
"n_oov_chars": 0,
|
233 |
+
"oov_ratio": 0.0,
|
234 |
+
"oov_charset": "[]"
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"text": "取りたい資格は積極的に勉強し、自信を持って頑張ってください。わからない事は必ず先生が教えてくださいます。",
|
238 |
+
"decoded_text": "取りたい資格は積極的に勉強し、自信を持って頑張ってください。わからない事は必ず先生が教えてくださいます。",
|
239 |
+
"diff": [
|
240 |
+
"replace text[26:28] --> decoded_text[26:27] 'だ' --> 'だ'",
|
241 |
+
"replace text[39:41] --> decoded_text[38:39] 'ず' --> 'ず'",
|
242 |
+
"replace text[43:45] --> decoded_text[41:42] 'が' --> 'が'",
|
243 |
+
"replace text[49:51] --> decoded_text[46:47] 'だ' --> 'だ'"
|
244 |
+
],
|
245 |
+
"n_oov_chars": 0,
|
246 |
+
"oov_ratio": 0.0,
|
247 |
+
"oov_charset": "[]"
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"text": "放課後の勉強会を頑張りました。2級土木施工管理技術検定は、土木技術者になるための第一歩だと思います。合格して本当に嬉しいです。",
|
251 |
+
"decoded_text": "放課後の勉強会を頑張りました。2級土木施工管理技術検定は、土木技術者になるための第一歩だと思います。合格して本当に嬉しいです。",
|
252 |
+
"diff": [
|
253 |
+
"replace text[43:45] --> decoded_text[43:44] 'だ' --> 'だ'",
|
254 |
+
"replace text[61:63] --> decoded_text[60:61] 'で' --> 'で'"
|
255 |
+
],
|
256 |
+
"n_oov_chars": 0,
|
257 |
+
"oov_ratio": 0.0,
|
258 |
+
"oov_charset": "[]"
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"text": "遊び、バイト、 勉強、いろいろありますが、区切りを付けて頑張ってください。",
|
262 |
+
"decoded_text": "遊び、バイト、 勉強、いろいろありますが、区切りを付けて頑張ってください。",
|
263 |
+
"diff": [
|
264 |
+
"replace text[1:3] --> decoded_text[1:2] 'び' --> 'び'",
|
265 |
+
"replace text[4:6] --> decoded_text[3:4] 'バ' --> 'バ'",
|
266 |
+
"replace text[35:37] --> decoded_text[33:34] 'だ' --> 'だ'"
|
267 |
+
],
|
268 |
+
"n_oov_chars": 0,
|
269 |
+
"oov_ratio": 0.0,
|
270 |
+
"oov_charset": "[]"
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"text": "とにかく過去問題をやり、自分の苦手な分野をできるようにして行きました。最近の土木現場では、環境への影響を考えて工事をしなければならないので、この資格を取得しようと思いました。合格して本当に良かったです。",
|
274 |
+
"decoded_text": "とにかく過去問題をやり、自分の苦手な分野をできるようにして行きました。最近の土木現場では、環境への影響を考えて工事をしなければならないので、この資格を取得しようと思いました。合格して本当に良かったです。",
|
275 |
+
"diff": [
|
276 |
+
"replace text[21:23] --> decoded_text[21:22] 'で' --> 'で'",
|
277 |
+
"replace text[43:45] --> decoded_text[42:43] 'で' --> 'で'",
|
278 |
+
"replace text[64:66] --> decoded_text[62:63] 'ば' --> 'ば'",
|
279 |
+
"replace text[71:73] --> decoded_text[68:69] 'で' --> 'で'",
|
280 |
+
"replace text[102:104] --> decoded_text[98:99] 'で' --> 'で'"
|
281 |
+
],
|
282 |
+
"n_oov_chars": 0,
|
283 |
+
"oov_ratio": 0.0,
|
284 |
+
"oov_charset": "[]"
|
285 |
+
},
|
286 |
+
{
|
287 |
+
"text": "勉強する時と、遊ぶときの切り替えをうまくやり、合格へ向けて頑張ってください。",
|
288 |
+
"decoded_text": "勉強する時と、遊ぶときの切り替えをうまくやり、合格へ向けて頑張ってください。",
|
289 |
+
"diff": [
|
290 |
+
"replace text[8:10] --> decoded_text[8:9] 'ぶ' --> 'ぶ'",
|
291 |
+
"replace text[35:37] --> decoded_text[34:35] 'だ' --> 'だ'"
|
292 |
+
],
|
293 |
+
"n_oov_chars": 0,
|
294 |
+
"oov_ratio": 0.0,
|
295 |
+
"oov_charset": "[]"
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"text": "過去問を何度もやりました。頑張ってきたことが成果に出て嬉しいです。この調子で、測量士補、2級土木施工管理技術検定の学科試験にも合格できるように頑張り たいと思います。",
|
299 |
+
"decoded_text": "過去問を何度もやりました。頑張ってきたことが成果に出て嬉しいです。この調子で、測量士補、2級土木施工管理技術検定の学科試験にも合格できるように頑張り たいと思います。",
|
300 |
+
"diff": [
|
301 |
+
"replace text[21:23] --> decoded_text[21:22] 'が' --> 'が'",
|
302 |
+
"replace text[31:33] --> decoded_text[30:31] 'で' --> 'で'",
|
303 |
+
"replace text[39:41] --> decoded_text[37:38] 'で' --> 'で'",
|
304 |
+
"replace text[68:70] --> decoded_text[65:66] 'で' --> 'で'"
|
305 |
+
],
|
306 |
+
"n_oov_chars": 0,
|
307 |
+
"oov_ratio": 0.0,
|
308 |
+
"oov_charset": "[]"
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"text": "何度も何度も繰り返し過去問を解くことが合格への近道です。頑張ってください。",
|
312 |
+
"decoded_text": "何度も何度も繰り返し過去問を解くことが合格への近道です。頑張ってください。",
|
313 |
+
"diff": [
|
314 |
+
"replace text[18:20] --> decoded_text[18:19] 'が' --> 'が'",
|
315 |
+
"replace text[26:28] --> decoded_text[25:26] 'で' --> 'で'",
|
316 |
+
"replace text[35:37] --> decoded_text[33:34] 'だ' --> 'だ'"
|
317 |
+
],
|
318 |
+
"n_oov_chars": 0,
|
319 |
+
"oov_ratio": 0.0,
|
320 |
+
"oov_charset": "[]"
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"text": "放課後も残って勉強しました。その中で法律と施工管理を重点的に勉強しました。さらに作文も頑張りました。来年には2級ビオトープ計画管理士も受験して合格したいです。将来に必要な資格なので、持っていて損はないと思います。合格して本当に嬉しいです。",
|
324 |
+
"decoded_text": "放課後も残って勉強しました。その中で法律と施工管理を重点的に勉強しました。さらに作文も頑張りました。来年には2級ビオトープ計画管理士も受験して合格したいです。将来に必要な資格なので、持っていて損はないと思います。合格して本当に嬉しいです。",
|
325 |
+
"diff": [
|
326 |
+
"replace text[17:19] --> decoded_text[17:18] 'で' --> 'で'",
|
327 |
+
"replace text[57:59] --> decoded_text[56:57] 'ビ' --> 'ビ'",
|
328 |
+
"replace text[62:64] --> decoded_text[60:61] 'プ' --> 'プ'",
|
329 |
+
"replace text[79:81] --> decoded_text[76:77] 'で' --> 'で'",
|
330 |
+
"replace text[93:95] --> decoded_text[89:90] 'で' --> 'で'",
|
331 |
+
"replace text[121:123] --> decoded_text[116:117] 'で' --> 'で'"
|
332 |
+
],
|
333 |
+
"n_oov_chars": 0,
|
334 |
+
"oov_ratio": 0.0,
|
335 |
+
"oov_charset": "[]"
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"text": "やる気があれば合格できます。専門学校に入学したからには自分で目標を作り、積極的に挑戦して行ってください。",
|
339 |
+
"decoded_text": "やる気があれば合格できます。専門学校に入学したからには自分で目標を作り、積極的に挑戦して行ってください。",
|
340 |
+
"diff": [
|
341 |
+
"replace text[3:5] --> decoded_text[3:4] 'が' --> 'が'",
|
342 |
+
"replace text[10:12] --> decoded_text[9:10] 'で' --> 'で'",
|
343 |
+
"replace text[31:33] --> decoded_text[29:30] 'で' --> 'で'",
|
344 |
+
"replace text[51:53] --> decoded_text[48:49] 'だ' --> 'だ'"
|
345 |
+
],
|
346 |
+
"n_oov_chars": 0,
|
347 |
+
"oov_ratio": 0.0,
|
348 |
+
"oov_charset": "[]"
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"text": "放課後残って勉強しました。まずはどんな形であっても問題に取り組む事が大切だと思います。電気の知識はまだまだですが、学習の方法が定まってきたので、残りの2科目(電力科目・機械科目)、さらには消防設備士試験にも合格したいです。",
|
352 |
+
"decoded_text": "放課後残って勉強しました。まずはどんな形であっても問題に取り組む事が大切だと思います。電気の知識はまだまだですが、学習の方法が定まってきたので、残りの2科目(電力科目・機械科目)、さらには消防設備士試験にも合格したいです。",
|
353 |
+
"diff": [
|
354 |
+
"replace text[14:16] --> decoded_text[14:15] 'ず' --> 'ず'",
|
355 |
+
"replace text[17:19] --> decoded_text[16:17] 'ど' --> 'ど'",
|
356 |
+
"replace text[22:24] --> decoded_text[20:21] 'で' --> 'で'",
|
357 |
+
"replace text[36:38] --> decoded_text[33:34] 'が' --> 'が'",
|
358 |
+
"replace text[40:42] --> decoded_text[36:37] 'だ' --> 'だ'",
|
359 |
+
"replace text[55:57] --> decoded_text[50:51] 'だ' --> 'だ'",
|
360 |
+
"replace text[58:62] --> decoded_text[52:54] 'だで' --> 'だで'",
|
361 |
+
"replace text[63:65] --> decoded_text[55:56] 'が' --> 'が'",
|
362 |
+
"replace text[71:73] --> decoded_text[62:63] 'が' --> 'が'",
|
363 |
+
"replace text[80:82] --> decoded_text[70:71] 'で' --> 'で'",
|
364 |
+
"replace text[119:121] --> decoded_text[108:109] 'で' --> 'で'"
|
365 |
+
],
|
366 |
+
"n_oov_chars": 0,
|
367 |
+
"oov_ratio": 0.0,
|
368 |
+
"oov_charset": "[]"
|
369 |
+
},
|
370 |
+
{
|
371 |
+
"text": "気になる資格は、早め早めに下調べしておくことが大切だと思います。問題を知った上で授業を受けると、理解が、より深められます。",
|
372 |
+
"decoded_text": "気になる資格は、早め早めに下調べしておくことが大切だと思います。問題を知った上で授業を受けると、理解が、より深められます。",
|
373 |
+
"diff": [
|
374 |
+
"replace text[15:17] --> decoded_text[15:16] 'べ' --> 'べ'",
|
375 |
+
"replace text[23:25] --> decoded_text[22:23] 'が' --> 'が'",
|
376 |
+
"replace text[27:29] --> decoded_text[25:26] 'だ' --> 'だ'",
|
377 |
+
"replace text[53:55] --> decoded_text[50:51] 'が' --> 'が'"
|
378 |
+
],
|
379 |
+
"n_oov_chars": 0,
|
380 |
+
"oov_ratio": 0.0,
|
381 |
+
"oov_charset": "[]"
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"text": "問題集を1冊を決めて、とことんやりこみました。何回かやっても理解できない時は、理解できる所から取り組み、自分の得意な分野で確実に点を取るよう勉強しました。残りの3科目を取得できるように、さらに努力しようと思います。",
|
385 |
+
"decoded_text": "問題集を1冊を決めて、とことんやりこみました。何回かやっても理解できない時は、理解できる所から取り組み、自分の得意な分野で確実に点を取るよう勉強しました。残りの3科目を取得できるように、さらに努力しようと思います。",
|
386 |
+
"diff": [
|
387 |
+
"replace text[32:34] --> decoded_text[32:33] 'で' --> 'で'",
|
388 |
+
"replace text[61:63] --> decoded_text[60:61] 'で' --> 'で'",
|
389 |
+
"replace text[88:90] --> decoded_text[86:87] 'で' --> 'で'"
|
390 |
+
],
|
391 |
+
"n_oov_chars": 0,
|
392 |
+
"oov_ratio": 0.0,
|
393 |
+
"oov_charset": "[]"
|
394 |
+
},
|
395 |
+
{
|
396 |
+
"text": "各科目ともに完璧を目指すのではなく、少しでも自分の得意な所を見つけて得点できるようにするのが重要だと思います。",
|
397 |
+
"decoded_text": "各科目ともに完璧を目指すのではなく、少しでも自分の得意な所を見つけて得点できるようにするのが重要だと思います。",
|
398 |
+
"diff": [
|
399 |
+
"replace text[13:15] --> decoded_text[13:14] 'で' --> 'で'",
|
400 |
+
"replace text[21:23] --> decoded_text[20:21] 'で' --> 'で'",
|
401 |
+
"replace text[38:40] --> decoded_text[36:37] 'で' --> 'で'",
|
402 |
+
"replace text[48:50] --> decoded_text[45:46] 'が' --> 'が'",
|
403 |
+
"replace text[52:54] --> decoded_text[48:49] 'だ' --> 'だ'"
|
404 |
+
],
|
405 |
+
"n_oov_chars": 0,
|
406 |
+
"oov_ratio": 0.0,
|
407 |
+
"oov_charset": "[]"
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"text": "電験対策講座を受講し、過去問を何回も繰り返し解きました。また日頃の授業をしっかり聞き自分がわかるように、ノートにまとめました。合格はしましたが、自分では実力はまだついてないと思います。ほかの3科目も取得できるように頑張りたいと思います。",
|
411 |
+
"decoded_text": "電験対策講座を受講し、過去問を何回も繰り返し解きました。また日頃の授業をしっかり聞き自分がわかるように、ノートにまとめました。合格はしましたが、自分では実力はまだついてないと思います。ほかの3科目も取得できるように頑張りたいと思います。",
|
412 |
+
"diff": [
|
413 |
+
"replace text[44:46] --> decoded_text[44:45] 'が' --> 'が'",
|
414 |
+
"replace text[71:73] --> decoded_text[70:71] 'が' --> 'が'",
|
415 |
+
"replace text[76:78] --> decoded_text[74:75] 'で' --> 'で'",
|
416 |
+
"replace text[83:85] --> decoded_text[80:81] 'だ' --> 'だ'",
|
417 |
+
"replace text[105:107] --> decoded_text[101:102] 'で' --> 'で'"
|
418 |
+
],
|
419 |
+
"n_oov_chars": 0,
|
420 |
+
"oov_ratio": 0.0,
|
421 |
+
"oov_charset": "[]"
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"text": "普通科出身でも、毎日の授業を真剣に聞いていれば、自分に自信がつくので頑張ってください。",
|
425 |
+
"decoded_text": "普通科出身でも、毎日の授業を真剣に聞いていれば、自分に自信がつくので頑張ってください。",
|
426 |
+
"diff": [
|
427 |
+
"replace text[5:7] --> decoded_text[5:6] 'で' --> 'で'",
|
428 |
+
"replace text[23:25] --> decoded_text[22:23] 'ば' --> 'ば'",
|
429 |
+
"replace text[31:33] --> decoded_text[29:30] 'が' --> 'が'",
|
430 |
+
"replace text[36:38] --> decoded_text[33:34] 'で' --> 'で'",
|
431 |
+
"replace text[43:45] --> decoded_text[39:40] 'だ' --> 'だ'"
|
432 |
+
],
|
433 |
+
"n_oov_chars": 0,
|
434 |
+
"oov_ratio": 0.0,
|
435 |
+
"oov_charset": "[]"
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"text": "筆記試験対策では理解できるまで、過去問を繰り返し解きました。実技試験対策では、ミスをしないように、工夫した練習をしました。合格をいただいて、高圧の電気工事に従事する事ができるので嬉しかったです。",
|
439 |
+
"decoded_text": "筆記試験対策では理解できるまで、過去問を繰り返し解きました。実技試験対策では、ミスをしないように、工夫した練習をしました。合格をいただいて、高圧の電気工事に従事する事ができるので嬉しかったです。",
|
440 |
+
"diff": [
|
441 |
+
"replace text[6:8] --> decoded_text[6:7] 'で' --> 'で'",
|
442 |
+
"replace text[11:13] --> decoded_text[10:11] 'で' --> 'で'",
|
443 |
+
"replace text[16:18] --> decoded_text[14:15] 'で' --> 'で'",
|
444 |
+
"replace text[39:41] --> decoded_text[36:37] 'で' --> 'で'",
|
445 |
+
"replace text[70:72] --> decoded_text[66:67] 'だ' --> 'だ'",
|
446 |
+
"replace text[88:92] --> decoded_text[83:85] 'がで' --> 'がで'",
|
447 |
+
"replace text[95:97] --> decoded_text[88:89] 'で' --> 'で'",
|
448 |
+
"replace text[102:104] --> decoded_text[94:95] 'で' --> 'で'"
|
449 |
+
],
|
450 |
+
"n_oov_chars": 0,
|
451 |
+
"oov_ratio": 0.0,
|
452 |
+
"oov_charset": "[]"
|
453 |
+
},
|
454 |
+
{
|
455 |
+
"text": "入学してから受ける国家試験の中で少し難しいですが、頑張れば取れる資格なので、最後まで努力してみてください。",
|
456 |
+
"decoded_text": "入学してから受ける国家試験の中で少し難しいですが、頑張れば取れる資格なので、最後まで努力してみてください。",
|
457 |
+
"diff": [
|
458 |
+
"replace text[15:17] --> decoded_text[15:16] 'で' --> 'で'",
|
459 |
+
"replace text[22:24] --> decoded_text[21:22] 'で' --> 'で'",
|
460 |
+
"replace text[25:27] --> decoded_text[23:24] 'が' --> 'が'",
|
461 |
+
"replace text[31:33] --> decoded_text[28:29] 'ば' --> 'ば'",
|
462 |
+
"replace text[40:42] --> decoded_text[36:37] 'で' --> 'で'",
|
463 |
+
"replace text[46:48] --> decoded_text[41:42] 'で' --> 'で'",
|
464 |
+
"replace text[55:57] --> decoded_text[49:50] 'だ' --> 'だ'"
|
465 |
+
],
|
466 |
+
"n_oov_chars": 0,
|
467 |
+
"oov_ratio": 0.0,
|
468 |
+
"oov_charset": "[]"
|
469 |
+
},
|
470 |
+
{
|
471 |
+
"text": "先生が丁寧に教えてくださるので日々の授業を大切にし、分からない所はその日に先生に聞きに行きました。就職に有利になるように取得した資格を就職活動のため、また社会にでて仕事に活かしていきたいです。",
|
472 |
+
"decoded_text": "先生が丁寧に教えてくださるので日々の授業を大切にし、分からない所はその日に先生に聞きに行きました。就職に有利になるように取得した資格を就職活動のため、また社会にでて仕事に活かしていきたいです。",
|
473 |
+
"diff": [
|
474 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
475 |
+
"replace text[11:13] --> decoded_text[10:11] 'だ' --> 'だ'",
|
476 |
+
"replace text[16:18] --> decoded_text[14:15] 'で' --> 'で'",
|
477 |
+
"replace text[83:85] --> decoded_text[80:81] 'で' --> 'で'",
|
478 |
+
"replace text[97:99] --> decoded_text[93:94] 'で' --> 'で'"
|
479 |
+
],
|
480 |
+
"n_oov_chars": 0,
|
481 |
+
"oov_ratio": 0.0,
|
482 |
+
"oov_charset": "[]"
|
483 |
+
},
|
484 |
+
{
|
485 |
+
"text": "入学した頃は電気に関して分からなかった私でも取得することができました。皆さんも自信を持って頑張ってください。",
|
486 |
+
"decoded_text": "入学した頃は電気に関して分からなかった私でも取得することができました。皆さんも自信を持って頑張ってください。",
|
487 |
+
"diff": [
|
488 |
+
"replace text[20:22] --> decoded_text[20:21] 'で' --> 'で'",
|
489 |
+
"replace text[29:33] --> decoded_text[28:30] 'がで' --> 'がで'",
|
490 |
+
"replace text[53:55] --> decoded_text[50:51] 'だ' --> 'だ'"
|
491 |
+
],
|
492 |
+
"n_oov_chars": 0,
|
493 |
+
"oov_ratio": 0.0,
|
494 |
+
"oov_charset": "[]"
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"text": "とにかく過去問を何回も解き、疑問があれば先生に質問して分かるまで毎日の勉強を頑張りました。合格できて本当に良かったです。今後は第三種電気主任技術者の資格取得に向けて頑張ります。",
|
498 |
+
"decoded_text": "とにかく過去問を何回も解き、疑問があれば先生に質問して分かるまで毎日の勉強を頑張りました。合格できて本当に良かったです。今後は第三種電気主任技術者の資格取得に向けて頑張ります。",
|
499 |
+
"diff": [
|
500 |
+
"replace text[16:18] --> decoded_text[16:17] 'が' --> 'が'",
|
501 |
+
"replace text[32:34] --> decoded_text[31:32] 'で' --> 'で'",
|
502 |
+
"replace text[49:51] --> decoded_text[47:48] 'で' --> 'で'",
|
503 |
+
"replace text[60:62] --> decoded_text[57:58] 'で' --> 'で'"
|
504 |
+
],
|
505 |
+
"n_oov_chars": 0,
|
506 |
+
"oov_ratio": 0.0,
|
507 |
+
"oov_charset": "[]"
|
508 |
+
},
|
509 |
+
{
|
510 |
+
"text": "第一種電気工事士の本の要点をノートにおさえて復習しました。実技では配線図が分からず先生に教えていただき、早い段階で理解 きたのも合格できた要因ではないかと思います。合格して本当に嬉しかったです。これから就職して仕事に活かしていきたいと思います。",
|
511 |
+
"decoded_text": "第一種電気工事士の本の要点をノートにおさえて復習しました。実技では配線図が分からず先生に教えていただき、早い段階で理解 きたのも合格できた要因ではないかと思います。合格して本当に嬉しかったです。これから就職��て仕事に活かしていきたいと思います。",
|
512 |
+
"diff": [
|
513 |
+
"replace text[31:33] --> decoded_text[31:32] 'で' --> 'で'",
|
514 |
+
"replace text[37:39] --> decoded_text[36:37] 'が' --> 'が'",
|
515 |
+
"replace text[42:44] --> decoded_text[40:41] 'ず' --> 'ず'",
|
516 |
+
"replace text[52:54] --> decoded_text[49:50] 'だ' --> 'だ'",
|
517 |
+
"replace text[60:62] --> decoded_text[56:57] 'で' --> 'で'",
|
518 |
+
"replace text[71:73] --> decoded_text[66:67] 'で' --> 'で'",
|
519 |
+
"replace text[77:79] --> decoded_text[71:72] 'で' --> 'で'",
|
520 |
+
"replace text[101:103] --> decoded_text[94:95] 'で' --> 'で'"
|
521 |
+
],
|
522 |
+
"n_oov_chars": 0,
|
523 |
+
"oov_ratio": 0.0,
|
524 |
+
"oov_charset": "[]"
|
525 |
+
},
|
526 |
+
{
|
527 |
+
"text": "分からない場合は先生に聞けば、すぐ教えて下さるので早めに聞いたら良いと思います。",
|
528 |
+
"decoded_text": "分からない場合は先生に聞けば、すぐ教えて下さるので早めに聞いたら良いと思います。",
|
529 |
+
"diff": [
|
530 |
+
"replace text[13:15] --> decoded_text[13:14] 'ば' --> 'ば'",
|
531 |
+
"replace text[17:19] --> decoded_text[16:17] 'ぐ' --> 'ぐ'",
|
532 |
+
"replace text[26:28] --> decoded_text[24:25] 'で' --> 'で'"
|
533 |
+
],
|
534 |
+
"n_oov_chars": 0,
|
535 |
+
"oov_ratio": 0.0,
|
536 |
+
"oov_charset": "[]"
|
537 |
+
},
|
538 |
+
{
|
539 |
+
"text": "夏休みも学校へ行き、先生に教えていただきました。試験が終わってからは合格しているようにと祈っていました。合格と聞いて、とても嬉しかったです。これからは取得して終わりではなく、取得した資格をさらに活かせれるように勉強していきたいと思います。",
|
540 |
+
"decoded_text": "夏休みも学校へ行き、先生に教えていただきました。試験が終わってからは合格しているようにと祈っていました。合格と聞いて、とても嬉しかったです。これからは取得して終わりではなく、取得した資格をさらに活かせれるように勉強していきたいと思います。",
|
541 |
+
"diff": [
|
542 |
+
"replace text[18:20] --> decoded_text[18:19] 'だ' --> 'だ'",
|
543 |
+
"replace text[27:29] --> decoded_text[26:27] 'が' --> 'が'",
|
544 |
+
"replace text[69:71] --> decoded_text[67:68] 'で' --> 'で'",
|
545 |
+
"replace text[85:87] --> decoded_text[82:83] 'で' --> 'で'"
|
546 |
+
],
|
547 |
+
"n_oov_chars": 0,
|
548 |
+
"oov_ratio": 0.0,
|
549 |
+
"oov_charset": "[]"
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"text": "自分の苦手な問題を1つでも無くし、どの問題が出題されても対応できるように勉強すれば大丈夫です!",
|
553 |
+
"decoded_text": "自分の苦手な問題を1つでも無くし、どの問題が出題されても対応できるように勉強すれば大丈夫です!",
|
554 |
+
"diff": [
|
555 |
+
"replace text[11:13] --> decoded_text[11:12] 'で' --> 'で'",
|
556 |
+
"replace text[18:20] --> decoded_text[17:18] 'ど' --> 'ど'",
|
557 |
+
"replace text[23:25] --> decoded_text[21:22] 'が' --> 'が'",
|
558 |
+
"replace text[33:35] --> decoded_text[30:31] 'で' --> 'で'",
|
559 |
+
"replace text[44:46] --> decoded_text[40:41] 'ば' --> 'ば'",
|
560 |
+
"replace text[49:51] --> decoded_text[44:45] 'で' --> 'で'"
|
561 |
+
],
|
562 |
+
"n_oov_chars": 0,
|
563 |
+
"oov_ratio": 0.0,
|
564 |
+
"oov_charset": "[]"
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"text": "私は計算が苦手なので、マンツーマンで先生に教えていただき、頑張りました。資格取得を目標に専門学校に入学したので合格通知が届いたときは本当に嬉しかったです。さらに次の資格取得に向け頑張りたいと思います。",
|
568 |
+
"decoded_text": "私は計算が苦手なので、マンツーマンで先生に教えていただき、頑張りました。資格取得を目標に専門学校に入学したので合格通知が届いたときは本当に嬉しかったです。さらに次の資格取得に向け頑張りたいと思います。",
|
569 |
+
"diff": [
|
570 |
+
"replace text[4:6] --> decoded_text[4:5] 'が' --> 'が'",
|
571 |
+
"replace text[10:12] --> decoded_text[9:10] 'で' --> 'で'",
|
572 |
+
"replace text[19:21] --> decoded_text[17:18] 'で' --> 'で'",
|
573 |
+
"replace text[29:31] --> decoded_text[26:27] 'だ' --> 'だ'",
|
574 |
+
"replace text[58:60] --> decoded_text[54:55] 'で' --> 'で'",
|
575 |
+
"replace text[64:66] --> decoded_text[59:60] 'が' --> 'が'",
|
576 |
+
"replace text[80:82] --> decoded_text[74:75] 'で' --> 'で'"
|
577 |
+
],
|
578 |
+
"n_oov_chars": 0,
|
579 |
+
"oov_ratio": 0.0,
|
580 |
+
"oov_charset": "[]"
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"text": "得意分野より苦手分野を克服することで合格に近づけると思います。最後まで諦めずに頑張ってください。",
|
584 |
+
"decoded_text": "得意分野より苦手分野を克服することで合格に近づけると���います。最後まで諦めずに頑張ってください。",
|
585 |
+
"diff": [
|
586 |
+
"replace text[17:19] --> decoded_text[17:18] 'で' --> 'で'",
|
587 |
+
"replace text[23:25] --> decoded_text[22:23] 'づ' --> 'づ'",
|
588 |
+
"replace text[36:38] --> decoded_text[34:35] 'で' --> 'で'",
|
589 |
+
"replace text[40:42] --> decoded_text[37:38] 'ず' --> 'ず'",
|
590 |
+
"replace text[48:50] --> decoded_text[44:45] 'だ' --> 'だ'"
|
591 |
+
],
|
592 |
+
"n_oov_chars": 0,
|
593 |
+
"oov_ratio": 0.0,
|
594 |
+
"oov_charset": "[]"
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"text": "実技が苦手だったので、学校に放課後も残って頑張りました。復習も必ずしました。この資格を仕事で活かせるように頑張りたいと思います。、これからも、まだまだ他の資格にも挑戦して行きます。",
|
598 |
+
"decoded_text": "実技が苦手だったので、学校に放課後も残って頑張りました。復習も必ずしました。この資格を仕事で活かせるように頑張りたいと思います。、これからも、まだまだ他の資格にも挑戦して行きます。",
|
599 |
+
"diff": [
|
600 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
601 |
+
"replace text[6:8] --> decoded_text[5:6] 'だ' --> 'だ'",
|
602 |
+
"replace text[11:13] --> decoded_text[9:10] 'で' --> 'で'",
|
603 |
+
"replace text[35:37] --> decoded_text[32:33] 'ず' --> 'ず'",
|
604 |
+
"replace text[49:51] --> decoded_text[45:46] 'で' --> 'で'",
|
605 |
+
"replace text[77:79] --> decoded_text[72:73] 'だ' --> 'だ'",
|
606 |
+
"replace text[80:82] --> decoded_text[74:75] 'だ' --> 'だ'"
|
607 |
+
],
|
608 |
+
"n_oov_chars": 0,
|
609 |
+
"oov_ratio": 0.0,
|
610 |
+
"oov_charset": "[]"
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"text": "自分の苦手なことで壁にぶつかっても、諦めずに頑張ってください。後悔だけはしないように、やるからには全力で挑んでください。",
|
614 |
+
"decoded_text": "自分の苦手なことで壁にぶつかっても、諦めずに頑張ってください。後悔だけはしないように、やるからには全力で挑んでください。",
|
615 |
+
"diff": [
|
616 |
+
"replace text[8:10] --> decoded_text[8:9] 'で' --> 'で'",
|
617 |
+
"replace text[12:14] --> decoded_text[11:12] 'ぶ' --> 'ぶ'",
|
618 |
+
"replace text[22:24] --> decoded_text[20:21] 'ず' --> 'ず'",
|
619 |
+
"replace text[30:32] --> decoded_text[27:28] 'だ' --> 'だ'",
|
620 |
+
"replace text[37:39] --> decoded_text[33:34] 'だ' --> 'だ'",
|
621 |
+
"replace text[56:58] --> decoded_text[51:52] 'で' --> 'で'",
|
622 |
+
"replace text[60:62] --> decoded_text[54:55] 'で' --> 'で'",
|
623 |
+
"replace text[63:65] --> decoded_text[56:57] 'だ' --> 'だ'"
|
624 |
+
],
|
625 |
+
"n_oov_chars": 0,
|
626 |
+
"oov_ratio": 0.0,
|
627 |
+
"oov_charset": "[]"
|
628 |
+
},
|
629 |
+
{
|
630 |
+
"text": "前回正解率の低かったアルゴリズムや表計算を重点的に復習しました。本番では諦めずに問題を読み続けました。大学編入しても、情報処理技術者試験の勉強は続けていき、次は応用情報技術者試験にもチャレンジしていきたいです。",
|
631 |
+
"decoded_text": "前回正解率の低かったアルゴリズムや表計算を重点的に復習しました。本番では諦めずに問題を読み続けました。大学編入しても、情報処理技術者試験の勉強は続けていき、次は応用情報技術者試験にもチャレンジしていきたいです。",
|
632 |
+
"diff": [
|
633 |
+
"replace text[12:14] --> decoded_text[12:13] 'ゴ' --> 'ゴ'",
|
634 |
+
"replace text[15:17] --> decoded_text[14:15] 'ズ' --> 'ズ'",
|
635 |
+
"replace text[36:38] --> decoded_text[34:35] 'で' --> 'で'",
|
636 |
+
"replace text[41:43] --> decoded_text[38:39] 'ず' --> 'ず'",
|
637 |
+
"replace text[99:101] --> decoded_text[95:96] 'ジ' --> 'ジ'",
|
638 |
+
"replace text[107:109] --> decoded_text[102:103] 'で' --> 'で'"
|
639 |
+
],
|
640 |
+
"n_oov_chars": 0,
|
641 |
+
"oov_ratio": 0.0,
|
642 |
+
"oov_charset": "[]"
|
643 |
+
},
|
644 |
+
{
|
645 |
+
"text": "普段の授業をしっかり聞くことと、試験中は諦めない気持ちがあれば大丈夫です。",
|
646 |
+
"decoded_text": "普段の授業をしっかり聞くことと、試験中は諦めない気持ちがあれば大丈夫です。",
|
647 |
+
"diff": [
|
648 |
+
"replace text[27:29] --> decoded_text[27:28] 'が' --> 'が'",
|
649 |
+
"replace text[31:33] --> decoded_text[30:31] 'ば' --> 'ば'",
|
650 |
+
"replace text[36:38] --> decoded_text[34:35] 'で' --> 'で'"
|
651 |
+
],
|
652 |
+
"n_oov_chars": 0,
|
653 |
+
"oov_ratio": 0.0,
|
654 |
+
"oov_charset": "[]"
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"text": "この資格が関係する、舞台現場でアルバイトをしていました。結果にはあまり自信がなかったのですが、合格通知が届いた時は何回も見直し、とても嬉しかったて��す。",
|
658 |
+
"decoded_text": "この資格が関係する、舞台現場でアルバイトをしていました。結果にはあまり自信がなかったのですが、合格通知が届いた時は何回も見直し、とても嬉しかったです。",
|
659 |
+
"diff": [
|
660 |
+
"replace text[4:6] --> decoded_text[4:5] 'が' --> 'が'",
|
661 |
+
"replace text[15:17] --> decoded_text[14:15] 'で' --> 'で'",
|
662 |
+
"replace text[19:21] --> decoded_text[17:18] 'バ' --> 'バ'",
|
663 |
+
"replace text[40:42] --> decoded_text[37:38] 'が' --> 'が'",
|
664 |
+
"replace text[47:49] --> decoded_text[43:44] 'で' --> 'で'",
|
665 |
+
"replace text[50:52] --> decoded_text[45:46] 'が' --> 'が'",
|
666 |
+
"replace text[57:59] --> decoded_text[51:52] 'が' --> 'が'",
|
667 |
+
"replace text[79:81] --> decoded_text[72:73] 'で' --> 'で'"
|
668 |
+
],
|
669 |
+
"n_oov_chars": 0,
|
670 |
+
"oov_ratio": 0.0,
|
671 |
+
"oov_charset": "[]"
|
672 |
+
},
|
673 |
+
{
|
674 |
+
"text": "資格対策授業で分からないところは積極的に質問しました。また復習はかかさずしました。資格を取得したからといって、勉強したことを忘れたら意味がありませんので、資格に恥じないよう に今後も頑張りたいと思います。",
|
675 |
+
"decoded_text": "資格対策授業で分からないところは積極的に質問しました。また復習はかかさずしました。資格を取得したからといって、勉強したことを忘れたら意味がありませんので、資格に恥じないよう に今後も頑張りたいと思います。",
|
676 |
+
"diff": [
|
677 |
+
"replace text[6:8] --> decoded_text[6:7] 'で' --> 'で'",
|
678 |
+
"replace text[36:38] --> decoded_text[35:36] 'ず' --> 'ず'",
|
679 |
+
"replace text[70:72] --> decoded_text[68:69] 'が' --> 'が'",
|
680 |
+
"replace text[78:80] --> decoded_text[75:76] 'で' --> 'で'",
|
681 |
+
"replace text[85:87] --> decoded_text[81:82] 'じ' --> 'じ'"
|
682 |
+
],
|
683 |
+
"n_oov_chars": 0,
|
684 |
+
"oov_ratio": 0.0,
|
685 |
+
"oov_charset": "[]"
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"text": "試験を想定して取り組みました。同じ試験を受けるクラスメイトに分からない所を聞きました。国家資格なので、持っていて損はないと思います。",
|
689 |
+
"decoded_text": "試験を想定して取り組みました。同じ試験を受けるクラスメイトに分からない所を聞きました。国家資格なので、持っていて損はないと思います。",
|
690 |
+
"diff": [
|
691 |
+
"replace text[16:18] --> decoded_text[16:17] 'じ' --> 'じ'",
|
692 |
+
"replace text[50:52] --> decoded_text[49:50] 'で' --> 'で'"
|
693 |
+
],
|
694 |
+
"n_oov_chars": 0,
|
695 |
+
"oov_ratio": 0.0,
|
696 |
+
"oov_charset": "[]"
|
697 |
+
},
|
698 |
+
{
|
699 |
+
"text": "何度も、学科、実技、判断(旧要素)の対策をして合格を目指して頑張ってください。",
|
700 |
+
"decoded_text": "何度も、学科、実技、判断(旧要素)の対策をして合格を目指して頑張ってください。",
|
701 |
+
"diff": [
|
702 |
+
"replace text[35:37] --> decoded_text[35:36] 'だ' --> 'だ'"
|
703 |
+
],
|
704 |
+
"n_oov_chars": 0,
|
705 |
+
"oov_ratio": 0.0,
|
706 |
+
"oov_charset": "[]"
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"text": "自分の力を信じて落ち着いて望んでください。練習が実を結ぶはずです。",
|
710 |
+
"decoded_text": "自分の力を信じて落ち着いて望んでください。練習が実を結ぶはずです。",
|
711 |
+
"diff": [
|
712 |
+
"replace text[6:8] --> decoded_text[6:7] 'じ' --> 'じ'",
|
713 |
+
"replace text[16:18] --> decoded_text[15:16] 'で' --> 'で'",
|
714 |
+
"replace text[19:21] --> decoded_text[17:18] 'だ' --> 'だ'",
|
715 |
+
"replace text[26:28] --> decoded_text[23:24] 'が' --> 'が'",
|
716 |
+
"replace text[31:33] --> decoded_text[27:28] 'ぶ' --> 'ぶ'",
|
717 |
+
"replace text[34:38] --> decoded_text[29:31] 'ずで' --> 'ずで'"
|
718 |
+
],
|
719 |
+
"n_oov_chars": 0,
|
720 |
+
"oov_ratio": 0.0,
|
721 |
+
"oov_charset": "[]"
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"text": "分からない所は、先生に質問して勉強すれば大丈夫です。私は納得するまで解説してくださった先生に感謝しています。",
|
725 |
+
"decoded_text": "分からない所は、先生に質問して勉強すれば大丈夫です。私は納得するまで解説してくださった先生に感謝しています。",
|
726 |
+
"diff": [
|
727 |
+
"replace text[19:21] --> decoded_text[19:20] 'ば' --> 'ば'",
|
728 |
+
"replace text[24:26] --> decoded_text[23:24] 'で' --> 'で'",
|
729 |
+
"replace text[35:37] --> decoded_text[33:34] 'で' --> 'で'",
|
730 |
+
"replace text[42:44] --> decoded_text[39:40] 'だ' --> 'だ'"
|
731 |
+
],
|
732 |
+
"n_oov_chars": 0,
|
733 |
+
"oov_ratio": 0.0,
|
734 |
+
"oov_charset": "[]"
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"text": "授業で分からない所は先生や友達に聞きました。問題はまず自分で解いて分からない用語は、本やネ��トで調べました。試験には合格しましたが、まだ分からないことが多いので、資格で勉強した知識をもっと深めていきたいです。",
|
738 |
+
"decoded_text": "授業で分からない所は先生や友達に聞きました。問題はまず自分で解いて分からない用語は、本やネットで調べました。試験には合格しましたが、まだ分からないことが多いので、資格で勉強した知識をもっと深めていきたいです。",
|
739 |
+
"diff": [
|
740 |
+
"replace text[2:4] --> decoded_text[2:3] 'で' --> 'で'",
|
741 |
+
"replace text[27:29] --> decoded_text[26:27] 'ず' --> 'ず'",
|
742 |
+
"replace text[31:33] --> decoded_text[29:30] 'で' --> 'で'",
|
743 |
+
"replace text[50:52] --> decoded_text[47:48] 'で' --> 'で'",
|
744 |
+
"replace text[53:55] --> decoded_text[49:50] 'べ' --> 'べ'",
|
745 |
+
"replace text[69:71] --> decoded_text[64:65] 'が' --> 'が'",
|
746 |
+
"replace text[73:75] --> decoded_text[67:68] 'だ' --> 'だ'",
|
747 |
+
"replace text[82:84] --> decoded_text[75:76] 'が' --> 'が'",
|
748 |
+
"replace text[87:89] --> decoded_text[79:80] 'で' --> 'で'",
|
749 |
+
"replace text[92:94] --> decoded_text[83:84] 'で' --> 'で'",
|
750 |
+
"replace text[111:113] --> decoded_text[101:102] 'で' --> 'で'"
|
751 |
+
],
|
752 |
+
"n_oov_chars": 0,
|
753 |
+
"oov_ratio": 0.0,
|
754 |
+
"oov_charset": "[]"
|
755 |
+
},
|
756 |
+
{
|
757 |
+
"text": "私は勉強が苦手でしたが、毎日勉強してAランクを取得できました。諦めないでください。",
|
758 |
+
"decoded_text": "私は勉強が苦手でしたが、毎日勉強してAランクを取得できました。諦めないでください。",
|
759 |
+
"diff": [
|
760 |
+
"replace text[4:6] --> decoded_text[4:5] 'が' --> 'が'",
|
761 |
+
"replace text[8:10] --> decoded_text[7:8] 'で' --> 'で'",
|
762 |
+
"replace text[12:14] --> decoded_text[10:11] 'が' --> 'が'",
|
763 |
+
"replace text[38:40] --> decoded_text[35:36] 'で' --> 'で'",
|
764 |
+
"replace text[41:43] --> decoded_text[37:38] 'だ' --> 'だ'"
|
765 |
+
],
|
766 |
+
"n_oov_chars": 0,
|
767 |
+
"oov_ratio": 0.0,
|
768 |
+
"oov_charset": "[]"
|
769 |
+
},
|
770 |
+
{
|
771 |
+
"text": "受かりたい気持ちがあれば、自然と頑張れます! 最後まで諦めずに頑張ってください!",
|
772 |
+
"decoded_text": "受かりたい気持ちがあれば、自然と頑張れます! 最後まで諦めずに頑張ってください!",
|
773 |
+
"diff": [
|
774 |
+
"replace text[8:10] --> decoded_text[8:9] 'が' --> 'が'",
|
775 |
+
"replace text[12:14] --> decoded_text[11:12] 'ば' --> 'ば'",
|
776 |
+
"replace text[28:30] --> decoded_text[26:27] 'で' --> 'で'",
|
777 |
+
"replace text[32:34] --> decoded_text[29:30] 'ず' --> 'ず'",
|
778 |
+
"replace text[40:42] --> decoded_text[36:37] 'だ' --> 'だ'"
|
779 |
+
],
|
780 |
+
"n_oov_chars": 0,
|
781 |
+
"oov_ratio": 0.0,
|
782 |
+
"oov_charset": "[]"
|
783 |
+
},
|
784 |
+
{
|
785 |
+
"text": "対策授業を真面目に受けました。分からない所は、先生に質問し自分が納得するまで勉強しました。合格をいただいて、本当に嬉しかったです。",
|
786 |
+
"decoded_text": "対策授業を真面目に受けました。分からない所は、先生に質問し自分が納得するまで勉強しました。合格をいただいて、本当に嬉しかったです。",
|
787 |
+
"diff": [
|
788 |
+
"replace text[31:33] --> decoded_text[31:32] 'が' --> 'が'",
|
789 |
+
"replace text[38:40] --> decoded_text[37:38] 'で' --> 'で'",
|
790 |
+
"replace text[52:54] --> decoded_text[50:51] 'だ' --> 'だ'",
|
791 |
+
"replace text[65:67] --> decoded_text[62:63] 'で' --> 'で'"
|
792 |
+
],
|
793 |
+
"n_oov_chars": 0,
|
794 |
+
"oov_ratio": 0.0,
|
795 |
+
"oov_charset": "[]"
|
796 |
+
},
|
797 |
+
{
|
798 |
+
"text": "授業を真面目に聞いていたら、必ず合格できます! ここには専門の先生がいるので一人一人に丁寧に教えてくれますよ。",
|
799 |
+
"decoded_text": "授業を真面目に聞いていたら、必ず合格できます! ここには専門の先生がいるので一人一人に丁寧に教えてくれますよ。",
|
800 |
+
"diff": [
|
801 |
+
"replace text[15:17] --> decoded_text[15:16] 'ず' --> 'ず'",
|
802 |
+
"replace text[19:21] --> decoded_text[18:19] 'で' --> 'で'",
|
803 |
+
"replace text[35:37] --> decoded_text[33:34] 'が' --> 'が'",
|
804 |
+
"replace text[40:42] --> decoded_text[37:38] 'で' --> 'で'"
|
805 |
+
],
|
806 |
+
"n_oov_chars": 0,
|
807 |
+
"oov_ratio": 0.0,
|
808 |
+
"oov_charset": "[]"
|
809 |
+
},
|
810 |
+
{
|
811 |
+
"text": "弱点を無くすために、間違えたところは、何回も復習をしました。合格して本当に嬉しかったです。この資格を就職でも活かしたいと思います。",
|
812 |
+
"decoded_text": "弱点を無くすために、間違えたところは、何回も復習をしました。合格して本当に嬉しかったです。���の資格を就職でも活かしたいと思います。",
|
813 |
+
"diff": [
|
814 |
+
"replace text[42:44] --> decoded_text[42:43] 'で' --> 'で'",
|
815 |
+
"replace text[53:55] --> decoded_text[52:53] 'で' --> 'で'"
|
816 |
+
],
|
817 |
+
"n_oov_chars": 0,
|
818 |
+
"oov_ratio": 0.0,
|
819 |
+
"oov_charset": "[]"
|
820 |
+
},
|
821 |
+
{
|
822 |
+
"text": "受かりたい気持ちがあればきっと合格できます! 分からない所は、諦めずに先生に質問して克服してください!",
|
823 |
+
"decoded_text": "受かりたい気持ちがあればきっと合格できます! 分からない所は、諦めずに先生に質問して克服してください!",
|
824 |
+
"diff": [
|
825 |
+
"replace text[8:10] --> decoded_text[8:9] 'が' --> 'が'",
|
826 |
+
"replace text[12:14] --> decoded_text[11:12] 'ば' --> 'ば'",
|
827 |
+
"replace text[19:21] --> decoded_text[17:18] 'で' --> 'で'",
|
828 |
+
"replace text[36:38] --> decoded_text[33:34] 'ず' --> 'ず'",
|
829 |
+
"replace text[51:53] --> decoded_text[47:48] 'だ' --> 'だ'"
|
830 |
+
],
|
831 |
+
"n_oov_chars": 0,
|
832 |
+
"oov_ratio": 0.0,
|
833 |
+
"oov_charset": "[]"
|
834 |
+
},
|
835 |
+
{
|
836 |
+
"text": "問題集を何回も解くことが一番大切です。また電車通学なので通学中にも勉強しました。資格を取得できたので、今後の就職活動に役立てたいと思います。また将来の夢のために、この資格を有効に活用したいと思います。",
|
837 |
+
"decoded_text": "問題集を何回も解くことが一番大切です。また電車通学なので通学中にも勉強しました。資格を取得できたので、今後の就職活動に役立てたいと思います。また将来の夢のために、この資格を有効に活用したいと思います。",
|
838 |
+
"diff": [
|
839 |
+
"replace text[11:13] --> decoded_text[11:12] 'が' --> 'が'",
|
840 |
+
"replace text[17:19] --> decoded_text[16:17] 'で' --> 'で'",
|
841 |
+
"replace text[29:31] --> decoded_text[27:28] 'で' --> 'で'",
|
842 |
+
"replace text[48:50] --> decoded_text[45:46] 'で' --> 'で'",
|
843 |
+
"replace text[53:55] --> decoded_text[49:50] 'で' --> 'で'"
|
844 |
+
],
|
845 |
+
"n_oov_chars": 0,
|
846 |
+
"oov_ratio": 0.0,
|
847 |
+
"oov_charset": "[]"
|
848 |
+
},
|
849 |
+
{
|
850 |
+
"text": "授業で手厚い資格対策をしてくださったので、それを元に復習しました。資格を取得することも、もちろんですが、その資格で勉強した事をきちんと身につけ、今後に役立てていきたいです。",
|
851 |
+
"decoded_text": "授業で手厚い資格対策をしてくださったので、それを元に復習しました。資格を取得することも、もちろんですが、その資格で勉強した事をきちんと身につけ、今後に役立てていきたいです。",
|
852 |
+
"diff": [
|
853 |
+
"replace text[2:4] --> decoded_text[2:3] 'で' --> 'で'",
|
854 |
+
"replace text[15:17] --> decoded_text[14:15] 'だ' --> 'だ'",
|
855 |
+
"replace text[21:23] --> decoded_text[19:20] 'で' --> 'で'",
|
856 |
+
"replace text[51:53] --> decoded_text[48:49] 'で' --> 'で'",
|
857 |
+
"replace text[54:56] --> decoded_text[50:51] 'が' --> 'が'",
|
858 |
+
"replace text[61:63] --> decoded_text[56:57] 'で' --> 'で'",
|
859 |
+
"replace text[89:91] --> decoded_text[83:84] 'で' --> 'で'"
|
860 |
+
],
|
861 |
+
"n_oov_chars": 0,
|
862 |
+
"oov_ratio": 0.0,
|
863 |
+
"oov_charset": "[]"
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"text": "自分が勉強をした分だけ結果はついてきます。自分の夢を後押ししてくれる資格は絶対持ってるべきです。自分の夢の為に頑張りましょう。",
|
867 |
+
"decoded_text": "自分が勉強をした分だけ結果はついてきます。自分の夢を後押ししてくれる資格は絶対持ってるべきです。自分の夢の為に頑張りましょう。",
|
868 |
+
"diff": [
|
869 |
+
"replace text[2:4] --> decoded_text[2:3] 'が' --> 'が'",
|
870 |
+
"replace text[10:12] --> decoded_text[9:10] 'だ' --> 'だ'",
|
871 |
+
"replace text[45:47] --> decoded_text[43:44] 'べ' --> 'べ'",
|
872 |
+
"replace text[48:50] --> decoded_text[45:46] 'で' --> 'で'"
|
873 |
+
],
|
874 |
+
"n_oov_chars": 0,
|
875 |
+
"oov_ratio": 0.0,
|
876 |
+
"oov_charset": "[]"
|
877 |
+
},
|
878 |
+
{
|
879 |
+
"text": "対策授業には積極的に参加し、真面目に勉強しました。将来の事を考えたとき、この資格は絶対取得すべきだと思い、受験しました。資格試験の勉強は、自分にプラスになったと思います。",
|
880 |
+
"decoded_text": "対策授業には積極的に参加し、真面目に勉強しました。将来の事を考えたとき、この資格は絶対取得すべきだと思い、受験しました。資格試験の勉強は、自分にプラスになったと思います。",
|
881 |
+
"diff": [
|
882 |
+
"replace text[46:48] --> decoded_text[46:47] 'べ' --> 'べ'",
|
883 |
+
"replace text[49:51] --> decoded_text[48:49] 'だ' --> 'だ'",
|
884 |
+
"replace text[74:76] --> decoded_text[72:73] 'プ' --> 'プ'"
|
885 |
+
],
|
886 |
+
"n_oov_chars": 0,
|
887 |
+
"oov_ratio": 0.0,
|
888 |
+
"oov_charset": "[]"
|
889 |
+
},
|
890 |
+
{
|
891 |
+
"text": "対策用の教科書を繰り返し解いて覚えました。合格をいただいて本当に嬉しかったです。頑張った結果だったので自分に自信がつきました。",
|
892 |
+
"decoded_text": "対策用の教科書を繰り返し解いて覚えました。合格をいただいて本当に嬉しかったです。頑張った結果だったので自分に自信がつきました。",
|
893 |
+
"diff": [
|
894 |
+
"replace text[26:28] --> decoded_text[26:27] 'だ' --> 'だ'",
|
895 |
+
"replace text[38:40] --> decoded_text[37:38] 'で' --> 'で'",
|
896 |
+
"replace text[48:50] --> decoded_text[46:47] 'だ' --> 'だ'",
|
897 |
+
"replace text[53:55] --> decoded_text[50:51] 'で' --> 'で'",
|
898 |
+
"replace text[60:62] --> decoded_text[56:57] 'が' --> 'が'"
|
899 |
+
],
|
900 |
+
"n_oov_chars": 0,
|
901 |
+
"oov_ratio": 0.0,
|
902 |
+
"oov_charset": "[]"
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"text": "この試験に限らず、どの資格を勉強するにもまず、繰り返し問題を解いて覚えることが大事です。そうする事で問題にも慣れ解けるようになります。",
|
906 |
+
"decoded_text": "この試験に限らず、どの資格を勉強するにもまず、繰り返し問題を解いて覚えることが大事です。そうする事で問題にも慣れ解けるようになります。",
|
907 |
+
"diff": [
|
908 |
+
"replace text[9:11] --> decoded_text[9:10] 'ど' --> 'ど'",
|
909 |
+
"replace text[22:24] --> decoded_text[21:22] 'ず' --> 'ず'",
|
910 |
+
"replace text[40:42] --> decoded_text[38:39] 'が' --> 'が'",
|
911 |
+
"replace text[44:46] --> decoded_text[41:42] 'で' --> 'で'",
|
912 |
+
"replace text[53:55] --> decoded_text[49:50] 'で' --> 'で'"
|
913 |
+
],
|
914 |
+
"n_oov_chars": 0,
|
915 |
+
"oov_ratio": 0.0,
|
916 |
+
"oov_charset": "[]"
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"text": "練習で間違えた問題は2回、3回と繰り返し解きました。やっぱり一番の目的は就職です。就職する際に資格を持っていると有利になるので、合格して本当に良かったです。",
|
920 |
+
"decoded_text": "練習で間違えた問題は2回、3回と繰り返し解きました。やっぱり一番の目的は就職です。就職する際に資格を持っていると有利になるので、合格して本当に良かったです。",
|
921 |
+
"diff": [
|
922 |
+
"replace text[28:30] --> decoded_text[28:29] 'ぱ' --> 'ぱ'",
|
923 |
+
"replace text[39:41] --> decoded_text[38:39] 'で' --> 'で'",
|
924 |
+
"replace text[64:66] --> decoded_text[62:63] 'で' --> 'で'",
|
925 |
+
"replace text[78:80] --> decoded_text[75:76] 'で' --> 'で'"
|
926 |
+
],
|
927 |
+
"n_oov_chars": 0,
|
928 |
+
"oov_ratio": 0.0,
|
929 |
+
"oov_charset": "[]"
|
930 |
+
},
|
931 |
+
{
|
932 |
+
"text": "やはり資格がないと就職は難しいと思います。取得できるかどうか不安はあると思いますが、絶対受かっているという意思を持って頑張ってください。また、対策授業には積極的に参加してください。",
|
933 |
+
"decoded_text": "やはり資格がないと就職は難しいと思います。取得できるかどうか不安はあると思いますが、絶対受かっているという意思を持って頑張ってください。また、対策授業には積極的に参加してください。",
|
934 |
+
"diff": [
|
935 |
+
"replace text[5:7] --> decoded_text[5:6] 'が' --> 'が'",
|
936 |
+
"replace text[24:26] --> decoded_text[23:24] 'で' --> 'で'",
|
937 |
+
"replace text[29:31] --> decoded_text[27:28] 'ど' --> 'ど'",
|
938 |
+
"replace text[67:69] --> decoded_text[64:65] 'だ' --> 'だ'",
|
939 |
+
"replace text[90:92] --> decoded_text[86:87] 'だ' --> 'だ'"
|
940 |
+
],
|
941 |
+
"n_oov_chars": 0,
|
942 |
+
"oov_ratio": 0.0,
|
943 |
+
"oov_charset": "[]"
|
944 |
+
},
|
945 |
+
{
|
946 |
+
"text": "試験範囲は広いですが、出題は同じようなパターンが多いので、繰り返し過去問を解いていれば合格できますよ。",
|
947 |
+
"decoded_text": "試験範囲は広いですが、出題は同じようなパターンが多いので、繰り返し過去問を解いていれば合格できますよ。",
|
948 |
+
"diff": [
|
949 |
+
"replace text[7:9] --> decoded_text[7:8] 'で' --> 'で'",
|
950 |
+
"replace text[10:12] --> decoded_text[9:10] 'が' --> 'が'",
|
951 |
+
"replace text[44:46] --> decoded_text[42:43] 'ば' --> 'ば'",
|
952 |
+
"replace text[48:50] --> decoded_text[45:46] 'で' --> 'で'"
|
953 |
+
],
|
954 |
+
"n_oov_chars": 0,
|
955 |
+
"oov_ratio": 0.0,
|
956 |
+
"oov_charset": "[]"
|
957 |
+
},
|
958 |
+
{
|
959 |
+
"text": "通学時の電車の中でひたすら参考書を読み解き、最後の最後まで一生懸命取り組みました。また今までの授業をしっかり聞いていたので、分からないところも��く、無事合格できました。",
|
960 |
+
"decoded_text": "通学時の電車の中でひたすら参考書を読み解き、最後の最後まで一生懸命取り組みました。また今までの授業をしっかり聞いていたので、分からないところも無く、無事合格できました。",
|
961 |
+
"diff": [
|
962 |
+
"replace text[8:10] --> decoded_text[8:9] 'で' --> 'で'",
|
963 |
+
"replace text[29:31] --> decoded_text[28:29] 'で' --> 'で'",
|
964 |
+
"replace text[47:49] --> decoded_text[45:46] 'で' --> 'で'",
|
965 |
+
"replace text[63:65] --> decoded_text[60:61] 'で' --> 'で'",
|
966 |
+
"replace text[82:84] --> decoded_text[78:79] 'で' --> 'で'"
|
967 |
+
],
|
968 |
+
"n_oov_chars": 0,
|
969 |
+
"oov_ratio": 0.0,
|
970 |
+
"oov_charset": "[]"
|
971 |
+
},
|
972 |
+
{
|
973 |
+
"text": "数多くの資格に挑戦し、合格した時の達成感を自分で感じ取ってもらえればと思います。",
|
974 |
+
"decoded_text": "数多くの資格に挑戦し、合格した時の達成感を自分で感じ取ってもらえればと思います。",
|
975 |
+
"diff": [
|
976 |
+
"replace text[23:25] --> decoded_text[23:24] 'で' --> 'で'",
|
977 |
+
"replace text[26:28] --> decoded_text[25:26] 'じ' --> 'じ'",
|
978 |
+
"replace text[35:37] --> decoded_text[33:34] 'ば' --> 'ば'"
|
979 |
+
],
|
980 |
+
"n_oov_chars": 0,
|
981 |
+
"oov_ratio": 0.0,
|
982 |
+
"oov_charset": "[]"
|
983 |
+
},
|
984 |
+
{
|
985 |
+
"text": "学校の授業だけではなく、家でもプリントや過去問を何度も解いて勉強しました。資格を取ったらそこで終わりではなく、就職してからも役に立つ知識を身につける事ができたので、今後も活かしていこうと思います。",
|
986 |
+
"decoded_text": "学校の授業だけではなく、家でもプリントや過去問を何度も解いて勉強しました。資格を取ったらそこで終わりではなく、就職してからも役に立つ知識を身につける事ができたので、今後も活かしていこうと思います。",
|
987 |
+
"diff": [
|
988 |
+
"replace text[5:7] --> decoded_text[5:6] 'だ' --> 'だ'",
|
989 |
+
"replace text[8:10] --> decoded_text[7:8] 'で' --> 'で'",
|
990 |
+
"replace text[15:17] --> decoded_text[13:14] 'で' --> 'で'",
|
991 |
+
"replace text[18:20] --> decoded_text[15:16] 'プ' --> 'プ'",
|
992 |
+
"replace text[50:52] --> decoded_text[46:47] 'で' --> 'で'",
|
993 |
+
"replace text[55:57] --> decoded_text[50:51] 'で' --> 'で'",
|
994 |
+
"replace text[81:85] --> decoded_text[75:77] 'がで' --> 'がで'",
|
995 |
+
"replace text[88:90] --> decoded_text[80:81] 'で' --> 'で'"
|
996 |
+
],
|
997 |
+
"n_oov_chars": 0,
|
998 |
+
"oov_ratio": 0.0,
|
999 |
+
"oov_charset": "[]"
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"text": "頑張ったら合格できるので機会があれば、ぜひ資格はとっておくべきだと思います。",
|
1003 |
+
"decoded_text": "頑張ったら合格できるので機会があれば、ぜひ資格はとっておくべきだと思います。",
|
1004 |
+
"diff": [
|
1005 |
+
"replace text[7:9] --> decoded_text[7:8] 'で' --> 'で'",
|
1006 |
+
"replace text[12:14] --> decoded_text[11:12] 'で' --> 'で'",
|
1007 |
+
"replace text[16:18] --> decoded_text[14:15] 'が' --> 'が'",
|
1008 |
+
"replace text[20:22] --> decoded_text[17:18] 'ば' --> 'ば'",
|
1009 |
+
"replace text[23:25] --> decoded_text[19:20] 'ぜ' --> 'ぜ'",
|
1010 |
+
"replace text[34:36] --> decoded_text[29:30] 'べ' --> 'べ'",
|
1011 |
+
"replace text[37:39] --> decoded_text[31:32] 'だ' --> 'だ'"
|
1012 |
+
],
|
1013 |
+
"n_oov_chars": 0,
|
1014 |
+
"oov_ratio": 0.0,
|
1015 |
+
"oov_charset": "[]"
|
1016 |
+
},
|
1017 |
+
{
|
1018 |
+
"text": "この資格は音響関係の仕事で、どこに行っても活かしていけるので取得しました。 過去問は大事です。たくさんの過去問を解き、数式も出てくるので暗記だけではなく、公式も覚えるようにしました。",
|
1019 |
+
"decoded_text": "この資格は音響関係の仕事で、どこに行っても活かしていけるので取得しました。 過去問は大事です。たくさんの過去問を解き、数式も出てくるので暗記だけではなく、公式も覚えるようにしました。",
|
1020 |
+
"diff": [
|
1021 |
+
"replace text[12:14] --> decoded_text[12:13] 'で' --> 'で'",
|
1022 |
+
"replace text[15:17] --> decoded_text[14:15] 'ど' --> 'ど'",
|
1023 |
+
"replace text[31:33] --> decoded_text[29:30] 'で' --> 'で'",
|
1024 |
+
"replace text[47:49] --> decoded_text[44:45] 'で' --> 'で'",
|
1025 |
+
"replace text[71:73] --> decoded_text[67:68] 'で' --> 'で'",
|
1026 |
+
"replace text[75:77] --> decoded_text[70:71] 'だ' --> 'だ'",
|
1027 |
+
"replace text[78:80] --> decoded_text[72:73] 'で' --> 'で'"
|
1028 |
+
],
|
1029 |
+
"n_oov_chars": 0,
|
1030 |
+
"oov_ratio": 0.0,
|
1031 |
+
"oov_charset": "[]"
|
1032 |
+
},
|
1033 |
+
{
|
1034 |
+
"text": "資格を取りたいという気持ちがあれば��丈夫です。頑張ってください。",
|
1035 |
+
"decoded_text": "資格を取りたいという気持ちがあれば大丈夫です。頑張ってください。",
|
1036 |
+
"diff": [
|
1037 |
+
"replace text[13:15] --> decoded_text[13:14] 'が' --> 'が'",
|
1038 |
+
"replace text[17:19] --> decoded_text[16:17] 'ば' --> 'ば'",
|
1039 |
+
"replace text[22:24] --> decoded_text[20:21] 'で' --> 'で'",
|
1040 |
+
"replace text[31:33] --> decoded_text[28:29] 'だ' --> 'だ'"
|
1041 |
+
],
|
1042 |
+
"n_oov_chars": 0,
|
1043 |
+
"oov_ratio": 0.0,
|
1044 |
+
"oov_charset": "[]"
|
1045 |
+
}
|
1046 |
+
]
|
stats/compression_rate/Qwen.Qwen3-Embedding-0.6B @ cc100.ko.diff.json
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "낙천적(樂天的) : 세상과 인생을 즐겁고 좋은 것으로 여기는. 또는 그런 것. 인생을 즐길 줄 안다는 건 정말 행복한 일 아닐까요? 심리적으로 자신감과 낙천적인 태도를 갖게 하며, 새로운 아이디어를 얻도록 도움을 주는 노란색의 예쁜벽을 오디오로 채우신 고객님댁을 소개드립니다. ..more",
|
4 |
+
"decoded_text": "낙천적(樂天的) : 세상과 인생을 즐겁고 좋은 것으로 여기는. 또는 그런 것. 인생을 즐길 줄 안다는 건 정말 행복한 일 아닐까요? 심리적으로 자신감과 낙천적인 태도를 갖게 하며, 새로운 아이디어를 얻도록 도움을 주는 노란색의 예쁜벽을 오디오로 채우신 고객님댁을 소개드립니다. ..more",
|
5 |
+
"diff": [
|
6 |
+
"replace text[4:5] --> decoded_text[4:5] '樂' --> '樂'"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 1,
|
9 |
+
"oov_ratio": 0.006211180124223602,
|
10 |
+
"oov_charset": "[\"樂\"]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "명 태조는 포의(布衣)에서 몸을 일으켜 천하를 평정하였다. 천하 평정뒤에는 관리 등용시험에 경의(敬意)를 주로 하였으므로 문교(文敎)가 융성하게 되어 많은 학자를 배출시켰다. 그러나 당시의 학자는 대개 정주학(程朱學)을 신봉한 사람들 뿐이라 명대(明代)의 특색이 아직 나타나고 있지 않다. 그러므로 영락(永樂) 12년 호(胡)에게 명하여 「사서대전」,「오경대전」 을 찬술케 하였을때 어느 경(經)이나 모두 주자의 주(主)에 기본을 두고 있다. 이것에 의해서만도 명초(明初)의 학문이 주자학을 그대로 계승하였을뿐 독창성이 없었다는 것을 상상할 수 있다. 본론에서는 오강재(吳康齋) 사상에 대해 논하고 순수한 주자학자의 설선에 대한 내용을 먼저 언급한 뒤에 다시 오강재의 문인인 호거인(胡居仁), 진헌장(陳獻章), 누량(累諒)에 대해 알아보고자 한다.",
|
14 |
+
"decoded_text": "명 태조는 포의(布衣)에서 몸을 일으켜 천하를 평정하였다. 천하 평정뒤에는 관리 등용시험에 경의(敬意)를 주로 하였으므로 문교(文敎)가 융성하게 되어 많은 학자를 배출시켰다. 그러나 당시의 학자는 대개 정주학(程朱學)을 신봉한 사람들 뿐이라 명대(明代)의 특색이 아직 나타나고 있지 않다. 그러므로 영락(永樂) 12년 호(胡)에게 명하여 「사서대전」,「오경대전」 을 찬술케 하였을때 어느 경(經)이나 모두 주자의 주(主)에 기본을 두고 있다. 이것에 의해서만도 명초(明初)의 학문이 주자학을 그대로 계승하였을뿐 독창성이 없었다는 것을 상상할 수 있다. 본론에서는 오강재(吳康齋) 사상에 대해 논하고 순수한 주자학자의 설선에 대한 내용을 먼저 언급한 뒤에 다시 오강재의 문인인 호거인(胡居仁), 진헌장(陳獻章), 누량(累諒)에 대해 알아보고자 한다.",
|
15 |
+
"diff": [
|
16 |
+
"replace text[171:172] --> decoded_text[171:172] '樂' --> '樂'",
|
17 |
+
"replace text[401:402] --> decoded_text[401:402] '累' --> '累'"
|
18 |
+
],
|
19 |
+
"n_oov_chars": 2,
|
20 |
+
"oov_ratio": 0.004784688995215311,
|
21 |
+
"oov_charset": "[\"樂\", \"累\"]"
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"text": "역할을 수행해갔는지를 살펴보도록 하겠다. 1. 양명학 이전의 주자학 ... 과 육학(陸學) 1) 송대 주자학의 성립 데니스 트위쳇이『케임브리지 중국사 ... 독재적인 명 초기의 황제들{ 특히 홍무제의 경우 주자학을 체제교학으로",
|
25 |
+
"decoded_text": "역할을 수행해갔는지를 살펴보도록 하겠다. 1. 양명학 이전의 주자학 ... 과 육학(陸學) 1) 송대 주자학의 성립 데니스 트위쳇이『케임브리지 중국사 ... 독재적인 명 초기의 황제들{ 특히 홍무제의 경우 주자학을 체제교학으로",
|
26 |
+
"diff": [
|
27 |
+
"replace text[47:48] --> decoded_text[47:48] '陸' --> '陸'"
|
28 |
+
],
|
29 |
+
"n_oov_chars": 1,
|
30 |
+
"oov_ratio": 0.007936507936507936,
|
31 |
+
"oov_charset": "[\"陸\"]"
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"text": "확고한 지반을 굳혔다. 주자학이 명대에 와서 국가의 지도이념으로 확립되어 ... 양명학의 성립과 전개 1. 서론 朱子學(주자학)이 宋代(송대)의 학술 ... 기점으로 知行合一說(지행합일설), 致良知說(치양지설)을 속속 제출하여 주자학",
|
35 |
+
"decoded_text": "확고한 지반을 굳혔다. 주자학이 명대에 와서 국가의 지도이념으로 확립되어 ... 양명학의 성립과 전개 1. 서론 朱子學(주자학)이 宋代(송대)의 학술 ... 기점으로 知行合一說(지행합일설), 致良知說(치양지설)을 속속 제출하여 주자학",
|
36 |
+
"diff": [
|
37 |
+
"replace text[108:109] --> decoded_text[108:109] '良' --> '良'"
|
38 |
+
],
|
39 |
+
"n_oov_chars": 1,
|
40 |
+
"oov_ratio": 0.007692307692307693,
|
41 |
+
"oov_charset": "[\"良\"]"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"text": "▷주자:이(理)�� 만물의 근원이 되는 이치이자, 기(氣)의 활동 근거인 반면 기(氣)는 만물을 구성하는 재료로서 사물을 낳는 도구이다",
|
45 |
+
"decoded_text": "▷주자:이(理)란 만물의 근원이 되는 이치이자, 기(氣)의 활동 근거인 반면 기(氣)는 만물을 구성하는 재료로서 사물을 낳는 도구이다",
|
46 |
+
"diff": [
|
47 |
+
"replace text[6:7] --> decoded_text[6:7] '理' --> '理'"
|
48 |
+
],
|
49 |
+
"n_oov_chars": 1,
|
50 |
+
"oov_ratio": 0.013513513513513514,
|
51 |
+
"oov_charset": "[\"理\"]"
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"text": "예술이라는 한자(漢字)에서 ‘예(藝)’에는 본디 ‘심는다(種 ·樹)’는 뜻이 있으며, 따라서 그것은‘기능(機能)’‘기술(技術)’을 의미하며 고대 동양에서 사대부가 필수적으로 갖추어야 했다. 육예(六藝:禮 ·樂 ·射 ·御 ·書 ·數)에서의 ‘예’는 인간적 결실을 얻기 위해 필요한 기초 교양의 씨를 뿌리고 인격의 꽃을 피우는 수단으로 여겼던 만큼 거기에는 인격도야의 의의도 있다고 하겠다.",
|
55 |
+
"decoded_text": "예술이라는 한자(漢字)에서 ‘예(藝)’에는 본디 ‘심는다(種 ·樹)’는 뜻이 있으며, 따라서 그것은‘기능(機能)’‘기술(技術)’을 의미하며 고대 동양에서 사대부가 필수적으로 갖추어야 했다. 육예(六藝:禮 ·樂 ·射 ·御 ·書 ·數)에서의 ‘예’는 인간적 결실을 얻기 위해 필요한 기초 교양의 씨를 뿌리고 인격의 꽃을 피우는 수단으로 여겼던 만큼 거기에는 인격도야의 의의도 있다고 하겠다.",
|
56 |
+
"diff": [
|
57 |
+
"replace text[115:116] --> decoded_text[115:116] '樂' --> '樂'"
|
58 |
+
],
|
59 |
+
"n_oov_chars": 1,
|
60 |
+
"oov_ratio": 0.004629629629629629,
|
61 |
+
"oov_charset": "[\"樂\"]"
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"text": "입지(立志)를 강조해 자경문, 성학집요, 격몽요결, 학교모범에서 항상 ... 이루어진다. 순언은 율곡이 도덕경 81장 중에서 유교 경전의 내용과 일치하며 ... 사상가 연구 - 율곡 이이 수강 과목 : 담당 교수 : 교수님 제출",
|
65 |
+
"decoded_text": "입지(立志)를 강조해 자경문, 성학집요, 격몽요결, 학교모범에서 항상 ... 이루어진다. 순언은 율곡이 도덕경 81장 중에서 유교 경전의 내용과 일치하며 ... 사상가 연구 - 율곡 이이 수강 과목 : 담당 교수 : 교수님 제출",
|
66 |
+
"diff": [
|
67 |
+
"replace text[3:4] --> decoded_text[3:4] '立' --> '立'"
|
68 |
+
],
|
69 |
+
"n_oov_chars": 1,
|
70 |
+
"oov_ratio": 0.007874015748031496,
|
71 |
+
"oov_charset": "[\"立\"]"
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"text": "Ⅰ. 김용과 중국 무협문학 1. 작가 소개 김용(金用)은 1924년 ... 것이다. 그러므로 무협소설 작가 김용(金用)은 언론인이자 정치가인 차량융의 ... 출간하기 시작하였다. 이렇게 보면 무협소설 작가인 김용(金用)은 언론인",
|
75 |
+
"decoded_text": "Ⅰ. 김용과 중국 무협문학 1. 작가 소개 김용(金用)은 1924년 ... 것이다. 그러므로 무협소설 작가 김용(金用)은 언론인이자 정치가인 차량융의 ... 출간하기 시작하였다. 이렇게 보면 무협소설 작가인 김용(金用)은 언론인",
|
76 |
+
"diff": [
|
77 |
+
"replace text[27:28] --> decoded_text[27:28] '金' --> '金'",
|
78 |
+
"replace text[63:64] --> decoded_text[63:64] '金' --> '金'",
|
79 |
+
"replace text[119:120] --> decoded_text[119:120] '金' --> '金'"
|
80 |
+
],
|
81 |
+
"n_oov_chars": 3,
|
82 |
+
"oov_ratio": 0.023622047244094488,
|
83 |
+
"oov_charset": "[\"金\"]"
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"text": "3 이 때 상제님께서 미처 말씀을 마치지 아니하셨는데 면장 양 모(梁某)와 이장이 세금을 받으러 오거늘",
|
87 |
+
"decoded_text": "3 이 때 상제님께서 미처 말씀을 마치지 아니하셨는데 면장 양 모(梁某)와 이장이 세금을 받으러 오거늘",
|
88 |
+
"diff": [
|
89 |
+
"replace text[37:38] --> decoded_text[37:38] '梁' --> '梁'"
|
90 |
+
],
|
91 |
+
"n_oov_chars": 1,
|
92 |
+
"oov_ratio": 0.017543859649122806,
|
93 |
+
"oov_charset": "[\"梁\"]"
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"text": "2 26일 새벽이 되자 백낙두(白樂斗)를 비롯하여 무장한 순검 수십 명이 공신의 집을 에워싸고 형렬과 자현 등 여러 사람을 결박한 뒤에 상제님의 처소를 묻거늘",
|
97 |
+
"decoded_text": "2 26일 새벽이 되자 백낙두(白樂斗)를 비롯하여 무장한 순검 수십 명이 공신의 집을 에워싸고 형렬과 자현 등 여러 사람을 결박한 뒤에 상제님의 처소를 묻거늘",
|
98 |
+
"diff": [
|
99 |
+
"replace text[18:19] --> decoded_text[18:19] '樂' --> '樂'"
|
100 |
+
],
|
101 |
+
"n_oov_chars": 1,
|
102 |
+
"oov_ratio": 0.011363636363636364,
|
103 |
+
"oov_charset": "[\"樂\"]"
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"text": "상제님이 진주공사에서 원래는 33명에게 새 세상 일꾼 기운을 붙여서 공사를 보시려고 했는데 12명이 변심을 해서 그 자리에서 빼 버렸다. 그래서 신경수 성도 집과 문공신 성도 집에 있던 21명의 성도들이 잡혀서 왔다. 지금 여기 다내(月乃)가 상제님이 수명소 공사 주인 신경수 성도 집에서 일본 경찰들한테 체포돼서 고부경찰서까지 잡혀 가는 40리 길의 딱 중간이야. 상제님이 오신 길이 그러면 어디냐? 정토칠봉의 끝자리 수금리水金里에서 배를 타고 정읍천을 건너서 이쪽 용두龍頭마을 방향으로 오신 거다. 여기 삼거리에 주막이 있었다. 여기서 상제님이 21명의 성도들과 순검들에게 “너희도 배가 고플 테니까 가져온 음식과 고기를 여기서 배불리 먹고 가자.” 하셨다. 그래서 상제님이 여기서 술 한 잔을 드시고 나서 갑자기 일어나셔서 외치셨다.",
|
107 |
+
"decoded_text": "상제님이 진주공사에서 원래는 33명에게 새 세상 일꾼 기운을 붙여서 공사를 보시려고 했는데 12명이 변심을 해서 그 자리에서 빼 버렸다. 그래서 신경수 성도 집과 문공신 성도 집에 있던 21명의 성도들이 잡혀서 왔다. 지금 여기 다내(月乃)가 상제님이 수명소 공사 주인 신경수 성도 집에서 일본 경찰들한테 체포돼서 고부경찰서까지 잡혀 가는 40리 길의 딱 중간이야. 상제님이 오신 길이 그러면 어디냐? 정토칠봉의 끝자리 수금리水金里에서 배를 타고 정읍천을 건너서 이쪽 용두龍頭마을 방향으로 오신 거다. 여기 삼거리에 주막이 있었다. 여기서 상제님이 21명의 성도들과 순검들에게 “너희도 배가 고플 테니까 가져온 음식과 고기를 여기서 배불리 먹고 가자.” 하셨다. 그래서 상제님이 여기서 술 한 잔을 드시고 나서 갑자기 일어나셔서 외치셨다.",
|
108 |
+
"diff": [
|
109 |
+
"replace text[239:240] --> decoded_text[239:240] '金' --> '金'"
|
110 |
+
],
|
111 |
+
"n_oov_chars": 1,
|
112 |
+
"oov_ratio": 0.002421307506053269,
|
113 |
+
"oov_charset": "[\"金\"]"
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"text": "이광수의 조혼을 다룬 희곡 <규한>보다는 진일보를 보인 작품. 3)金祐鎭 ... ) 그러나, 희곡이 무대상연을 전제로 하는 문학이라면, <不孝天罰 ... 의미로서의 창작 희곡은 아님. -대부분의 작품이 일본 신파 연극을 번역",
|
117 |
+
"decoded_text": "이광수의 조혼을 다룬 희곡 <규한>보다는 진일보를 보인 작품. 3)金祐鎭 ... ) 그러나, 희곡이 무대상연을 전제로 하는 문학이라면, <不孝天罰 ... 의미로서의 창작 희곡은 아님. -대부분의 작품이 일본 신파 연극을 번역",
|
118 |
+
"diff": [
|
119 |
+
"replace text[77:78] --> decoded_text[77:78] '不' --> '不'"
|
120 |
+
],
|
121 |
+
"n_oov_chars": 1,
|
122 |
+
"oov_ratio": 0.008,
|
123 |
+
"oov_charset": "[\"不\"]"
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"text": "{{ 중국의 고리대금업 { 중국(中國)의 고리대금업(高利貸金業) 차 ... 아니라, 국가에서 대출을 해주는 시스템도 존재하지 않았다. 고리대금업은 전통 ... 례 서론 본론 상인(商人)의 유래 대금업의 탄생 - 위진남북조 시대의",
|
127 |
+
"decoded_text": "{{ 중국의 고리대금업 { 중국(中國)의 고리대금업(高利貸金業) 차 ... 아니라, 국가에서 대출을 해주는 시스템도 존재하지 않았다. 고리대금업은 전통 ... 례 서론 본론 상인(商人)의 유래 대금업의 탄생 - 위진남북조 시대의",
|
128 |
+
"diff": [
|
129 |
+
"replace text[32:33] --> decoded_text[32:33] '金' --> '金'"
|
130 |
+
],
|
131 |
+
"n_oov_chars": 1,
|
132 |
+
"oov_ratio": 0.007874015748031496,
|
133 |
+
"oov_charset": "[\"金\"]"
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"text": "..PAGE:1 종교 속의 성차별 ..PAGE:2 목 차 카톨릭 속의 ... 성차별 토론 ..PAGE:3 대부분 종교가 사제직 '여성 不可' 한국천주교 ... 대한 부정적 시각 ..PAGE:4 카톨릭 속의 성차별 사제는 남성이어야",
|
137 |
+
"decoded_text": "..PAGE:1 종교 속의 성차별 ..PAGE:2 목 차 카톨릭 속의 ... 성차별 토론 ..PAGE:3 대부분 종교가 사제직 '여성 不可' 한국천주교 ... 대한 부정적 시각 ..PAGE:4 카톨릭 속의 성차별 사제는 남성이어야",
|
138 |
+
"diff": [
|
139 |
+
"replace text[75:76] --> decoded_text[75:76] '不' --> '不'"
|
140 |
+
],
|
141 |
+
"n_oov_chars": 1,
|
142 |
+
"oov_ratio": 0.0078125,
|
143 |
+
"oov_charset": "[\"不\"]"
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"text": "不壞)의 몸이기 때문이다. 거기에 음독금련사는 더 했다. 그의 입에서 나오는 액은 모든 것을 녹아버리",
|
147 |
+
"decoded_text": "不壞)의 몸이기 때문이다. 거기에 음독금련사는 더 했다. 그의 입에서 나오는 액은 모든 것을 녹아버리",
|
148 |
+
"diff": [
|
149 |
+
"replace text[0:1] --> decoded_text[0:1] '不' --> '不'"
|
150 |
+
],
|
151 |
+
"n_oov_chars": 1,
|
152 |
+
"oov_ratio": 0.017857142857142856,
|
153 |
+
"oov_charset": "[\"不\"]"
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"text": "백리무극 옆에 서 있는 인물은 바로 그의 첫제자 용구찬(龍九燦)이 서 있었다. 그는 천황의 지시하는 일",
|
157 |
+
"decoded_text": "백리무극 옆에 서 있는 인물은 바로 그의 첫제자 용구찬(龍九燦)이 서 있���다. 그는 천황의 지시하는 일",
|
158 |
+
"diff": [
|
159 |
+
"replace text[31:32] --> decoded_text[31:32] '龍' --> '龍'"
|
160 |
+
],
|
161 |
+
"n_oov_chars": 1,
|
162 |
+
"oov_ratio": 0.017543859649122806,
|
163 |
+
"oov_charset": "[\"龍\"]"
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"text": "면 정액이 고갈(枯渴)되어 죽고 말았지만 지금 북리천과 유나영은 용(龍)의 내단을 먹어 삼일까지 이렇",
|
167 |
+
"decoded_text": "면 정액이 고갈(枯渴)되어 죽고 말았지만 지금 북리천과 유나영은 용(龍)의 내단을 먹어 삼일까지 이렇",
|
168 |
+
"diff": [
|
169 |
+
"replace text[38:39] --> decoded_text[38:39] '龍' --> '龍'"
|
170 |
+
],
|
171 |
+
"n_oov_chars": 1,
|
172 |
+
"oov_ratio": 0.017857142857142856,
|
173 |
+
"oov_charset": "[\"龍\"]"
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"text": "라 영약(靈藥)뿐이였다.",
|
177 |
+
"decoded_text": "라 영약(靈藥)뿐이였다.",
|
178 |
+
"diff": [
|
179 |
+
"replace text[5:6] --> decoded_text[5:6] '靈' --> '靈'"
|
180 |
+
],
|
181 |
+
"n_oov_chars": 1,
|
182 |
+
"oov_ratio": 0.07692307692307693,
|
183 |
+
"oov_charset": "[\"靈\"]"
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"text": "5 '선릉역 사건' 가해女, 피해女 만남 전 흉기 챙긴 이유는? 친구 데려온다는 말에",
|
187 |
+
"decoded_text": "5 '선릉역 사건' 가해女, 피해女 만남 전 흉기 챙긴 이유는? 친구 데려온다는 말에",
|
188 |
+
"diff": [
|
189 |
+
"replace text[13:14] --> decoded_text[13:14] '女' --> '女'",
|
190 |
+
"replace text[18:19] --> decoded_text[18:19] '女' --> '女'"
|
191 |
+
],
|
192 |
+
"n_oov_chars": 2,
|
193 |
+
"oov_ratio": 0.0425531914893617,
|
194 |
+
"oov_charset": "[\"女\"]"
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"text": "알프레드 노스 화이트헤드(Alfred North Whitehead)는 20세기의 대표적인 철학자 가운데 한 사람이다. 영국 케임브리지의 트리니티 칼리지에서 수학을 전공하였고, 그 후에 동 대학의 특별연구원(Fellow)과 수석 강사(1885~1911), 런던대학의 임페리얼 칼리지 응용수학교수(1914~1924), 그리고 미국 하버드대학 철학교수(1924~1937)를 역임했다. 수학자였지만 고전에도 정통했으며, 새로운 물리학의 의미를 정확히 인식하고 있었을 뿐만 아니라 전통적인 철학을 오랫동안 깊이 연구해 왔다. 수제자 버트런드 러셀과의 공저 『수학 원리』(전 3권, 1910~1913)와 같은 수리논리학 분야에서 획기적인 업적을 남긴 수학자, 논리학자로서도 높이 평가된다. 또 한편으로는 특히 아인슈타인의 상대성 원리 등 현대 자연과학의 발전을 계기로, 현대 과학설을 철학에 도입시켜 철학 사상사에 새로운 국면을 전개한 과학철학자 그리고 “유기체 철학”(philosophy of organism)의 철학자로서도 높이 평가된다. 화이트헤드는 신중한 사람이었다. “진리를 그 가장 깊은 뿌리에서 부터 탐구”(본문 제2장 중에서) 하는 작업을 평생 멈추지 않았던 사상가였으며, 오랫동안 수학의 전문가였다. 그의 최초의 철학적 저작인 『과학과 근대세계』(1925)는 그가 63세 때, 대표작 『과정과 실재』(1929)는 68세 때에, 그로부터 4년 후에는 『관념의 모험』(1933)이 출간되었다. 그 당시 사람들은 사멸된 것으로 알았던 형이상학이 우주에 관한 상상적 사유라는 형태로 당당하게 부활하는 데 놀랐다. 그의 형이상학 체계는 사물의 유동(流動)을 둘러싸고 전개되는 체계라는 형태의 우주론으로서, 어디까지나 개방된 체계였다. 형이상학을 싫어했던 존 듀이도 화이트헤드의 “유기체 철학”에 대하여 “철학에의 혁명적 공헌” 이라는 찬사를 보냈으며, 영국의 시인이자 문학평론가·철학자였던 허버트 리드는 화이트헤드를 “20세기의 데카르트”라 평하기도 했다. 현대 프랑스의 포스트모더니즘 철학의 기수로 불리는 질 들뢰즈 같은 이는 화이트헤드를 가리켜 “영미권의 마지막 위대한 철학자”로 평하였다. 지은 책으로는 『화이트헤드의 수학이란 무엇인가』『교육의 목적』『화이트헤드와의 대화』등이 있다.",
|
198 |
+
"decoded_text": "알프레드 노스 화이트헤드(Alfred North Whitehead)는 20세기의 대표적인 철학자 가운데 한 사람이다. 영국 케임브리지의 트리니티 칼리지에서 수학을 전공하였고, 그 후에 동 대학의 특별연구원(Fellow)과 수석 강사(1885~1911), 런던대학의 임페리얼 칼리지 응용수학교수(1914~1924), 그리고 미국 하버드대학 철학교수(1924~1937)를 역임했다. 수학자였지만 고전에도 정통했으며, 새로운 물리학의 의미를 정확히 인식하고 있었을 뿐만 아니라 전통적인 철학을 오랫동안 깊이 연구해 왔다. 수제자 버트런드 러셀과의 공저 『수학 원리』(전 3권, 1910~1913)와 같은 수리논리학 분야에서 획기적인 업적을 남긴 수학자, 논리학자로서도 높이 평가된다. 또 한편으로는 특히 아인슈타인의 상���성 원리 등 현대 자연과학의 발전을 계기로, 현대 과학설을 철학에 도입시켜 철학 사상사에 새로운 국면을 전개한 과학철학자 그리고 “유기체 철학”(philosophy of organism)의 철학자로서도 높이 평가된다. 화이트헤드는 신중한 사람이었다. “진리를 그 가장 깊은 뿌리에서 부터 탐구”(본문 제2장 중에서) 하는 작업을 평생 멈추지 않았던 사상가였으며, 오랫동안 수학의 전문가였다. 그의 최초의 철학적 저작인 『과학과 근대세계』(1925)는 그가 63세 때, 대표작 『과정과 실재』(1929)는 68세 때에, 그로부터 4년 후에는 『관념의 모험』(1933)이 출간되었다. 그 당시 사람들은 사멸된 것으로 알았던 형이상학이 우주에 관한 상상적 사유라는 형태로 당당하게 부활하는 데 놀랐다. 그의 형이상학 체계는 사물의 유동(流動)을 둘러싸고 전개되는 체계라는 형태의 우주론으로서, 어디까지나 개방된 체계였다. 형이상학을 싫어했던 존 듀이도 화이트헤드의 “유기체 철학”에 대하여 “철학에의 혁명적 공헌” 이라는 찬사를 보냈으며, 영국의 시인이자 문학평론가·철학자였던 허버트 리드는 화이트헤드를 “20세기의 데카르트”라 평하기도 했다. 현대 프랑스의 포스트모더니즘 철학의 기수로 불리는 질 들뢰즈 같은 이는 화이트헤드를 가리켜 “영미권의 마지막 위대한 철학자”로 평하였다. 지은 책으로는 『화이트헤드의 수학이란 무엇인가』『교육의 목적』『화이트헤드와의 대화』등이 있다.",
|
199 |
+
"diff": [
|
200 |
+
"replace text[809:810] --> decoded_text[809:810] '流' --> '流'"
|
201 |
+
],
|
202 |
+
"n_oov_chars": 1,
|
203 |
+
"oov_ratio": 0.0009000900090009,
|
204 |
+
"oov_charset": "[\"流\"]"
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"text": "<부가설명> 오늘 청지기의 표지(標識, 표식은 잘못된 읽기이다.) 열거한, 신실함, 신뢰할 만하다는 말은 전부 헬라어 ‘피스티스, 피스토스, 피스튜오’에서 나온 말이다. 깨끗한 양심과 순종만이 다른 단어이다. 청지기는 믿을 수 있는 인격이어야 한다는 말이다. 그런 사람은 깨끗한 양심으로 하나님의 말씀을 수행하는 지체들이 된다는 것을 나타낸다. 순종이라는 단어는 ‘휘포쿠오’ ‘휘포탓소’등인데 ‘휘포’라는 말은 ‘…아래’라는 뜻이다. 순종은 누군가가 순종하는 사람 위에 있다는 것을 나타낸다. 위에서 시키는 사람의 말을 듣는 것이 ‘휘포쿠오’이고 위에서 시키는 대로 정돈하는 것이 ‘휘포탓소’이다. 청지기는 위에 주인이 계신다. 주인이 시키는 말을 잘 듣고 시키는 말대로 하는 것이고, 시킨 대로 정돈하는 것이다. 그리스도인 청지기는 예수께서 주인이시기 때문에 예수님의 말씀을 잘 듣고 그대로 수행해야 하고 그대로 정리해야 한다. 그렇게 할 때 주인의 신임을 얻고 하늘나라를 상속받게 된다. 하나님이 주신 것을 하나님의 뜻대로 관리하지 못할 것을 아시면 하나님께서 관리할 것을 맡기겠는가?",
|
208 |
+
"decoded_text": "<부가설명> 오늘 청지기의 표지(標識, 표식은 잘못된 읽기이다.) 열거한, 신실함, 신뢰할 만하다는 말은 전부 헬라어 ‘피스티스, 피스토스, 피스튜오’에서 나온 말이다. 깨끗한 양심과 순종만이 다른 단어이다. 청지기는 믿을 수 있는 인격이어야 한다는 말이다. 그런 사람은 깨끗한 양심으로 하나님의 말씀을 수행하는 지체들이 된다는 것을 나타낸다. 순종이라는 단어는 ‘휘포쿠오’ ‘휘포탓소’등인데 ‘휘포’라는 말은 ‘…아래’라는 뜻이다. 순종은 누군가가 순종하는 사람 위에 있다는 것을 나타낸다. 위에서 시키는 사람의 말을 듣는 것이 ‘휘포쿠오’이고 위에서 시키는 대로 정돈하는 것이 ‘휘포탓소’이다. 청지기는 위에 주인이 계신다. 주인이 시키는 말을 잘 듣고 시키는 말대로 하는 것이고, 시킨 대로 정돈하는 것이다. 그리스도인 청지기는 예수께서 주인이시기 때문에 예수님의 말씀을 잘 듣고 그대로 수행해야 하고 그대로 정리해야 한다. 그렇게 할 때 주인의 신임을 얻고 하늘나라를 상속받게 된다. 하나님이 주신 것을 하나님의 뜻대로 관리하지 못할 것을 아시면 하나님께서 관리할 것을 맡기겠는가?",
|
209 |
+
"diff": [
|
210 |
+
"replace text[19:20] --> decoded_text[19:20] '識' --> '識'"
|
211 |
+
],
|
212 |
+
"n_oov_chars": 1,
|
213 |
+
"oov_ratio": 0.0018083182640144665,
|
214 |
+
"oov_charset": "[\"識\"]"
|
215 |
+
}
|
216 |
+
]
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.ar.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.de.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.en.diff.json
ADDED
@@ -0,0 +1,1325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "and yeah im a boy,and no, im not g*y, im a nice guy. i dont love his songs or anything , but he's not that bad tbh.",
|
4 |
+
"decoded_text": "and yeah im a boy,and no, im not g*y, im a nice guy. i dont love his songs or anything, but he's not that bad tbh.",
|
5 |
+
"diff": [
|
6 |
+
"delete text[86:87] --> decoded_text[86:86] ' ' --> ''"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "Justin serenaded wonderful or better than a great I like popular songs, particularly as it is talented. all those who hate Justin are g**s because they feel jealous of him because he is handsome at the same time a rising singer and a small age. I myself appreciate the wonderful artist with this beautiful and talented .",
|
14 |
+
"decoded_text": "Justin serenaded wonderful or better than a great I like popular songs, particularly as it is talented. all those who hate Justin are g**s because they feel jealous of him because he is handsome at the same time a rising singer and a small age. I myself appreciate the wonderful artist with this beautiful and talented.",
|
15 |
+
"diff": [
|
16 |
+
"delete text[318:319] --> decoded_text[318:318] ' ' --> ''"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "Soften the landing zones with a pair of Rubber Mats , made from dyed rubber chips, heat compressed and available in dark green or brick red.",
|
24 |
+
"decoded_text": "Soften the landing zones with a pair of Rubber Mats, made from dyed rubber chips, heat compressed and available in dark green or brick red.",
|
25 |
+
"diff": [
|
26 |
+
"delete text[51:52] --> decoded_text[51:51] ' ' --> ''"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "We're not so rough and over the top these days, so they miiiiight survive ._.",
|
34 |
+
"decoded_text": "We're not so rough and over the top these days, so they miiiiight survive._.",
|
35 |
+
"diff": [
|
36 |
+
"delete text[73:74] --> decoded_text[73:73] ' ' --> ''"
|
37 |
+
],
|
38 |
+
"n_oov_chars": 0,
|
39 |
+
"oov_ratio": 0.0,
|
40 |
+
"oov_charset": "[]"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"text": "Just finished Hulse's \"Black River\" and simply adored the book. So pretty, overall, and much like the Kent Haruf novels, such as \"Plainsong\" that I've enjoyed over the years. \"Black River\" is surely one of the best five I've read this year. Solid Pulitzer choice, in my opinion. Side note: As I've mentioned before, I surely don't understand all of the hoopla surrounding \"The Sellout,\" with so many other worthy contenders. But, what do I know? I'm only a reader. :-) Read on ...",
|
44 |
+
"decoded_text": "Just finished Hulse's \"Black River\" and simply adored the book. So pretty, overall, and much like the Kent Haruf novels, such as \"Plainsong\" that I've enjoyed over the years. \"Black River\" is surely one of the best five I've read this year. Solid Pulitzer choice, in my opinion. Side note: As I've mentioned before, I surely don't understand all of the hoopla surrounding \"The Sellout,\" with so many other worthy contenders. But, what do I know? I'm only a reader. :-) Read on...",
|
45 |
+
"diff": [
|
46 |
+
"replace text[476:480] --> decoded_text[476:479] ' ...' --> '...'"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "I really don't understand all of the hoopla over THE SELLOUT. Just a so-so book, in my opinion. Minor work. I struggled through it, and can never get back the time spent on that tome. EILEEN and HONEYDEW are sooooooo much better, not to mention THE TURNER HOUSE, TSAR, DID YOU EVER, and others. I'm reading DELICIOUS FOODS right now, and think it's a major-serious contender as well. BLACK RIVER is next on my list, and I can't wait. But, what do I know? :-) Read on ...",
|
54 |
+
"decoded_text": "I really don't understand all of the hoopla over THE SELLOUT. Just a so-so book, in my opinion. Minor work. I struggled through it, and can never get back the time spent on that tome. EILEEN and HONEYDEW are sooooooo much better, not to mention THE TURNER HOUSE, TSAR, DID YOU EVER, and others. I'm reading DELICIOUS FOODS right now, and think it's a major-serious contender as well. BLACK RIVER is next on my list, and I can't wait. But, what do I know? :-) Read on...",
|
55 |
+
"diff": [
|
56 |
+
"replace text[466:470] --> decoded_text[466:469] ' ...' --> '...'"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "I have also read The Shore ,Alex, yes I agree its very good, maybe a chance. The last years I have just waited to last in the year to see who the genral public have been siding and gone for that, from a collectors point of view, it would be nice if something won which did not have a 100,000 in the first print run.",
|
64 |
+
"decoded_text": "I have also read The Shore,Alex, yes I agree its very good, maybe a chance. The last years I have just waited to last in the year to see who the genral public have been siding and gone for that, from a collectors point of view, it would be nice if something won which did not have a 100,000 in the first print run.",
|
65 |
+
"diff": [
|
66 |
+
"delete text[26:27] --> decoded_text[26:26] ' ' --> ''"
|
67 |
+
],
|
68 |
+
"n_oov_chars": 0,
|
69 |
+
"oov_ratio": 0.0,
|
70 |
+
"oov_charset": "[]"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"text": "Moving to K-W can be confusing for anybody: how can you explain King Street, that runs north, south, east and west ?! Or streets like King and Weber, that are sometimes parallel, and yet cross each other in two places ? For someone new to the country, adjusting to life here can be even much more confusing.",
|
74 |
+
"decoded_text": "Moving to K-W can be confusing for anybody: how can you explain King Street, that runs north, south, east and west?! Or streets like King and Weber, that are sometimes parallel, and yet cross each other in two places? For someone new to the country, adjusting to life here can be even much more confusing.",
|
75 |
+
"diff": [
|
76 |
+
"delete text[114:115] --> decoded_text[114:114] ' ' --> ''",
|
77 |
+
"delete text[217:218] --> decoded_text[216:216] ' ' --> ''"
|
78 |
+
],
|
79 |
+
"n_oov_chars": 0,
|
80 |
+
"oov_ratio": 0.0,
|
81 |
+
"oov_charset": "[]"
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"text": "Just in case you are getting the impression that it’s all work and no fun, let me remind you of the Multicultural Festival, which is held every year at Victoria Park during the Canada Day (July 1) weekend. For two fun-filled days, the whole family can enjoy crafts, traditional dancing and especially foods from around the world ! This event is something Kitchener-Waterloo always looks forward to.",
|
85 |
+
"decoded_text": "Just in case you are getting the impression that it’s all work and no fun, let me remind you of the Multicultural Festival, which is held every year at Victoria Park during the Canada Day (July 1) weekend. For two fun-filled days, the whole family can enjoy crafts, traditional dancing and especially foods from around the world! This event is something Kitchener-Waterloo always looks forward to.",
|
86 |
+
"diff": [
|
87 |
+
"delete text[328:329] --> decoded_text[328:328] ' ' --> ''"
|
88 |
+
],
|
89 |
+
"n_oov_chars": 0,
|
90 |
+
"oov_ratio": 0.0,
|
91 |
+
"oov_charset": "[]"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"text": "Centralized vacuum system can be used to clean production lines, floors and installations during or ..",
|
95 |
+
"decoded_text": "Centralized vacuum system can be used to clean production lines, floors and installations during or..",
|
96 |
+
"diff": [
|
97 |
+
"delete text[99:100] --> decoded_text[99:99] ' ' --> ''"
|
98 |
+
],
|
99 |
+
"n_oov_chars": 0,
|
100 |
+
"oov_ratio": 0.0,
|
101 |
+
"oov_charset": "[]"
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"text": "REFRIGERATION MECHANIC Employees in this job participate in and oversee the installation of refrigeration, air conditioning, chemicals, and electricity. Some jobs require an employee to some risk of sustaining illness and injury from the use of chemicals, high-pressure laboratory systems ... Visit Document",
|
105 |
+
"decoded_text": "REFRIGERATION MECHANIC Employees in this job participate in and oversee the installation of refrigeration, air conditioning, chemicals, and electricity. Some jobs require an employee to some risk of sustaining illness and injury from the use of chemicals, high-pressure laboratory systems... Visit Document",
|
106 |
+
"diff": [
|
107 |
+
"delete text[288:289] --> decoded_text[288:288] ' ' --> ''"
|
108 |
+
],
|
109 |
+
"n_oov_chars": 0,
|
110 |
+
"oov_ratio": 0.0,
|
111 |
+
"oov_charset": "[]"
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"text": "AMMONIA REFRIGERATION IN WAREHOUSES What is ammonia? Pure ammonia (NH3), also known as anhydrous ammonia, is a colorless gas, chemicals; how these chemicals can be detected (such as by monitoring devices, or by smell), what the employer is going to do to protect workers, including emergency ... Read Here",
|
115 |
+
"decoded_text": "AMMONIA REFRIGERATION IN WAREHOUSES What is ammonia? Pure ammonia (NH3), also known as anhydrous ammonia, is a colorless gas, chemicals; how these chemicals can be detected (such as by monitoring devices, or by smell), what the employer is going to do to protect workers, including emergency... Read Here",
|
116 |
+
"diff": [
|
117 |
+
"delete text[291:292] --> decoded_text[291:291] ' ' --> ''"
|
118 |
+
],
|
119 |
+
"n_oov_chars": 0,
|
120 |
+
"oov_ratio": 0.0,
|
121 |
+
"oov_charset": "[]"
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"text": "A company in Washington has agreed to pay $50,805 in federal penalties after releasing ammonia gas into the atmosphere. ... Read News",
|
125 |
+
"decoded_text": "A company in Washington has agreed to pay $50,805 in federal penalties after releasing ammonia gas into the atmosphere.... Read News",
|
126 |
+
"diff": [
|
127 |
+
"delete text[119:120] --> decoded_text[119:119] ' ' --> ''"
|
128 |
+
],
|
129 |
+
"n_oov_chars": 0,
|
130 |
+
"oov_ratio": 0.0,
|
131 |
+
"oov_charset": "[]"
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"text": "A scientist trying to find a cure for his wife's disease is kicked into a container of chemicals which results in his needing a refrigeration suit to survive ... View Video",
|
135 |
+
"decoded_text": "A scientist trying to find a cure for his wife's disease is kicked into a container of chemicals which results in his needing a refrigeration suit to survive... View Video",
|
136 |
+
"diff": [
|
137 |
+
"delete text[157:158] --> decoded_text[157:157] ' ' --> ''"
|
138 |
+
],
|
139 |
+
"n_oov_chars": 0,
|
140 |
+
"oov_ratio": 0.0,
|
141 |
+
"oov_charset": "[]"
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"text": "Beverages, chemicals and petrochemicals, pharmaceuticals, starch, sugar and biofuels. Refrigeration and air-conditioning consume a lot of energy. Therefore, it is essential that the technical solutions economize on the use of energy and fulfil their mission ... Doc Viewer",
|
145 |
+
"decoded_text": "Beverages, chemicals and petrochemicals, pharmaceuticals, starch, sugar and biofuels. Refrigeration and air-conditioning consume a lot of energy. Therefore, it is essential that the technical solutions economize on the use of energy and fulfil their mission... Doc Viewer",
|
146 |
+
"diff": [
|
147 |
+
"delete text[257:258] --> decoded_text[257:257] ' ' --> ''"
|
148 |
+
],
|
149 |
+
"n_oov_chars": 0,
|
150 |
+
"oov_ratio": 0.0,
|
151 |
+
"oov_charset": "[]"
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"text": "Electrical equipment, blunt instruments, chemicals, lubricants, or any other tools or equipment seen or implied in this video. Due to factors beyond the control of EricTheCarGuy, ... View Video",
|
155 |
+
"decoded_text": "Electrical equipment, blunt instruments, chemicals, lubricants, or any other tools or equipment seen or implied in this video. Due to factors beyond the control of EricTheCarGuy,... View Video",
|
156 |
+
"diff": [
|
157 |
+
"delete text[178:179] --> decoded_text[178:178] ' ' --> ''"
|
158 |
+
],
|
159 |
+
"n_oov_chars": 0,
|
160 |
+
"oov_ratio": 0.0,
|
161 |
+
"oov_charset": "[]"
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"text": "Summary Of Ammonia Accidents In The United States To Which ...",
|
165 |
+
"decoded_text": "Summary Of Ammonia Accidents In The United States To Which...",
|
166 |
+
"diff": [
|
167 |
+
"delete text[58:59] --> decoded_text[58:58] ' ' --> ''"
|
168 |
+
],
|
169 |
+
"n_oov_chars": 0,
|
170 |
+
"oov_ratio": 0.0,
|
171 |
+
"oov_charset": "[]"
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"text": "Under current conditions, moderate or slow-onset health effects of GM foods could take decades to become known, just as it took decades for the damaging effects of trans-fats (another type of artificial food) to be recognized. ‘Slow poison’ effects from trans-fats have caused millions of premature deaths across the world6 .",
|
175 |
+
"decoded_text": "Under current conditions, moderate or slow-onset health effects of GM foods could take decades to become known, just as it took decades for the damaging effects of trans-fats (another type of artificial food) to be recognized. ‘Slow poison’ effects from trans-fats have caused millions of premature deaths across the world6.",
|
176 |
+
"diff": [
|
177 |
+
"delete text[323:324] --> decoded_text[323:323] ' ' --> ''"
|
178 |
+
],
|
179 |
+
"n_oov_chars": 0,
|
180 |
+
"oov_ratio": 0.0,
|
181 |
+
"oov_charset": "[]"
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"text": "Nevertheless, there are signs that all is not well with the US food supply. A report by the US Centers for Disease Control shows that food-related illnesses increased 2- to 10-fold in the years between 1994 (just before GM food was commercialized) and 19997 . Is there a link with GM food? No one knows, because studies on humans have not been done.",
|
185 |
+
"decoded_text": "Nevertheless, there are signs that all is not well with the US food supply. A report by the US Centers for Disease Control shows that food-related illnesses increased 2- to 10-fold in the years between 1994 (just before GM food was commercialized) and 19997. Is there a link with GM food? No one knows, because studies on humans have not been done.",
|
186 |
+
"diff": [
|
187 |
+
"delete text[257:258] --> decoded_text[257:257] ' ' --> ''"
|
188 |
+
],
|
189 |
+
"n_oov_chars": 0,
|
190 |
+
"oov_ratio": 0.0,
|
191 |
+
"oov_charset": "[]"
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"text": "Engine . . . I think that's a pretty exotic engine, can't be cheap to rebuild.",
|
195 |
+
"decoded_text": "Engine... I think that's a pretty exotic engine, can't be cheap to rebuild.",
|
196 |
+
"diff": [
|
197 |
+
"delete text[6:7] --> decoded_text[6:6] ' ' --> ''",
|
198 |
+
"delete text[8:9] --> decoded_text[7:7] ' ' --> ''",
|
199 |
+
"delete text[10:11] --> decoded_text[8:8] ' ' --> ''"
|
200 |
+
],
|
201 |
+
"n_oov_chars": 0,
|
202 |
+
"oov_ratio": 0.0,
|
203 |
+
"oov_charset": "[]"
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"text": "Put some $$ aside for electrics and rust, if still within the budget ...do it! You'll love it.",
|
207 |
+
"decoded_text": "Put some $$ aside for electrics and rust, if still within the budget...do it! You'll love it.",
|
208 |
+
"diff": [
|
209 |
+
"delete text[68:69] --> decoded_text[68:68] ' ' --> ''"
|
210 |
+
],
|
211 |
+
"n_oov_chars": 0,
|
212 |
+
"oov_ratio": 0.0,
|
213 |
+
"oov_charset": "[]"
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"text": "I bought my M5 10 years ago when prices were still 'reasonable' and I wouldn't take less than $50k today.",
|
217 |
+
"decoded_text": "I bought my M5 10 years ago when prices were still'reasonable' and I wouldn't take less than $50k today.",
|
218 |
+
"diff": [
|
219 |
+
"delete text[50:51] --> decoded_text[50:50] ' ' --> ''"
|
220 |
+
],
|
221 |
+
"n_oov_chars": 0,
|
222 |
+
"oov_ratio": 0.0,
|
223 |
+
"oov_charset": "[]"
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"text": "Yes my clothes fit well...right now..since I lost weight ,they are a little big, but they are comfortable.",
|
227 |
+
"decoded_text": "Yes my clothes fit well...right now..since I lost weight,they are a little big, but they are comfortable.",
|
228 |
+
"diff": [
|
229 |
+
"delete text[56:57] --> decoded_text[56:56] ' ' --> ''"
|
230 |
+
],
|
231 |
+
"n_oov_chars": 0,
|
232 |
+
"oov_ratio": 0.0,
|
233 |
+
"oov_charset": "[]"
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"text": "Thanks to everyone who's posted so far, and I look forward to seeing more responses. It's really cool to get to 'meet' the people I've been hanging out with.",
|
237 |
+
"decoded_text": "Thanks to everyone who's posted so far, and I look forward to seeing more responses. It's really cool to get to'meet' the people I've been hanging out with.",
|
238 |
+
"diff": [
|
239 |
+
"delete text[111:112] --> decoded_text[111:111] ' ' --> ''"
|
240 |
+
],
|
241 |
+
"n_oov_chars": 0,
|
242 |
+
"oov_ratio": 0.0,
|
243 |
+
"oov_charset": "[]"
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"text": "Flickr's API tells us that a particular photo has not been added to a group with the a code error. For the case we are using it is number 5. More information about error codes can be found here (https://www.flickr.com/services/api/flickr.groups.pools.add.html) .",
|
247 |
+
"decoded_text": "Flickr's API tells us that a particular photo has not been added to a group with the a code error. For the case we are using it is number 5. More information about error codes can be found here (https://www.flickr.com/services/api/flickr.groups.pools.add.html).",
|
248 |
+
"diff": [
|
249 |
+
"replace text[260:262] --> decoded_text[260:261] ' .' --> '.'"
|
250 |
+
],
|
251 |
+
"n_oov_chars": 0,
|
252 |
+
"oov_ratio": 0.0,
|
253 |
+
"oov_charset": "[]"
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"text": "I called Chris to celebrate... .... ... and to ask him if he would help break it down so we could get it home.",
|
257 |
+
"decoded_text": "I called Chris to celebrate.......... and to ask him if he would help break it down so we could get it home.",
|
258 |
+
"diff": [
|
259 |
+
"delete text[30:31] --> decoded_text[30:30] ' ' --> ''",
|
260 |
+
"delete text[35:36] --> decoded_text[34:34] ' ' --> ''"
|
261 |
+
],
|
262 |
+
"n_oov_chars": 0,
|
263 |
+
"oov_ratio": 0.0,
|
264 |
+
"oov_charset": "[]"
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"text": "I thought these would be pretty and bright in an entry way... .. ... of that mansion we are someday going to own.",
|
268 |
+
"decoded_text": "I thought these would be pretty and bright in an entry way........ of that mansion we are someday going to own.",
|
269 |
+
"diff": [
|
270 |
+
"delete text[61:62] --> decoded_text[61:61] ' ' --> ''",
|
271 |
+
"delete text[64:65] --> decoded_text[63:63] ' ' --> ''"
|
272 |
+
],
|
273 |
+
"n_oov_chars": 0,
|
274 |
+
"oov_ratio": 0.0,
|
275 |
+
"oov_charset": "[]"
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"text": "The Bisons are careening to their 12th straight non-playoff season spanning three parent clubs. A team that started the season 13-5 and was still eight games over .500 in early June just put up the most losses in any month in its history.",
|
279 |
+
"decoded_text": "The Bisons are careening to their 12th straight non-playoff season spanning three parent clubs. A team that started the season 13-5 and was still eight games over.500 in early June just put up the most losses in any month in its history.",
|
280 |
+
"diff": [
|
281 |
+
"delete text[162:163] --> decoded_text[162:162] ' ' --> ''"
|
282 |
+
],
|
283 |
+
"n_oov_chars": 0,
|
284 |
+
"oov_ratio": 0.0,
|
285 |
+
"oov_charset": "[]"
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"text": "One disappointment that's really hurt at all levels is the wrist surgery for top outfield prospect Anthony Alford. He was lighting things up at Double-A New Hampshire batting .325 and the Blue Jays wanted to give him a brief look. He would have then come to Buffalo and ostensibly anchored the lineup. But he's yet to arrive and the season has gone down the drain.",
|
289 |
+
"decoded_text": "One disappointment that's really hurt at all levels is the wrist surgery for top outfield prospect Anthony Alford. He was lighting things up at Double-A New Hampshire batting.325 and the Blue Jays wanted to give him a brief look. He would have then come to Buffalo and ostensibly anchored the lineup. But he's yet to arrive and the season has gone down the drain.",
|
290 |
+
"diff": [
|
291 |
+
"delete text[174:175] --> decoded_text[174:174] ' ' --> ''"
|
292 |
+
],
|
293 |
+
"n_oov_chars": 0,
|
294 |
+
"oov_ratio": 0.0,
|
295 |
+
"oov_charset": "[]"
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"text": "Jamieson, an Ontario native, was a Double-A Southern League all-star in 2014, when he batted .298 at Mobile. He was the Metro Atlantic co-player of the year in 2011 and a three-time representative of Team Canada, including a gold medal at the Pan An Games in 2015.",
|
299 |
+
"decoded_text": "Jamieson, an Ontario native, was a Double-A Southern League all-star in 2014, when he batted.298 at Mobile. He was the Metro Atlantic co-player of the year in 2011 and a three-time representative of Team Canada, including a gold medal at the Pan An Games in 2015.",
|
300 |
+
"diff": [
|
301 |
+
"delete text[92:93] --> decoded_text[92:92] ' ' --> ''"
|
302 |
+
],
|
303 |
+
"n_oov_chars": 0,
|
304 |
+
"oov_ratio": 0.0,
|
305 |
+
"oov_charset": "[]"
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"text": "In this tutorial, we learn how to get big bouncy curls like Kim Kardashian. First, have straight and clean hair and heat up a 1.5\" curling iron. Now, section your hair off and start with the bottom half. Wrap your hair around the curling iron, then after 15 seconds release the ...more",
|
309 |
+
"decoded_text": "In this tutorial, we learn how to get big bouncy curls like Kim Kardashian. First, have straight and clean hair and heat up a 1.5\" curling iron. Now, section your hair off and start with the bottom half. Wrap your hair around the curling iron, then after 15 seconds release the...more",
|
310 |
+
"diff": [
|
311 |
+
"delete text[277:278] --> decoded_text[277:277] ' ' --> ''"
|
312 |
+
],
|
313 |
+
"n_oov_chars": 0,
|
314 |
+
"oov_ratio": 0.0,
|
315 |
+
"oov_charset": "[]"
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"text": "In this tutorial, we learn how to create a Kim Kardashian-inspired cat eye makeup look. Start by using a nude color eyeshadow on the eyes and tape to create an edge on the outer corner of the eye. After you do this, apply a white shadow underneath the eyebrows to create a high ...more",
|
319 |
+
"decoded_text": "In this tutorial, we learn how to create a Kim Kardashian-inspired cat eye makeup look. Start by using a nude color eyeshadow on the eyes and tape to create an edge on the outer corner of the eye. After you do this, apply a white shadow underneath the eyebrows to create a high...more",
|
320 |
+
"diff": [
|
321 |
+
"delete text[277:278] --> decoded_text[277:277] ' ' --> ''"
|
322 |
+
],
|
323 |
+
"n_oov_chars": 0,
|
324 |
+
"oov_ratio": 0.0,
|
325 |
+
"oov_charset": "[]"
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"text": "Love the makeup styles of Kim Kardashian? Then you might like this makeup tutorial, which shows you how to recreate her looks. This Kardashian-inspired smokey eyes look uses minimal product, so you don't need to load up on the cosmetics. It's a simple look, perfect for any occ ...more",
|
329 |
+
"decoded_text": "Love the makeup styles of Kim Kardashian? Then you might like this makeup tutorial, which shows you how to recreate her looks. This Kardashian-inspired smokey eyes look uses minimal product, so you don't need to load up on the cosmetics. It's a simple look, perfect for any occ...more",
|
330 |
+
"diff": [
|
331 |
+
"replace text[277:285] --> decoded_text[277:284] ' ...more' --> '...more'"
|
332 |
+
],
|
333 |
+
"n_oov_chars": 0,
|
334 |
+
"oov_ratio": 0.0,
|
335 |
+
"oov_charset": "[]"
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"text": "You've probably seen tutorials on YouTube before on how to achieve Kim Kardashian curls, or Victoria's Secret waves, or the hair look of the fashionable deviants on \"Pretty Little Liars,\" but why go through so many tutorials when they're all really the same thing? Check out t ...more",
|
339 |
+
"decoded_text": "You've probably seen tutorials on YouTube before on how to achieve Kim Kardashian curls, or Victoria's Secret waves, or the hair look of the fashionable deviants on \"Pretty Little Liars,\" but why go through so many tutorials when they're all really the same thing? Check out t...more",
|
340 |
+
"diff": [
|
341 |
+
"delete text[276:277] --> decoded_text[276:276] ' ' --> ''"
|
342 |
+
],
|
343 |
+
"n_oov_chars": 0,
|
344 |
+
"oov_ratio": 0.0,
|
345 |
+
"oov_charset": "[]"
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"text": "In the Age of the Smokey Eye, women who get glammed up with red lips and cat eyes are endangered species. Fifty years ago you would have seen nothing but, yet today the predominance of Kim Kardashian smokey eyes and nude lips has made anyone wearing red lips a wonder. Which i ...more",
|
349 |
+
"decoded_text": "In the Age of the Smokey Eye, women who get glammed up with red lips and cat eyes are endangered species. Fifty years ago you would have seen nothing but, yet today the predominance of Kim Kardashian smokey eyes and nude lips has made anyone wearing red lips a wonder. Which i...more",
|
350 |
+
"diff": [
|
351 |
+
"replace text[276:284] --> decoded_text[276:283] ' ...more' --> '...more'"
|
352 |
+
],
|
353 |
+
"n_oov_chars": 0,
|
354 |
+
"oov_ratio": 0.0,
|
355 |
+
"oov_charset": "[]"
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"text": "TGIF. What better day to take a break from the week's dwindling grind? Below, a video demo plus instructions for indulging in a little tech-aided vanity during your next water cooler hiatus. An iPhone is necessary, so if you don't have one, find a co-worker stat. 1. Buy & Down ...more",
|
359 |
+
"decoded_text": "TGIF. What better day to take a break from the week's dwindling grind? Below, a video demo plus instructions for indulging in a little tech-aided vanity during your next water cooler hiatus. An iPhone is necessary, so if you don't have one, find a co-worker stat. 1. Buy & Down...more",
|
360 |
+
"diff": [
|
361 |
+
"delete text[277:278] --> decoded_text[277:277] ' ' --> ''"
|
362 |
+
],
|
363 |
+
"n_oov_chars": 0,
|
364 |
+
"oov_ratio": 0.0,
|
365 |
+
"oov_charset": "[]"
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"text": "It's been a rough week for Instagram. First they came out with a new terms of service that suggested the right to let companies use people's photos for advertisements without the user's permission. Then everyone started freaking out and debating whether or not to leave the onl ...more",
|
369 |
+
"decoded_text": "It's been a rough week for Instagram. First they came out with a new terms of service that suggested the right to let companies use people's photos for advertisements without the user's permission. Then everyone started freaking out and debating whether or not to leave the onl...more",
|
370 |
+
"diff": [
|
371 |
+
"replace text[277:285] --> decoded_text[277:284] ' ...more' --> '...more'"
|
372 |
+
],
|
373 |
+
"n_oov_chars": 0,
|
374 |
+
"oov_ratio": 0.0,
|
375 |
+
"oov_charset": "[]"
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"text": "Kim Kardashian and Kanye West have fittingly named their second child \"Saint,\" but I'm sure you don't care about that—and neither do I. Unfortunately, regardless of our pop culture interests, we're forced to know these types of stupid facts because the internet is so densely p ...more",
|
379 |
+
"decoded_text": "Kim Kardashian and Kanye West have fittingly named their second child \"Saint,\" but I'm sure you don't care about that—and neither do I. Unfortunately, regardless of our pop culture interests, we're forced to know these types of stupid facts because the internet is so densely p...more",
|
380 |
+
"diff": [
|
381 |
+
"replace text[277:285] --> decoded_text[277:284] ' ...more' --> '...more'"
|
382 |
+
],
|
383 |
+
"n_oov_chars": 0,
|
384 |
+
"oov_ratio": 0.0,
|
385 |
+
"oov_charset": "[]"
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"text": "The war may be forgotten but the warrior will always be remembered !!!! All gave Some-Some gave All. Rest in peace Alden. :-(",
|
389 |
+
"decoded_text": "The war may be forgotten but the warrior will always be remembered!!!! All gave Some-Some gave All. Rest in peace Alden. :-(",
|
390 |
+
"diff": [
|
391 |
+
"delete text[66:67] --> decoded_text[66:66] ' ' --> ''"
|
392 |
+
],
|
393 |
+
"n_oov_chars": 0,
|
394 |
+
"oov_ratio": 0.0,
|
395 |
+
"oov_charset": "[]"
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"text": "“How about,” the boy suggested, “we go swimming, have a barbecue and watch the Indy 500 like we did last year . . . in honor of those who died for this great nation and our fun, American way of life.”",
|
399 |
+
"decoded_text": "“How about,” the boy suggested, “we go swimming, have a barbecue and watch the Indy 500 like we did last year... in honor of those who died for this great nation and our fun, American way of life.”",
|
400 |
+
"diff": [
|
401 |
+
"delete text[109:110] --> decoded_text[109:109] ' ' --> ''",
|
402 |
+
"delete text[111:112] --> decoded_text[110:110] ' ' --> ''",
|
403 |
+
"delete text[113:114] --> decoded_text[111:111] ' ' --> ''"
|
404 |
+
],
|
405 |
+
"n_oov_chars": 0,
|
406 |
+
"oov_ratio": 0.0,
|
407 |
+
"oov_charset": "[]"
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"text": "Description : Brandon does it again as he brings us the lovely redhead slut Mylie Moore. This babe has got some serious attitude when it comes to sucking cock, and Brandon can tell right away that this is going to ...",
|
411 |
+
"decoded_text": "Description : Brandon does it again as he brings us the lovely redhead slut Mylie Moore. This babe has got some serious attitude when it comes to sucking cock, and Brandon can tell right away that this is going to...",
|
412 |
+
"diff": [
|
413 |
+
"replace text[213:217] --> decoded_text[213:216] ' ...' --> '...'"
|
414 |
+
],
|
415 |
+
"n_oov_chars": 0,
|
416 |
+
"oov_ratio": 0.0,
|
417 |
+
"oov_charset": "[]"
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"text": "Description : Presley Maddox is in front of a chinese restaurant looking at the menu. \"What are you looking for?\" Brandon asks her. \"Creamofsomeyoungguy...\" she replies, a naughty look on her face. \"Oh, i've heard ...",
|
421 |
+
"decoded_text": "Description : Presley Maddox is in front of a chinese restaurant looking at the menu. \"What are you looking for?\" Brandon asks her. \"Creamofsomeyoungguy...\" she replies, a naughty look on her face. \"Oh, i've heard...",
|
422 |
+
"diff": [
|
423 |
+
"replace text[213:217] --> decoded_text[213:216] ' ...' --> '...'"
|
424 |
+
],
|
425 |
+
"n_oov_chars": 0,
|
426 |
+
"oov_ratio": 0.0,
|
427 |
+
"oov_charset": "[]"
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"text": "I simply want to tell you that I am just all new to blogging and honestly loved this blog. Probably I’m likely to bookmark your site . You really have really good writings. Bless you for revealing your web page.",
|
431 |
+
"decoded_text": "I simply want to tell you that I am just all new to blogging and honestly loved this blog. Probably I’m likely to bookmark your site. You really have really good writings. Bless you for revealing your web page.",
|
432 |
+
"diff": [
|
433 |
+
"delete text[132:133] --> decoded_text[132:132] ' ' --> ''"
|
434 |
+
],
|
435 |
+
"n_oov_chars": 0,
|
436 |
+
"oov_ratio": 0.0,
|
437 |
+
"oov_charset": "[]"
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"text": "I simply had to thank you very much once more. I’m not certain the things I might have sorted out without those methods documented by you relating to such a theme. It had been a real scary circumstance in my view, however , finding out your professional style you handled that took me to weep over delight. I’m thankful for your service and then sincerely hope you know what an amazing job you’re putting in educating some other people thru your websites. Probably you have never come across all of us.",
|
441 |
+
"decoded_text": "I simply had to thank you very much once more. I’m not certain the things I might have sorted out without those methods documented by you relating to such a theme. It had been a real scary circumstance in my view, however, finding out your professional style you handled that took me to weep over delight. I’m thankful for your service and then sincerely hope you know what an amazing job you’re putting in educating some other people thru your websites. Probably you have never come across all of us.",
|
442 |
+
"diff": [
|
443 |
+
"delete text[221:222] --> decoded_text[221:221] ' ' --> ''"
|
444 |
+
],
|
445 |
+
"n_oov_chars": 0,
|
446 |
+
"oov_ratio": 0.0,
|
447 |
+
"oov_charset": "[]"
|
448 |
+
},
|
449 |
+
{
|
450 |
+
"text": "I genuinely enjoy reading on this internet site , it contains good content . “Words are, of course, the most powerful drug used by mankind.” by Rudyard Kipling.",
|
451 |
+
"decoded_text": "I genuinely enjoy reading on this internet site, it contains good content. “Words are, of course, the most powerful drug used by mankind.” by Rudyard Kipling.",
|
452 |
+
"diff": [
|
453 |
+
"delete text[47:48] --> decoded_text[47:47] ' ' --> ''",
|
454 |
+
"delete text[74:75] --> decoded_text[73:73] ' ' --> ''"
|
455 |
+
],
|
456 |
+
"n_oov_chars": 0,
|
457 |
+
"oov_ratio": 0.0,
|
458 |
+
"oov_charset": "[]"
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"text": "Hiya very cool web site!! Man .. Excellent .. Wonderful .. I’ll bookmark your web site and take the feeds additionally¡KI am satisfied to search out a lot of useful info here within the post, we need develop extra strategies in this regard, thank you for sharing. . . . . .",
|
462 |
+
"decoded_text": "Hiya very cool web site!! Man.. Excellent.. Wonderful.. I’ll bookmark your web site and take the feeds additionally¡KI am satisfied to search out a lot of useful info here within the post, we need develop extra strategies in this regard, thank you for sharing......",
|
463 |
+
"diff": [
|
464 |
+
"delete text[29:30] --> decoded_text[29:29] ' ' --> ''",
|
465 |
+
"delete text[42:43] --> decoded_text[41:41] ' ' --> ''",
|
466 |
+
"delete text[55:56] --> decoded_text[53:53] ' ' --> ''",
|
467 |
+
"replace text[263:273] --> decoded_text[260:265] ' . . . . .' --> '.....'"
|
468 |
+
],
|
469 |
+
"n_oov_chars": 0,
|
470 |
+
"oov_ratio": 0.0,
|
471 |
+
"oov_charset": "[]"
|
472 |
+
},
|
473 |
+
{
|
474 |
+
"text": "I have been exploring for a little bit for any high quality articles or weblog posts on this sort of area . Exploring in Yahoo I ultimately stumbled upon this website. Studying this info So i am satisfied to convey that I have a very good uncanny feeling I discovered just what I needed. I so much indisputably will make sure to do not forget this web site and provides it a look regularly.",
|
475 |
+
"decoded_text": "I have been exploring for a little bit for any high quality articles or weblog posts on this sort of area. Exploring in Yahoo I ultimately stumbled upon this website. Studying this info So i am satisfied to convey that I have a very good uncanny feeling I discovered just what I needed. I so much indisputably will make sure to do not forget this web site and provides it a look regularly.",
|
476 |
+
"diff": [
|
477 |
+
"delete text[105:106] --> decoded_text[105:105] ' ' --> ''"
|
478 |
+
],
|
479 |
+
"n_oov_chars": 0,
|
480 |
+
"oov_ratio": 0.0,
|
481 |
+
"oov_charset": "[]"
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"text": "Howdy very nice web site!! Man .. Beautiful .. Superb .. I will bookmark your blog and take the feeds also¡KI’m glad to search out a lot of useful information right here in the post, we need develop more techniques in this regard, thank you for sharing. . . . . .",
|
485 |
+
"decoded_text": "Howdy very nice web site!! Man.. Beautiful.. Superb.. I will bookmark your blog and take the feeds also¡KI’m glad to search out a lot of useful information right here in the post, we need develop more techniques in this regard, thank you for sharing......",
|
486 |
+
"diff": [
|
487 |
+
"delete text[30:31] --> decoded_text[30:30] ' ' --> ''",
|
488 |
+
"delete text[43:44] --> decoded_text[42:42] ' ' --> ''",
|
489 |
+
"delete text[53:54] --> decoded_text[51:51] ' ' --> ''",
|
490 |
+
"replace text[253:263] --> decoded_text[250:255] ' . . . . .' --> '.....'"
|
491 |
+
],
|
492 |
+
"n_oov_chars": 0,
|
493 |
+
"oov_ratio": 0.0,
|
494 |
+
"oov_charset": "[]"
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"text": "I simply desired to say thanks all over again. I am not sure what I could possibly have worked on without these solutions documented by you directly on my industry. It had become a frustrating condition for me personally, nevertheless being able to view your skilled form you dealt with the issue forced me to cry for contentment. I’m just grateful for this guidance and as well , trust you find out what a powerful job that you’re putting in teaching some other people through the use of a blog. I know that you’ve never come across any of us.",
|
498 |
+
"decoded_text": "I simply desired to say thanks all over again. I am not sure what I could possibly have worked on without these solutions documented by you directly on my industry. It had become a frustrating condition for me personally, nevertheless being able to view your skilled form you dealt with the issue forced me to cry for contentment. I’m just grateful for this guidance and as well, trust you find out what a powerful job that you’re putting in teaching some other people through the use of a blog. I know that you’ve never come across any of us.",
|
499 |
+
"diff": [
|
500 |
+
"delete text[378:379] --> decoded_text[378:378] ' ' --> ''"
|
501 |
+
],
|
502 |
+
"n_oov_chars": 0,
|
503 |
+
"oov_ratio": 0.0,
|
504 |
+
"oov_charset": "[]"
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"text": "I simply had to thank you so much once again. I am not sure the things I could possibly have gone through without the type of points contributed by you regarding this topic. Entirely was a real challenging circumstance for me, however , noticing the very specialized style you solved the issue took me to cry over contentment. Now i am grateful for this help and then sincerely hope you know what a powerful job your are accomplishing educating the others using your blog. Most likely you’ve never encountered any of us.",
|
508 |
+
"decoded_text": "I simply had to thank you so much once again. I am not sure the things I could possibly have gone through without the type of points contributed by you regarding this topic. Entirely was a real challenging circumstance for me, however, noticing the very specialized style you solved the issue took me to cry over contentment. Now i am grateful for this help and then sincerely hope you know what a powerful job your are accomplishing educating the others using your blog. Most likely you’ve never encountered any of us.",
|
509 |
+
"diff": [
|
510 |
+
"delete text[234:235] --> decoded_text[234:234] ' ' --> ''"
|
511 |
+
],
|
512 |
+
"n_oov_chars": 0,
|
513 |
+
"oov_ratio": 0.0,
|
514 |
+
"oov_charset": "[]"
|
515 |
+
},
|
516 |
+
{
|
517 |
+
"text": "Terrific paintings! That is the kind of info that are supposed to be shared around the internet. Shame on the seek engines for no longer positioning this publish higher! Come on over and seek advice from my website . Thank you =)",
|
518 |
+
"decoded_text": "Terrific paintings! That is the kind of info that are supposed to be shared around the internet. Shame on the seek engines for no longer positioning this publish higher! Come on over and seek advice from my website. Thank you =)",
|
519 |
+
"diff": [
|
520 |
+
"delete text[214:215] --> decoded_text[214:214] ' ' --> ''"
|
521 |
+
],
|
522 |
+
"n_oov_chars": 0,
|
523 |
+
"oov_ratio": 0.0,
|
524 |
+
"oov_charset": "[]"
|
525 |
+
},
|
526 |
+
{
|
527 |
+
"text": "Hi there very nice site!! Guy .. Beautiful .. Wonderful .. I will bookmark your web site and take the feeds additionally¡KI’m glad to search out a lot of useful information right here in the put up, we need develop more strategies on this regard, thanks for sharing. . . . . .",
|
528 |
+
"decoded_text": "Hi there very nice site!! Guy.. Beautiful.. Wonderful.. I will bookmark your web site and take the feeds additionally¡KI’m glad to search out a lot of useful information right here in the put up, we need develop more strategies on this regard, thanks for sharing......",
|
529 |
+
"diff": [
|
530 |
+
"delete text[29:30] --> decoded_text[29:29] ' ' --> ''",
|
531 |
+
"delete text[42:43] --> decoded_text[41:41] ' ' --> ''",
|
532 |
+
"delete text[55:56] --> decoded_text[53:53] ' ' --> ''",
|
533 |
+
"replace text[266:276] --> decoded_text[263:268] ' . . . . .' --> '.....'"
|
534 |
+
],
|
535 |
+
"n_oov_chars": 0,
|
536 |
+
"oov_ratio": 0.0,
|
537 |
+
"oov_charset": "[]"
|
538 |
+
},
|
539 |
+
{
|
540 |
+
"text": "Whats up very cool website!! Man .. Excellent .. Amazing .. I will bookmark your website and take the feeds also¡KI’m glad to search out numerous useful information right here in the post, we need work out extra strategies on this regard, thank you for sharing. . . . . .",
|
541 |
+
"decoded_text": "Whats up very cool website!! Man.. Excellent.. Amazing.. I will bookmark your website and take the feeds also¡KI’m glad to search out numerous useful information right here in the post, we need work out extra strategies on this regard, thank you for sharing......",
|
542 |
+
"diff": [
|
543 |
+
"delete text[32:33] --> decoded_text[32:32] ' ' --> ''",
|
544 |
+
"delete text[45:46] --> decoded_text[44:44] ' ' --> ''",
|
545 |
+
"delete text[56:57] --> decoded_text[54:54] ' ' --> ''",
|
546 |
+
"replace text[261:271] --> decoded_text[258:263] ' . . . . .' --> '.....'"
|
547 |
+
],
|
548 |
+
"n_oov_chars": 0,
|
549 |
+
"oov_ratio": 0.0,
|
550 |
+
"oov_charset": "[]"
|
551 |
+
},
|
552 |
+
{
|
553 |
+
"text": "Thanks , I have recently been looking for information about this topic for a long time and yours is the greatest I’ve found out till now. But, what in regards to the conclusion? Are you sure about the source?",
|
554 |
+
"decoded_text": "Thanks, I have recently been looking for information about this topic for a long time and yours is the greatest I’ve found out till now. But, what in regards to the conclusion? Are you sure about the source?",
|
555 |
+
"diff": [
|
556 |
+
"delete text[6:7] --> decoded_text[6:6] ' ' --> ''"
|
557 |
+
],
|
558 |
+
"n_oov_chars": 0,
|
559 |
+
"oov_ratio": 0.0,
|
560 |
+
"oov_charset": "[]"
|
561 |
+
},
|
562 |
+
{
|
563 |
+
"text": "I simply desired to say thanks once again. I do not know the things that I would’ve accomplished in the absence of the ways contributed by you regarding my theme. It absolutely was a frightful situation for me personally, but viewing a specialised fashion you processed the issue forced me to weep over gladness. I will be happy for the assistance and as well , pray you recognize what a powerful job your are doing teaching the others through a blog. Probably you have never come across all of us.",
|
564 |
+
"decoded_text": "I simply desired to say thanks once again. I do not know the things that I would’ve accomplished in the absence of the ways contributed by you regarding my theme. It absolutely was a frightful situation for me personally, but viewing a specialised fashion you processed the issue forced me to weep over gladness. I will be happy for the assistance and as well, pray you recognize what a powerful job your are doing teaching the others through a blog. Probably you have never come across all of us.",
|
565 |
+
"diff": [
|
566 |
+
"delete text[359:360] --> decoded_text[359:359] ' ' --> ''"
|
567 |
+
],
|
568 |
+
"n_oov_chars": 0,
|
569 |
+
"oov_ratio": 0.0,
|
570 |
+
"oov_charset": "[]"
|
571 |
+
},
|
572 |
+
{
|
573 |
+
"text": "Why hedge funds are Japan's only sane, liquid asset class _ Alternatives _ AsianInvestor . By continuing to use our website, you accept our Privacy Policy and Terms & Conditions.",
|
574 |
+
"decoded_text": "Why hedge funds are Japan's only sane, liquid asset class _ Alternatives _ AsianInvestor. By continuing to use our website, you accept our Privacy Policy and Terms & Conditions.",
|
575 |
+
"diff": [
|
576 |
+
"delete text[88:89] --> decoded_text[88:88] ' ' --> ''"
|
577 |
+
],
|
578 |
+
"n_oov_chars": 0,
|
579 |
+
"oov_ratio": 0.0,
|
580 |
+
"oov_charset": "[]"
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"text": "i am seve months pregnant ,this is my first pregnancy i cant wait to hold my little prince",
|
584 |
+
"decoded_text": "i am seve months pregnant,this is my first pregnancy i cant wait to hold my little prince",
|
585 |
+
"diff": [
|
586 |
+
"delete text[25:26] --> decoded_text[25:25] ' ' --> ''"
|
587 |
+
],
|
588 |
+
"n_oov_chars": 0,
|
589 |
+
"oov_ratio": 0.0,
|
590 |
+
"oov_charset": "[]"
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"text": "im so scared because this is my first kid and im 18 and all my babys dady wants to do is be with other women and its like he dose not care about the baby he just cares about himself i don't have a clue on what to do or say to him ??????????????",
|
594 |
+
"decoded_text": "im so scared because this is my first kid and im 18 and all my babys dady wants to do is be with other women and its like he dose not care about the baby he just cares about himself i don't have a clue on what to do or say to him??????????????",
|
595 |
+
"diff": [
|
596 |
+
"replace text[229:244] --> decoded_text[229:243] ' ??????????????' --> '??????????????'"
|
597 |
+
],
|
598 |
+
"n_oov_chars": 0,
|
599 |
+
"oov_ratio": 0.0,
|
600 |
+
"oov_charset": "[]"
|
601 |
+
},
|
602 |
+
{
|
603 |
+
"text": "Yes there are some very strange women out there, scary thing is that they are pregnant these spiteful women and they are going to raise the children of our future, futures not lookin so bright with retarded parents !",
|
604 |
+
"decoded_text": "Yes there are some very strange women out there, scary thing is that they are pregnant these spiteful women and they are going to raise the children of our future, futures not lookin so bright with retarded parents!",
|
605 |
+
"diff": [
|
606 |
+
"delete text[214:215] --> decoded_text[214:214] ' ' --> ''"
|
607 |
+
],
|
608 |
+
"n_oov_chars": 0,
|
609 |
+
"oov_ratio": 0.0,
|
610 |
+
"oov_charset": "[]"
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"text": "dissect that reason because what you might find is that all that 'smoke and mirrors' talk about why you shouldn't be doing something",
|
614 |
+
"decoded_text": "dissect that reason because what you might find is that all that'smoke and mirrors' talk about why you shouldn't be doing something",
|
615 |
+
"diff": [
|
616 |
+
"delete text[64:65] --> decoded_text[64:64] ' ' --> ''"
|
617 |
+
],
|
618 |
+
"n_oov_chars": 0,
|
619 |
+
"oov_ratio": 0.0,
|
620 |
+
"oov_charset": "[]"
|
621 |
+
},
|
622 |
+
{
|
623 |
+
"text": "What we offer here is not a hack , but a marketing of our otherwise compensated services.",
|
624 |
+
"decoded_text": "What we offer here is not a hack, but a marketing of our otherwise compensated services.",
|
625 |
+
"diff": [
|
626 |
+
"delete text[32:33] --> decoded_text[32:32] ' ' --> ''"
|
627 |
+
],
|
628 |
+
"n_oov_chars": 0,
|
629 |
+
"oov_ratio": 0.0,
|
630 |
+
"oov_charset": "[]"
|
631 |
+
},
|
632 |
+
{
|
633 |
+
"text": "that ca n't be comprehended by me.",
|
634 |
+
"decoded_text": "that can't be comprehended by me.",
|
635 |
+
"diff": [
|
636 |
+
"delete text[7:8] --> decoded_text[7:7] ' ' --> ''"
|
637 |
+
],
|
638 |
+
"n_oov_chars": 0,
|
639 |
+
"oov_ratio": 0.0,
|
640 |
+
"oov_charset": "[]"
|
641 |
+
},
|
642 |
+
{
|
643 |
+
"text": "The easiest way to seek for it's with , the most important unclaimed money database on the planet.",
|
644 |
+
"decoded_text": "The easiest way to seek for it's with, the most important unclaimed money database on the planet.",
|
645 |
+
"diff": [
|
646 |
+
"delete text[37:38] --> decoded_text[37:37] ' ' --> ''"
|
647 |
+
],
|
648 |
+
"n_oov_chars": 0,
|
649 |
+
"oov_ratio": 0.0,
|
650 |
+
"oov_charset": "[]"
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"text": "Hi there, i believe that i noticed you seen my own blog thus my partner and i reached rewind this choose? . I am just wanting to to find items to strengthen this site! I guess their sufficient to make use of some of your own aspects!",
|
654 |
+
"decoded_text": "Hi there, i believe that i noticed you seen my own blog thus my partner and i reached rewind this choose?. I am just wanting to to find items to strengthen this site! I guess their sufficient to make use of some of your own aspects!",
|
655 |
+
"diff": [
|
656 |
+
"delete text[105:106] --> decoded_text[105:105] ' ' --> ''"
|
657 |
+
],
|
658 |
+
"n_oov_chars": 0,
|
659 |
+
"oov_ratio": 0.0,
|
660 |
+
"oov_charset": "[]"
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"text": "You managed to hit the nail upon the top and defined out the whole thing without having side effect , people could",
|
664 |
+
"decoded_text": "You managed to hit the nail upon the top and defined out the whole thing without having side effect, people could",
|
665 |
+
"diff": [
|
666 |
+
"delete text[99:100] --> decoded_text[99:99] ' ' --> ''"
|
667 |
+
],
|
668 |
+
"n_oov_chars": 0,
|
669 |
+
"oov_ratio": 0.0,
|
670 |
+
"oov_charset": "[]"
|
671 |
+
},
|
672 |
+
{
|
673 |
+
"text": "Sure, it was the name of a white abolitionist ' from Ali's own",
|
674 |
+
"decoded_text": "Sure, it was the name of a white abolitionist'from Ali's own",
|
675 |
+
"diff": [
|
676 |
+
"delete text[45:46] --> decoded_text[45:45] ' ' --> ''",
|
677 |
+
"delete text[47:48] --> decoded_text[46:46] ' ' --> ''"
|
678 |
+
],
|
679 |
+
"n_oov_chars": 0,
|
680 |
+
"oov_ratio": 0.0,
|
681 |
+
"oov_charset": "[]"
|
682 |
+
},
|
683 |
+
{
|
684 |
+
"text": "the fact that these programmes are full of 'stars' who want",
|
685 |
+
"decoded_text": "the fact that these programmes are full of'stars' who want",
|
686 |
+
"diff": [
|
687 |
+
"delete text[42:43] --> decoded_text[42:42] ' ' --> ''"
|
688 |
+
],
|
689 |
+
"n_oov_chars": 0,
|
690 |
+
"oov_ratio": 0.0,
|
691 |
+
"oov_charset": "[]"
|
692 |
+
},
|
693 |
+
{
|
694 |
+
"text": "You managed to hit the nail upon the top as smartly as outlined out the entire thing without having side-effects , folks can take a signal.",
|
695 |
+
"decoded_text": "You managed to hit the nail upon the top as smartly as outlined out the entire thing without having side-effects, folks can take a signal.",
|
696 |
+
"diff": [
|
697 |
+
"delete text[112:113] --> decoded_text[112:112] ' ' --> ''"
|
698 |
+
],
|
699 |
+
"n_oov_chars": 0,
|
700 |
+
"oov_ratio": 0.0,
|
701 |
+
"oov_charset": "[]"
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"text": "I've been exploring for a little bit for any high-quality articles or weblog posts in this sort of space . Exploring in Yahoo I finally stumbled upon this website. Reading this information So i am glad to convey that I've an incredibly excellent uncanny feeling I discovered exactly what I needed. I so much for sure will make sure to do not disregard this web site and give it a look regularly.",
|
705 |
+
"decoded_text": "I've been exploring for a little bit for any high-quality articles or weblog posts in this sort of space. Exploring in Yahoo I finally stumbled upon this website. Reading this information So i am glad to convey that I've an incredibly excellent uncanny feeling I discovered exactly what I needed. I so much for sure will make sure to do not disregard this web site and give it a look regularly.",
|
706 |
+
"diff": [
|
707 |
+
"delete text[104:105] --> decoded_text[104:104] ' ' --> ''"
|
708 |
+
],
|
709 |
+
"n_oov_chars": 0,
|
710 |
+
"oov_ratio": 0.0,
|
711 |
+
"oov_charset": "[]"
|
712 |
+
},
|
713 |
+
{
|
714 |
+
"text": "top and defined out the entire thing with no need side-effects , other folks can take a signal.",
|
715 |
+
"decoded_text": "top and defined out the entire thing with no need side-effects, other folks can take a signal.",
|
716 |
+
"diff": [
|
717 |
+
"delete text[62:63] --> decoded_text[62:62] ' ' --> ''"
|
718 |
+
],
|
719 |
+
"n_oov_chars": 0,
|
720 |
+
"oov_ratio": 0.0,
|
721 |
+
"oov_charset": "[]"
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"text": "Hi there, i believe that i personally noticed you actually frequented the blog as a result my spouse and i stumbled on return back your favor? . I'm just looking to to seek out what to enhance my website! I reckon that it's blog pendidikan adequate to work with several of your ideas!",
|
725 |
+
"decoded_text": "Hi there, i believe that i personally noticed you actually frequented the blog as a result my spouse and i stumbled on return back your favor?. I'm just looking to to seek out what to enhance my website! I reckon that it's blog pendidikan adequate to work with several of your ideas!",
|
726 |
+
"diff": [
|
727 |
+
"delete text[142:143] --> decoded_text[142:142] ' ' --> ''"
|
728 |
+
],
|
729 |
+
"n_oov_chars": 0,
|
730 |
+
"oov_ratio": 0.0,
|
731 |
+
"oov_charset": "[]"
|
732 |
+
},
|
733 |
+
{
|
734 |
+
"text": "Places To Stay On The Big Island is free HD wallpaper. This wallpaper was upload at December 12, 2018 upload by admin in .You can download it in your computer by clicking resolution image in Download by size:. Don't forget to rate and comment if you interest with this wallpaper.",
|
735 |
+
"decoded_text": "Places To Stay On The Big Island is free HD wallpaper. This wallpaper was upload at December 12, 2018 upload by admin in.You can download it in your computer by clicking resolution image in Download by size:. Don't forget to rate and comment if you interest with this wallpaper.",
|
736 |
+
"diff": [
|
737 |
+
"delete text[120:121] --> decoded_text[120:120] ' ' --> ''"
|
738 |
+
],
|
739 |
+
"n_oov_chars": 0,
|
740 |
+
"oov_ratio": 0.0,
|
741 |
+
"oov_charset": "[]"
|
742 |
+
},
|
743 |
+
{
|
744 |
+
"text": "LADY T.: ... But they are more respectable than the Socialists.",
|
745 |
+
"decoded_text": "LADY T.:... But they are more respectable than the Socialists.",
|
746 |
+
"diff": [
|
747 |
+
"delete text[8:9] --> decoded_text[8:8] ' ' --> ''"
|
748 |
+
],
|
749 |
+
"n_oov_chars": 0,
|
750 |
+
"oov_ratio": 0.0,
|
751 |
+
"oov_charset": "[]"
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"text": "LADY T.: You needn't be sarcastic with me. (The emphasis is on 'me')",
|
755 |
+
"decoded_text": "LADY T.: You needn't be sarcastic with me. (The emphasis is on'me')",
|
756 |
+
"diff": [
|
757 |
+
"delete text[62:63] --> decoded_text[62:62] ' ' --> ''"
|
758 |
+
],
|
759 |
+
"n_oov_chars": 0,
|
760 |
+
"oov_ratio": 0.0,
|
761 |
+
"oov_charset": "[]"
|
762 |
+
},
|
763 |
+
{
|
764 |
+
"text": "LADY C.: (counting) ... six, seven, eight, nine. Good gracious!",
|
765 |
+
"decoded_text": "LADY C.: (counting)... six, seven, eight, nine. Good gracious!",
|
766 |
+
"diff": [
|
767 |
+
"delete text[19:20] --> decoded_text[19:19] ' ' --> ''"
|
768 |
+
],
|
769 |
+
"n_oov_chars": 0,
|
770 |
+
"oov_ratio": 0.0,
|
771 |
+
"oov_charset": "[]"
|
772 |
+
},
|
773 |
+
{
|
774 |
+
"text": "a perfect fifth) which I take to be 'something to keep the wet out'.",
|
775 |
+
"decoded_text": "a perfect fifth) which I take to be'something to keep the wet out'.",
|
776 |
+
"diff": [
|
777 |
+
"delete text[35:36] --> decoded_text[35:35] ' ' --> ''"
|
778 |
+
],
|
779 |
+
"n_oov_chars": 0,
|
780 |
+
"oov_ratio": 0.0,
|
781 |
+
"oov_charset": "[]"
|
782 |
+
},
|
783 |
+
{
|
784 |
+
"text": "LADY C.: But you also represent, I take it, the ...",
|
785 |
+
"decoded_text": "LADY C.: But you also represent, I take it, the...",
|
786 |
+
"diff": [
|
787 |
+
"delete text[47:48] --> decoded_text[47:47] ' ' --> ''"
|
788 |
+
],
|
789 |
+
"n_oov_chars": 0,
|
790 |
+
"oov_ratio": 0.0,
|
791 |
+
"oov_charset": "[]"
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"text": "LADY C.: But I feel that it has. If I had foreseen ... If I ...",
|
795 |
+
"decoded_text": "LADY C.: But I feel that it has. If I had foreseen... If I...",
|
796 |
+
"diff": [
|
797 |
+
"delete text[50:51] --> decoded_text[50:50] ' ' --> ''",
|
798 |
+
"delete text[59:60] --> decoded_text[58:58] ' ' --> ''"
|
799 |
+
],
|
800 |
+
"n_oov_chars": 0,
|
801 |
+
"oov_ratio": 0.0,
|
802 |
+
"oov_charset": "[]"
|
803 |
+
},
|
804 |
+
{
|
805 |
+
"text": "especially if the American has superior advantages in the way of climate and other things. ,",
|
806 |
+
"decoded_text": "especially if the American has superior advantages in the way of climate and other things. ,",
|
807 |
+
"diff": [
|
808 |
+
"delete text[93:94] --> decoded_text[93:93] ' ' --> ''"
|
809 |
+
],
|
810 |
+
"n_oov_chars": 0,
|
811 |
+
"oov_ratio": 0.0,
|
812 |
+
"oov_charset": "[]"
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"text": "(a) The name, address and occupation of the applicant—Orland P. ,",
|
816 |
+
"decoded_text": "(a) The name, address and occupation of the applicant—Orland P.,",
|
817 |
+
"diff": [
|
818 |
+
"delete text[64:65] --> decoded_text[64:64] ' ' --> ''"
|
819 |
+
],
|
820 |
+
"n_oov_chars": 0,
|
821 |
+
"oov_ratio": 0.0,
|
822 |
+
"oov_charset": "[]"
|
823 |
+
},
|
824 |
+
{
|
825 |
+
"text": "is the first step towards the exploitation and .the subordination of",
|
826 |
+
"decoded_text": "is the first step towards the exploitation and.the subordination of",
|
827 |
+
"diff": [
|
828 |
+
"delete text[46:47] --> decoded_text[46:46] ' ' --> ''"
|
829 |
+
],
|
830 |
+
"n_oov_chars": 0,
|
831 |
+
"oov_ratio": 0.0,
|
832 |
+
"oov_charset": "[]"
|
833 |
+
},
|
834 |
+
{
|
835 |
+
"text": "Have you stopped to consider the saving of $ $ $ on the House Furnishing you require ?",
|
836 |
+
"decoded_text": "Have you stopped to consider the saving of $ $ $ on the House Furnishing you require?",
|
837 |
+
"diff": [
|
838 |
+
"delete text[84:85] --> decoded_text[84:84] ' ' --> ''"
|
839 |
+
],
|
840 |
+
"n_oov_chars": 0,
|
841 |
+
"oov_ratio": 0.0,
|
842 |
+
"oov_charset": "[]"
|
843 |
+
},
|
844 |
+
{
|
845 |
+
"text": "\"At the .very best, the whole matter of the agreement is an experiment— a foolish interference with",
|
846 |
+
"decoded_text": "\"At the.very best, the whole matter of the agreement is an experiment— a foolish interference with",
|
847 |
+
"diff": [
|
848 |
+
"delete text[7:8] --> decoded_text[7:7] ' ' --> ''"
|
849 |
+
],
|
850 |
+
"n_oov_chars": 0,
|
851 |
+
"oov_ratio": 0.0,
|
852 |
+
"oov_charset": "[]"
|
853 |
+
},
|
854 |
+
{
|
855 |
+
"text": "My 7 years old boys is autistic and we live in 3 bed rooms house in the best neighborhood in San Diego ... more",
|
856 |
+
"decoded_text": "My 7 years old boys is autistic and we live in 3 bed rooms house in the best neighborhood in San Diego... more",
|
857 |
+
"diff": [
|
858 |
+
"delete text[102:103] --> decoded_text[102:102] ' ' --> ''"
|
859 |
+
],
|
860 |
+
"n_oov_chars": 0,
|
861 |
+
"oov_ratio": 0.0,
|
862 |
+
"oov_charset": "[]"
|
863 |
+
},
|
864 |
+
{
|
865 |
+
"text": "take full responsibility of managing my kids time, extra activities, play dates, pick up and drop off to school, keep routine in place accompanying them, follow up school work ... more",
|
866 |
+
"decoded_text": "take full responsibility of managing my kids time, extra activities, play dates, pick up and drop off to school, keep routine in place accompanying them, follow up school work... more",
|
867 |
+
"diff": [
|
868 |
+
"delete text[175:176] --> decoded_text[175:175] ' ' --> ''"
|
869 |
+
],
|
870 |
+
"n_oov_chars": 0,
|
871 |
+
"oov_ratio": 0.0,
|
872 |
+
"oov_charset": "[]"
|
873 |
+
},
|
874 |
+
{
|
875 |
+
"text": "Skills Required: Perform a variety of cleaning activities such as washing, ironing, sweeping, mopping, dusting and polishing. Adhere strictly to rules regarding health and safety. Ability to work with little supervision and maintain ...",
|
876 |
+
"decoded_text": "Skills Required: Perform a variety of cleaning activities such as washing, ironing, sweeping, mopping, dusting and polishing. Adhere strictly to rules regarding health and safety. Ability to work with little supervision and maintain...",
|
877 |
+
"diff": [
|
878 |
+
"replace text[232:236] --> decoded_text[232:235] ' ...' --> '...'"
|
879 |
+
],
|
880 |
+
"n_oov_chars": 0,
|
881 |
+
"oov_ratio": 0.0,
|
882 |
+
"oov_charset": "[]"
|
883 |
+
},
|
884 |
+
{
|
885 |
+
"text": "Hello, We are small family husband and wife with little girl and expected to have new baby girl soon. We need housekeeper with experience. she is willing to learn cocking some of our dishes. No much work. We will provide accommodation, ... more",
|
886 |
+
"decoded_text": "Hello, We are small family husband and wife with little girl and expected to have new baby girl soon. We need housekeeper with experience. she is willing to learn cocking some of our dishes. No much work. We will provide accommodation,... more",
|
887 |
+
"diff": [
|
888 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
889 |
+
],
|
890 |
+
"n_oov_chars": 0,
|
891 |
+
"oov_ratio": 0.0,
|
892 |
+
"oov_charset": "[]"
|
893 |
+
},
|
894 |
+
{
|
895 |
+
"text": "I am attending Plovdiv medical school and needs someone to stay with my kids at my apartment until I finish my classes and come back home, appreciate quick response ... more",
|
896 |
+
"decoded_text": "I am attending Plovdiv medical school and needs someone to stay with my kids at my apartment until I finish my classes and come back home, appreciate quick response... more",
|
897 |
+
"diff": [
|
898 |
+
"delete text[164:165] --> decoded_text[164:164] ' ' --> ''"
|
899 |
+
],
|
900 |
+
"n_oov_chars": 0,
|
901 |
+
"oov_ratio": 0.0,
|
902 |
+
"oov_charset": "[]"
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"text": "I need for a personal assistant for our fashion manager at one of the biggest companies in Kuwait. If interested please ... more",
|
906 |
+
"decoded_text": "I need for a personal assistant for our fashion manager at one of the biggest companies in Kuwait. If interested please... more",
|
907 |
+
"diff": [
|
908 |
+
"delete text[119:120] --> decoded_text[119:119] ' ' --> ''"
|
909 |
+
],
|
910 |
+
"n_oov_chars": 0,
|
911 |
+
"oov_ratio": 0.0,
|
912 |
+
"oov_charset": "[]"
|
913 |
+
},
|
914 |
+
{
|
915 |
+
"text": "We are looking for Private Nurses(Basic Nursing Qualification required) for Elders, Parents, b Opening is in one of the royal family in Dammam. Taking care of old Parents(AGE 70 to 80) ,having knowledge of giving medicines. Traveling ...",
|
916 |
+
"decoded_text": "We are looking for Private Nurses(Basic Nursing Qualification required) for Elders, Parents, b Opening is in one of the royal family in Dammam. Taking care of old Parents(AGE 70 to 80),having knowledge of giving medicines. Traveling...",
|
917 |
+
"diff": [
|
918 |
+
"delete text[184:185] --> decoded_text[184:184] ' ' --> ''",
|
919 |
+
"replace text[233:237] --> decoded_text[232:235] ' ...' --> '...'"
|
920 |
+
],
|
921 |
+
"n_oov_chars": 0,
|
922 |
+
"oov_ratio": 0.0,
|
923 |
+
"oov_charset": "[]"
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"text": "It’s a long list To write it all down , but I will fill you up via face time interview or call . Mostly it’s two jobs Personal assistance to the employer and her children and light house keeping and it involve a lot off traveling .... more",
|
927 |
+
"decoded_text": "It’s a long list To write it all down, but I will fill you up via face time interview or call. Mostly it’s two jobs Personal assistance to the employer and her children and light house keeping and it involve a lot off traveling.... more",
|
928 |
+
"diff": [
|
929 |
+
"delete text[37:38] --> decoded_text[37:37] ' ' --> ''",
|
930 |
+
"delete text[94:95] --> decoded_text[93:93] ' ' --> ''",
|
931 |
+
"delete text[229:230] --> decoded_text[227:227] ' ' --> ''"
|
932 |
+
],
|
933 |
+
"n_oov_chars": 0,
|
934 |
+
"oov_ratio": 0.0,
|
935 |
+
"oov_charset": "[]"
|
936 |
+
},
|
937 |
+
{
|
938 |
+
"text": "I am looking for a helper who can start IMMEDIATELY for the next 2 weeks. We are a small family of 4 who will have guests staying with us for almost 2 weeks and the extra cleaning help will be needed for the time they are here. Live in ... more",
|
939 |
+
"decoded_text": "I am looking for a helper who can start IMMEDIATELY for the next 2 weeks. We are a small family of 4 who will have guests staying with us for almost 2 weeks and the extra cleaning help will be needed for the time they are here. Live in... more",
|
940 |
+
"diff": [
|
941 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
942 |
+
],
|
943 |
+
"n_oov_chars": 0,
|
944 |
+
"oov_ratio": 0.0,
|
945 |
+
"oov_charset": "[]"
|
946 |
+
},
|
947 |
+
{
|
948 |
+
"text": "We are living in Langata, Karen, Nairobi. Would provide your own room and bathroom. We are a very caring family, and looking for a very caring person to join our family. ... more",
|
949 |
+
"decoded_text": "We are living in Langata, Karen, Nairobi. Would provide your own room and bathroom. We are a very caring family, and looking for a very caring person to join our family.... more",
|
950 |
+
"diff": [
|
951 |
+
"delete text[169:170] --> decoded_text[169:169] ' ' --> ''"
|
952 |
+
],
|
953 |
+
"n_oov_chars": 0,
|
954 |
+
"oov_ratio": 0.0,
|
955 |
+
"oov_charset": "[]"
|
956 |
+
},
|
957 |
+
{
|
958 |
+
"text": "Hello, We are looking for a housemaid/nanny for our home. We have two children, aged 2 and 4. The children go to school from 8am to 3.30pk. We live in a small house as follows: First floor: 1 Living Room for Guests which is ... more",
|
959 |
+
"decoded_text": "Hello, We are looking for a housemaid/nanny for our home. We have two children, aged 2 and 4. The children go to school from 8am to 3.30pk. We live in a small house as follows: First floor: 1 Living Room for Guests which is... more",
|
960 |
+
"diff": [
|
961 |
+
"replace text[223:232] --> decoded_text[223:231] ' ... more' --> '... more'"
|
962 |
+
],
|
963 |
+
"n_oov_chars": 0,
|
964 |
+
"oov_ratio": 0.0,
|
965 |
+
"oov_charset": "[]"
|
966 |
+
},
|
967 |
+
{
|
968 |
+
"text": "So far we have 2 nurses and looking for an additional one + 1 care giver to work on 12 hours shift duties, one day off day when the team is complete. If not then we offer overtime for the extra days. We look forward to have you as a ... more",
|
969 |
+
"decoded_text": "So far we have 2 nurses and looking for an additional one + 1 care giver to work on 12 hours shift duties, one day off day when the team is complete. If not then we offer overtime for the extra days. We look forward to have you as a... more",
|
970 |
+
"diff": [
|
971 |
+
"replace text[232:241] --> decoded_text[232:240] ' ... more' --> '... more'"
|
972 |
+
],
|
973 |
+
"n_oov_chars": 0,
|
974 |
+
"oov_ratio": 0.0,
|
975 |
+
"oov_charset": "[]"
|
976 |
+
},
|
977 |
+
{
|
978 |
+
"text": "We 4 persons my wife, son, my mother in law's and me ... more",
|
979 |
+
"decoded_text": "We 4 persons my wife, son, my mother in law's and me... more",
|
980 |
+
"diff": [
|
981 |
+
"delete text[52:53] --> decoded_text[52:52] ' ' --> ''"
|
982 |
+
],
|
983 |
+
"n_oov_chars": 0,
|
984 |
+
"oov_ratio": 0.0,
|
985 |
+
"oov_charset": "[]"
|
986 |
+
},
|
987 |
+
{
|
988 |
+
"text": "I need an experienced housekeeper who know how to work with cleaning machines all kinds. Im about to start my cleaning company. for a start there will be a salary for first 6 months. if we pull it off and secceed. I will make salary by ... more",
|
989 |
+
"decoded_text": "I need an experienced housekeeper who know how to work with cleaning machines all kinds. Im about to start my cleaning company. for a start there will be a salary for first 6 months. if we pull it off and secceed. I will make salary by... more",
|
990 |
+
"diff": [
|
991 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
992 |
+
],
|
993 |
+
"n_oov_chars": 0,
|
994 |
+
"oov_ratio": 0.0,
|
995 |
+
"oov_charset": "[]"
|
996 |
+
},
|
997 |
+
{
|
998 |
+
"text": "We are a Kuwaiti couple with a baby on the way ,we would like in our housekeeper to assist us and be true to her words and work. We would love to have a respectful employer-employee relationship. We can have an arrangement that suit a... more",
|
999 |
+
"decoded_text": "We are a Kuwaiti couple with a baby on the way,we would like in our housekeeper to assist us and be true to her words and work. We would love to have a respectful employer-employee relationship. We can have an arrangement that suit a... more",
|
1000 |
+
"diff": [
|
1001 |
+
"delete text[46:47] --> decoded_text[46:46] ' ' --> ''"
|
1002 |
+
],
|
1003 |
+
"n_oov_chars": 0,
|
1004 |
+
"oov_ratio": 0.0,
|
1005 |
+
"oov_charset": "[]"
|
1006 |
+
},
|
1007 |
+
{
|
1008 |
+
"text": "Hello .. i want someone who like and love baby.. my baby is very friendly and he is love playing ... more",
|
1009 |
+
"decoded_text": "Hello.. i want someone who like and love baby.. my baby is very friendly and he is love playing... more",
|
1010 |
+
"diff": [
|
1011 |
+
"delete text[5:6] --> decoded_text[5:5] ' ' --> ''",
|
1012 |
+
"delete text[96:97] --> decoded_text[95:95] ' ' --> ''"
|
1013 |
+
],
|
1014 |
+
"n_oov_chars": 0,
|
1015 |
+
"oov_ratio": 0.0,
|
1016 |
+
"oov_charset": "[]"
|
1017 |
+
},
|
1018 |
+
{
|
1019 |
+
"text": "I require an assistant who speaks fluent English and Armenian. The job is to basically assign you with tasks to search for suppliers and contractors who can do specific tasks, negotiate with them, and report back to me the findings. In ... more",
|
1020 |
+
"decoded_text": "I require an assistant who speaks fluent English and Armenian. The job is to basically assign you with tasks to search for suppliers and contractors who can do specific tasks, negotiate with them, and report back to me the findings. In... more",
|
1021 |
+
"diff": [
|
1022 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
1023 |
+
],
|
1024 |
+
"n_oov_chars": 0,
|
1025 |
+
"oov_ratio": 0.0,
|
1026 |
+
"oov_charset": "[]"
|
1027 |
+
},
|
1028 |
+
{
|
1029 |
+
"text": "Yeah ... no thanks. When I make an appointment next year I'll ask for Demerol/Versed, which worked fine the first time. My father, a surgeon, always emphasized the slim but serious risks of anesthesia. Never be put under unless it's a medical necessity, he told me. So skipping the propofol seems like a wise medical approach, as well as a money-saver.",
|
1030 |
+
"decoded_text": "Yeah... no thanks. When I make an appointment next year I'll ask for Demerol/Versed, which worked fine the first time. My father, a surgeon, always emphasized the slim but serious risks of anesthesia. Never be put under unless it's a medical necessity, he told me. So skipping the propofol seems like a wise medical approach, as well as a money-saver.",
|
1031 |
+
"diff": [
|
1032 |
+
"delete text[4:5] --> decoded_text[4:4] ' ' --> ''"
|
1033 |
+
],
|
1034 |
+
"n_oov_chars": 0,
|
1035 |
+
"oov_ratio": 0.0,
|
1036 |
+
"oov_charset": "[]"
|
1037 |
+
},
|
1038 |
+
{
|
1039 |
+
"text": "Carolinas HealthCare System is still offering financial aid to help some low-income patients pay insurance premiums for 2015. As I reported ...",
|
1040 |
+
"decoded_text": "Carolinas HealthCare System is still offering financial aid to help some low-income patients pay insurance premiums for 2015. As I reported...",
|
1041 |
+
"diff": [
|
1042 |
+
"delete text[139:140] --> decoded_text[139:139] ' ' --> ''"
|
1043 |
+
],
|
1044 |
+
"n_oov_chars": 0,
|
1045 |
+
"oov_ratio": 0.0,
|
1046 |
+
"oov_charset": "[]"
|
1047 |
+
},
|
1048 |
+
{
|
1049 |
+
"text": "He’ll be 27 in March and just came off the best season of his career, hitting .300/.389/.474 with 13 HR, and 11 triples.",
|
1050 |
+
"decoded_text": "He’ll be 27 in March and just came off the best season of his career, hitting.300/.389/.474 with 13 HR, and 11 triples.",
|
1051 |
+
"diff": [
|
1052 |
+
"delete text[77:78] --> decoded_text[77:77] ' ' --> ''"
|
1053 |
+
],
|
1054 |
+
"n_oov_chars": 0,
|
1055 |
+
"oov_ratio": 0.0,
|
1056 |
+
"oov_charset": "[]"
|
1057 |
+
},
|
1058 |
+
{
|
1059 |
+
"text": "You don’t freak out when 25 year old Alex Rodriguez strikes out 135 times because he’s hitting .318/.399/.622 with 52 bombs and 18 steals and he’s playing GG quality defense.",
|
1060 |
+
"decoded_text": "You don’t freak out when 25 year old Alex Rodriguez strikes out 135 times because he’s hitting.318/.399/.622 with 52 bombs and 18 steals and he’s playing GG quality defense.",
|
1061 |
+
"diff": [
|
1062 |
+
"delete text[94:95] --> decoded_text[94:94] ' ' --> ''"
|
1063 |
+
],
|
1064 |
+
"n_oov_chars": 0,
|
1065 |
+
"oov_ratio": 0.0,
|
1066 |
+
"oov_charset": "[]"
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"text": "46 of those players OPSed .800 or more.",
|
1070 |
+
"decoded_text": "46 of those players OPSed.800 or more.",
|
1071 |
+
"diff": [
|
1072 |
+
"delete text[25:26] --> decoded_text[25:25] ' ' --> ''"
|
1073 |
+
],
|
1074 |
+
"n_oov_chars": 0,
|
1075 |
+
"oov_ratio": 0.0,
|
1076 |
+
"oov_charset": "[]"
|
1077 |
+
},
|
1078 |
+
{
|
1079 |
+
"text": "From May 20th until June 29th Lee had an ERA of 5.68 and opposing batters hit over .300 off of him. During that almost six week stretch, which made up over 25% of his starts in ‘12, he was awful and run support was irrelevant as he put the Phillies back on their heels in start after start.",
|
1080 |
+
"decoded_text": "From May 20th until June 29th Lee had an ERA of 5.68 and opposing batters hit over.300 off of him. During that almost six week stretch, which made up over 25% of his starts in ‘12, he was awful and run support was irrelevant as he put the Phillies back on their heels in start after start.",
|
1081 |
+
"diff": [
|
1082 |
+
"delete text[82:83] --> decoded_text[82:82] ' ' --> ''"
|
1083 |
+
],
|
1084 |
+
"n_oov_chars": 0,
|
1085 |
+
"oov_ratio": 0.0,
|
1086 |
+
"oov_charset": "[]"
|
1087 |
+
},
|
1088 |
+
{
|
1089 |
+
"text": "There will be the first EVENT for 2nd CBT ! If you have any interest just join !",
|
1090 |
+
"decoded_text": "There will be the first EVENT for 2nd CBT! If you have any interest just join!",
|
1091 |
+
"diff": [
|
1092 |
+
"delete text[41:42] --> decoded_text[41:41] ' ' --> ''",
|
1093 |
+
"delete text[78:79] --> decoded_text[77:77] ' ' --> ''"
|
1094 |
+
],
|
1095 |
+
"n_oov_chars": 0,
|
1096 |
+
"oov_ratio": 0.0,
|
1097 |
+
"oov_charset": "[]"
|
1098 |
+
},
|
1099 |
+
{
|
1100 |
+
"text": "\"A film — it should be seen on a screen,\" she says. \"You should be able to witness it at the same proportion or bigger than life. ... I guess maybe it would make the job a little easier — I wouldn't have to worry about if the third button matched — but I don't want to do it that way.\"",
|
1101 |
+
"decoded_text": "\"A film — it should be seen on a screen,\" she says. \"You should be able to witness it at the same proportion or bigger than life.... I guess maybe it would make the job a little easier — I wouldn't have to worry about if the third button matched — but I don't want to do it that way.\"",
|
1102 |
+
"diff": [
|
1103 |
+
"delete text[129:130] --> decoded_text[129:129] ' ' --> ''"
|
1104 |
+
],
|
1105 |
+
"n_oov_chars": 0,
|
1106 |
+
"oov_ratio": 0.0,
|
1107 |
+
"oov_charset": "[]"
|
1108 |
+
},
|
1109 |
+
{
|
1110 |
+
"text": "The travel takes a toll; Dresser has two young children and he wants to watch them grow up. And it isn't just his kids who notice he's gone, he says: \"After I came back [shooting] in North Carolina ... my dry cleaner asked me, 'Where have you been? I haven't seen you in a very long time — did you go to another drycleaner?'\"",
|
1111 |
+
"decoded_text": "The travel takes a toll; Dresser has two young children and he wants to watch them grow up. And it isn't just his kids who notice he's gone, he says: \"After I came back [shooting] in North Carolina... my dry cleaner asked me, 'Where have you been? I haven't seen you in a very long time — did you go to another drycleaner?'\"",
|
1112 |
+
"diff": [
|
1113 |
+
"delete text[197:198] --> decoded_text[197:197] ' ' --> ''"
|
1114 |
+
],
|
1115 |
+
"n_oov_chars": 0,
|
1116 |
+
"oov_ratio": 0.0,
|
1117 |
+
"oov_charset": "[]"
|
1118 |
+
},
|
1119 |
+
{
|
1120 |
+
"text": "Artisans still had to paint the gun to look antique, but the 3-D printer lets the prop master duplicate the gun easily. \"We made two of them,\" Glenn says. \"Because with an action prop, if it breaks ... you lose a day of shooting.\" The gun isn't on screen for more than a few seconds but each one cost about $20,000.",
|
1121 |
+
"decoded_text": "Artisans still had to paint the gun to look antique, but the 3-D printer lets the prop master duplicate the gun easily. \"We made two of them,\" Glenn says. \"Because with an action prop, if it breaks... you lose a day of shooting.\" The gun isn't on screen for more than a few seconds but each one cost about $20,000.",
|
1122 |
+
"diff": [
|
1123 |
+
"delete text[197:198] --> decoded_text[197:197] ' ' --> ''"
|
1124 |
+
],
|
1125 |
+
"n_oov_chars": 0,
|
1126 |
+
"oov_ratio": 0.0,
|
1127 |
+
"oov_charset": "[]"
|
1128 |
+
},
|
1129 |
+
{
|
1130 |
+
"text": "My mother was very wary at first and now she's come around 180 degrees. She's like one of my biggest fans, now. Like, she'll come over to my house and she'll be like, \"OK, listen: I need two t-shirts from the comedy show and give me three DVDs. The neighbors are asking for them.\" ...",
|
1131 |
+
"decoded_text": "My mother was very wary at first and now she's come around 180 degrees. She's like one of my biggest fans, now. Like, she'll come over to my house and she'll be like, \"OK, listen: I need two t-shirts from the comedy show and give me three DVDs. The neighbors are asking for them.\"...",
|
1132 |
+
"diff": [
|
1133 |
+
"replace text[280:284] --> decoded_text[280:283] ' ...' --> '...'"
|
1134 |
+
],
|
1135 |
+
"n_oov_chars": 0,
|
1136 |
+
"oov_ratio": 0.0,
|
1137 |
+
"oov_charset": "[]"
|
1138 |
+
},
|
1139 |
+
{
|
1140 |
+
"text": "My thoughts go to two places: One is that when cops are attacked, they close ranks. I'm not talking about the blue wall of silence, but I think what happens is \"us versus them.\" I'm talking about incidents which the cops — like [in] Ferguson where an unarmed man was shot, when they get under attack with the media, they just close ranks. It's like buffalo when they see lions out there. ...",
|
1141 |
+
"decoded_text": "My thoughts go to two places: One is that when cops are attacked, they close ranks. I'm not talking about the blue wall of silence, but I think what happens is \"us versus them.\" I'm talking about incidents which the cops — like [in] Ferguson where an unarmed man was shot, when they get under attack with the media, they just close ranks. It's like buffalo when they see lions out there....",
|
1142 |
+
"diff": [
|
1143 |
+
"replace text[387:391] --> decoded_text[387:390] ' ...' --> '...'"
|
1144 |
+
],
|
1145 |
+
"n_oov_chars": 0,
|
1146 |
+
"oov_ratio": 0.0,
|
1147 |
+
"oov_charset": "[]"
|
1148 |
+
},
|
1149 |
+
{
|
1150 |
+
"text": "\"We ... want to capture the potential of unmanned aircraft and we have been working to develop the framework for the safe integration of this technology into our airspace,\" Department of Transportation Secretary Anthony Foxx said during a teleconference with journalists about the new proposed rules.",
|
1151 |
+
"decoded_text": "\"We... want to capture the potential of unmanned aircraft and we have been working to develop the framework for the safe integration of this technology into our airspace,\" Department of Transportation Secretary Anthony Foxx said during a teleconference with journalists about the new proposed rules.",
|
1152 |
+
"diff": [
|
1153 |
+
"delete text[3:4] --> decoded_text[3:3] ' ' --> ''"
|
1154 |
+
],
|
1155 |
+
"n_oov_chars": 0,
|
1156 |
+
"oov_ratio": 0.0,
|
1157 |
+
"oov_charset": "[]"
|
1158 |
+
},
|
1159 |
+
{
|
1160 |
+
"text": "The statement says that the proposed rules ensure \"that the Federal Government's use of UAS takes into account ... important concerns and in service of them, promotes better accountability and transparent use of this technology.\"",
|
1161 |
+
"decoded_text": "The statement says that the proposed rules ensure \"that the Federal Government's use of UAS takes into account... important concerns and in service of them, promotes better accountability and transparent use of this technology.\"",
|
1162 |
+
"diff": [
|
1163 |
+
"delete text[110:111] --> decoded_text[110:110] ' ' --> ''"
|
1164 |
+
],
|
1165 |
+
"n_oov_chars": 0,
|
1166 |
+
"oov_ratio": 0.0,
|
1167 |
+
"oov_charset": "[]"
|
1168 |
+
},
|
1169 |
+
{
|
1170 |
+
"text": "Context and Background Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools . Highly mobile (HM) or transient children attend two or more schools each school year and have been shown to be at high risk for dropping out (leaving early). There is no way to measure the size of this population.",
|
1171 |
+
"decoded_text": "Context and Background Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools. Highly mobile (HM) or transient children attend two or more schools each school year and have been shown to be at high risk for dropping out (leaving early). There is no way to measure the size of this population.",
|
1172 |
+
"diff": [
|
1173 |
+
"delete text[298:299] --> decoded_text[298:298] ' ' --> ''"
|
1174 |
+
],
|
1175 |
+
"n_oov_chars": 0,
|
1176 |
+
"oov_ratio": 0.0,
|
1177 |
+
"oov_charset": "[]"
|
1178 |
+
},
|
1179 |
+
{
|
1180 |
+
"text": "Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools .",
|
1181 |
+
"decoded_text": "Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools.",
|
1182 |
+
"diff": [
|
1183 |
+
"delete text[275:276] --> decoded_text[275:275] ' ' --> ''"
|
1184 |
+
],
|
1185 |
+
"n_oov_chars": 0,
|
1186 |
+
"oov_ratio": 0.0,
|
1187 |
+
"oov_charset": "[]"
|
1188 |
+
},
|
1189 |
+
{
|
1190 |
+
"text": "Seventeen educators – 9 administrators Representing: 1 homeless shelter , 1 charter high school (ages 14-18) 2 middle schools (ages 11-13), 1 multi level school (grades 4 - 19) 4 elementary schools (ages 4- 10) Participants:",
|
1191 |
+
"decoded_text": "Seventeen educators – 9 administrators Representing: 1 homeless shelter, 1 charter high school (ages 14-18) 2 middle schools (ages 11-13), 1 multi level school (grades 4 - 19) 4 elementary schools (ages 4- 10) Participants:",
|
1192 |
+
"diff": [
|
1193 |
+
"delete text[71:72] --> decoded_text[71:71] ' ' --> ''"
|
1194 |
+
],
|
1195 |
+
"n_oov_chars": 0,
|
1196 |
+
"oov_ratio": 0.0,
|
1197 |
+
"oov_charset": "[]"
|
1198 |
+
},
|
1199 |
+
{
|
1200 |
+
"text": "Schools that were in: rural (1) , small towns (3), suburban areas (3), urban (3)",
|
1201 |
+
"decoded_text": "Schools that were in: rural (1), small towns (3), suburban areas (3), urban (3)",
|
1202 |
+
"diff": [
|
1203 |
+
"delete text[31:32] --> decoded_text[31:31] ' ' --> ''"
|
1204 |
+
],
|
1205 |
+
"n_oov_chars": 0,
|
1206 |
+
"oov_ratio": 0.0,
|
1207 |
+
"oov_charset": "[]"
|
1208 |
+
},
|
1209 |
+
{
|
1210 |
+
"text": "The purpose of this paper is to make a new genre of action research accessible to readers ... participatory action research, ... (www.mmu.ac.uk/carn) ...",
|
1211 |
+
"decoded_text": "The purpose of this paper is to make a new genre of action research accessible to readers... participatory action research,... (www.mmu.ac.uk/carn)...",
|
1212 |
+
"diff": [
|
1213 |
+
"delete text[89:90] --> decoded_text[89:89] ' ' --> ''",
|
1214 |
+
"delete text[124:125] --> decoded_text[123:123] ' ' --> ''",
|
1215 |
+
"delete text[149:150] --> decoded_text[147:147] ' ' --> ''"
|
1216 |
+
],
|
1217 |
+
"n_oov_chars": 0,
|
1218 |
+
"oov_ratio": 0.0,
|
1219 |
+
"oov_charset": "[]"
|
1220 |
+
},
|
1221 |
+
{
|
1222 |
+
"text": "... Trust in Action Research’ on ... for Collaborative Action Research Networks (CARN) ... collaborative and participatory approaches to research.",
|
1223 |
+
"decoded_text": "... Trust in Action Research’ on... for Collaborative Action Research Networks (CARN)... collaborative and participatory approaches to research.",
|
1224 |
+
"diff": [
|
1225 |
+
"delete text[32:33] --> decoded_text[32:32] ' ' --> ''",
|
1226 |
+
"delete text[86:87] --> decoded_text[85:85] ' ' --> ''"
|
1227 |
+
],
|
1228 |
+
"n_oov_chars": 0,
|
1229 |
+
"oov_ratio": 0.0,
|
1230 |
+
"oov_charset": "[]"
|
1231 |
+
},
|
1232 |
+
{
|
1233 |
+
"text": "The Contribution of Action Research to Development in ...",
|
1234 |
+
"decoded_text": "The Contribution of Action Research to Development in...",
|
1235 |
+
"diff": [
|
1236 |
+
"delete text[53:54] --> decoded_text[53:53] ' ' --> ''"
|
1237 |
+
],
|
1238 |
+
"n_oov_chars": 0,
|
1239 |
+
"oov_ratio": 0.0,
|
1240 |
+
"oov_charset": "[]"
|
1241 |
+
},
|
1242 |
+
{
|
1243 |
+
"text": "Using Participatory Action Research in a Local Authority ...",
|
1244 |
+
"decoded_text": "Using Participatory Action Research in a Local Authority...",
|
1245 |
+
"diff": [
|
1246 |
+
"delete text[56:57] --> decoded_text[56:56] ' ' --> ''"
|
1247 |
+
],
|
1248 |
+
"n_oov_chars": 0,
|
1249 |
+
"oov_ratio": 0.0,
|
1250 |
+
"oov_charset": "[]"
|
1251 |
+
},
|
1252 |
+
{
|
1253 |
+
"text": "Using Participatory Action Research in a Local ... , I. ‘Participatory Action Research: ... http://www.uea.ac.uk/care/carn/conf97/PAPERS ...",
|
1254 |
+
"decoded_text": "Using Participatory Action Research in a Local..., I. ‘Participatory Action Research:... http://www.uea.ac.uk/care/carn/conf97/PAPERS...",
|
1255 |
+
"diff": [
|
1256 |
+
"delete text[46:47] --> decoded_text[46:46] ' ' --> ''",
|
1257 |
+
"delete text[50:51] --> decoded_text[49:49] ' ' --> ''",
|
1258 |
+
"delete text[87:88] --> decoded_text[85:85] ' ' --> ''",
|
1259 |
+
"delete text[136:137] --> decoded_text[133:133] ' ' --> ''"
|
1260 |
+
],
|
1261 |
+
"n_oov_chars": 0,
|
1262 |
+
"oov_ratio": 0.0,
|
1263 |
+
"oov_charset": "[]"
|
1264 |
+
},
|
1265 |
+
{
|
1266 |
+
"text": "Now, can we get all customers to go to \"smart alarms\" - nope !",
|
1267 |
+
"decoded_text": "Now, can we get all customers to go to \"smart alarms\" - nope!",
|
1268 |
+
"diff": [
|
1269 |
+
"delete text[60:61] --> decoded_text[60:60] ' ' --> ''"
|
1270 |
+
],
|
1271 |
+
"n_oov_chars": 0,
|
1272 |
+
"oov_ratio": 0.0,
|
1273 |
+
"oov_charset": "[]"
|
1274 |
+
},
|
1275 |
+
{
|
1276 |
+
"text": "You didn't (suggest \"faster\"). However, \"reading between the lines\", I thought your tech and associated use cases created an opening for faster roundtrips, which can have a huge impact on learning. So I guess that means business case. (Sorry previous note done from a phone included an \"?\" which kind of changed the meaning. . . .)",
|
1277 |
+
"decoded_text": "You didn't (suggest \"faster\"). However, \"reading between the lines\", I thought your tech and associated use cases created an opening for faster roundtrips, which can have a huge impact on learning. So I guess that means business case. (Sorry previous note done from a phone included an \"?\" which kind of changed the meaning....)",
|
1278 |
+
"diff": [
|
1279 |
+
"replace text[324:329] --> decoded_text[324:326] ' . . ' --> '..'"
|
1280 |
+
],
|
1281 |
+
"n_oov_chars": 0,
|
1282 |
+
"oov_ratio": 0.0,
|
1283 |
+
"oov_charset": "[]"
|
1284 |
+
},
|
1285 |
+
{
|
1286 |
+
"text": "Instead of having eight teams for eight products, we have one team for eight products.. . . Encapsulation, Abstraction, Inheritance, Polymorphism almost allow you to make a new product any day you like. The big chore is the terminology which has to change when you go from one industry/app to another - nothing to do with programming.",
|
1287 |
+
"decoded_text": "Instead of having eight teams for eight products, we have one team for eight products.... Encapsulation, Abstraction, Inheritance, Polymorphism almost allow you to make a new product any day you like. The big chore is the terminology which has to change when you go from one industry/app to another - nothing to do with programming.",
|
1288 |
+
"diff": [
|
1289 |
+
"replace text[87:90] --> decoded_text[87:88] ' . ' --> '.'"
|
1290 |
+
],
|
1291 |
+
"n_oov_chars": 0,
|
1292 |
+
"oov_ratio": 0.0,
|
1293 |
+
"oov_charset": "[]"
|
1294 |
+
},
|
1295 |
+
{
|
1296 |
+
"text": "But I definitely was surprised to see the game so close, but in division games, they historically are. Regardless of records. The crux of it is do you believe pete can bring this team back, or do you think he’s done. Well fought and I was impressed in the loss. But it all boils down to coaching and decision making for me , whether it’s betting on a unproven kicker/ punter AGAIN. And management on that last drive AGAIN.",
|
1297 |
+
"decoded_text": "But I definitely was surprised to see the game so close, but in division games, they historically are. Regardless of records. The crux of it is do you believe pete can bring this team back, or do you think he’s done. Well fought and I was impressed in the loss. But it all boils down to coaching and decision making for me, whether it’s betting on a unproven kicker/ punter AGAIN. And management on that last drive AGAIN.",
|
1298 |
+
"diff": [
|
1299 |
+
"delete text[322:323] --> decoded_text[322:322] ' ' --> ''"
|
1300 |
+
],
|
1301 |
+
"n_oov_chars": 0,
|
1302 |
+
"oov_ratio": 0.0,
|
1303 |
+
"oov_charset": "[]"
|
1304 |
+
},
|
1305 |
+
{
|
1306 |
+
"text": "a spider 'retreat' into his hole when he is trying to coax the flies?",
|
1307 |
+
"decoded_text": "a spider'retreat' into his hole when he is trying to coax the flies?",
|
1308 |
+
"diff": [
|
1309 |
+
"delete text[8:9] --> decoded_text[8:8] ' ' --> ''"
|
1310 |
+
],
|
1311 |
+
"n_oov_chars": 0,
|
1312 |
+
"oov_ratio": 0.0,
|
1313 |
+
"oov_charset": "[]"
|
1314 |
+
},
|
1315 |
+
{
|
1316 |
+
"text": "Does a hawk 'retreat' into the sky when he is looking out for",
|
1317 |
+
"decoded_text": "Does a hawk'retreat' into the sky when he is looking out for",
|
1318 |
+
"diff": [
|
1319 |
+
"delete text[11:12] --> decoded_text[11:11] ' ' --> ''"
|
1320 |
+
],
|
1321 |
+
"n_oov_chars": 0,
|
1322 |
+
"oov_ratio": 0.0,
|
1323 |
+
"oov_charset": "[]"
|
1324 |
+
}
|
1325 |
+
]
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.es.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.fa.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.fr.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.ja.diff.json
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "好きなことで生きていく人生って憧れますよね。自分のやりたいことだけやって生きていけたらどんなに幸せなんだろうって。 で、ふと思ったんですよ。『やりたいことやって成功してる人って\"やりたいことしかやって ...",
|
4 |
+
"decoded_text": "好きなことで生きていく人生って憧れますよね。自分のやりたいことだけやって生きていけたらどんなに幸せなんだろうって。 で、ふと思ったんですよ。『やりたいことやって成功してる人って\"やりたいことしかやって...",
|
5 |
+
"diff": [
|
6 |
+
"delete text[100:101] --> decoded_text[100:100] ' ' --> ''"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "生きていると色んな事がある。 その中でも人生を左右する大きな出来事がきっと生きているうちに何度かあると思う。 そんな時、自分はどう生きるか。 全てに時がある。 そして祈りの中で導かれる時がある。 その実感を得られる時もあれば振り返った時にそう感じる時もあるだろう。 ...",
|
14 |
+
"decoded_text": "生きていると色んな事がある。 その中でも人生を左右する大きな出来事がきっと生きているうちに何度かあると思う。 そんな時、自分はどう生きるか。 全てに時がある。 そして祈りの中で導かれる時がある。 その実感を得られる時もあれば振り返った時にそう感じる時もあるだろう。...",
|
15 |
+
"diff": [
|
16 |
+
"delete text[132:133] --> decoded_text[132:132] ' ' --> ''"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "久しぶりに投稿となりました。 ここ最近はお仕事もそれなりに忙しく、ドタバタ。 でも新しい業務、新しいメンバーと共に仕事をすることで脳が活性化されているのが分かります。 先月から始めた弁当生活もたまに休んでいますが、継続中です。 今日は妻が弁当を作ってくれました。 ...",
|
24 |
+
"decoded_text": "久しぶりに投稿となりました。 ここ最近はお仕事もそれなりに忙しく、ドタバタ。 でも新しい業務、新しいメンバーと共に仕事をすることで脳が活性化されているのが分かります。 先月から始めた弁当生活もたまに休んでいますが、継続中です。 今日は妻が弁当を作ってくれました。...",
|
25 |
+
"diff": [
|
26 |
+
"delete text[131:132] --> decoded_text[131:131] ' ' --> ''"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "彼氏の元カノがまだ彼氏に未練があるかもしれません…。 私は高校1年生で、 ...",
|
34 |
+
"decoded_text": "彼氏の元カノがまだ彼氏に未練があるかもしれません…。 私は高校1年生で、...",
|
35 |
+
"diff": [
|
36 |
+
"delete text[36:37] --> decoded_text[36:36] ' ' --> ''"
|
37 |
+
],
|
38 |
+
"n_oov_chars": 0,
|
39 |
+
"oov_ratio": 0.0,
|
40 |
+
"oov_charset": "[]"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"text": "どうもこんにちは。今まで当ブログを見てくださった方ならわかると思うのですが、今日からちょっと雰囲気が変わったことに気づきました?そうです。広告が貼られるようになったのです。この広告はGoogle Ad ...",
|
44 |
+
"decoded_text": "どうもこんにちは。今まで当ブログを見てくださった方ならわかると思うのですが、今日からちょっと雰囲気が変わったことに気づきました?そうです。広告が貼られるようになったのです。この広告はGoogle Ad...",
|
45 |
+
"diff": [
|
46 |
+
"delete text[100:101] --> decoded_text[100:100] ' ' --> ''"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "初めて生命保険に加入する人が保険選びに失敗しないためのポイントについて解説してい ...",
|
54 |
+
"decoded_text": "初めて生命保険に加入する人が保険選びに失敗しないためのポイントについて解説してい...",
|
55 |
+
"diff": [
|
56 |
+
"delete text[40:41] --> decoded_text[40:40] ' ' --> ''"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "生命保険を選ぶ際に保険会社はどのように選べばよいのでしょうか?選び方のポイントな ...",
|
64 |
+
"decoded_text": "生命保険を選ぶ際に保険会社はどのように選べばよいのでしょうか?選び方のポイントな...",
|
65 |
+
"diff": [
|
66 |
+
"delete text[40:41] --> decoded_text[40:40] ' ' --> ''"
|
67 |
+
],
|
68 |
+
"n_oov_chars": 0,
|
69 |
+
"oov_ratio": 0.0,
|
70 |
+
"oov_charset": "[]"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"text": "よく言われることですが、『コンポは105以上にしたほうが無難』という��があります。 これは果たしてどういうことなのか、説明していきます。 ...",
|
74 |
+
"decoded_text": "よく言われることですが、『コンポは105以上にしたほうが無難』という説があります。 これは果たしてどういうことなのか、説明していきます。...",
|
75 |
+
"diff": [
|
76 |
+
"delete text[68:69] --> decoded_text[68:68] ' ' --> ''"
|
77 |
+
],
|
78 |
+
"n_oov_chars": 0,
|
79 |
+
"oov_ratio": 0.0,
|
80 |
+
"oov_charset": "[]"
|
81 |
+
}
|
82 |
+
]
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.ko.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-0.5B-Instruct @ cc100.zh-Hans.diff.json
ADDED
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "佩奇大学健康科学学院在Kaposvár(高波什瓦尔), Pécs(佩奇), Szombathely(松博特海伊) ,Zalaegerszeg (扎洛埃格塞格) 有四所地区培训中心。这些培训中心都与当地的医院和社会机构保持着良好的关系。我院在健康科学领域是匈牙利提供专业数量最多,教师数量最多和校园数量最多的教育机构。",
|
4 |
+
"decoded_text": "佩奇大学健康科学学院在Kaposvár(高波什瓦尔), Pécs(佩奇), Szombathely(松博特海伊),Zalaegerszeg (扎洛埃格塞格) 有四所地区培训中心。这些培训中心都与当地的医院和社会机构保持着良好的关系。我院在健康科学领域是匈牙利提供专业数量最多,教师数量最多和校园数量最多的教育机构。",
|
5 |
+
"diff": [
|
6 |
+
"delete text[56:57] --> decoded_text[56:56] ' ' --> ''"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "支持和 虚拟机,以及 7 . / 和的屏幕截图,在三个不同的分区中有三个文档.最大的问题之一是硬件支持可能很棘手。 我参加过一些冷酷黑暗的联赛。",
|
14 |
+
"decoded_text": "支持和 虚拟机,以及 7. / 和的屏幕截图,在三个不同的分区中有三个文档.最大的问题之一是硬件支持可能很棘手。 我参加过一些冷酷黑暗的联赛。",
|
15 |
+
"diff": [
|
16 |
+
"delete text[12:13] --> decoded_text[12:12] ' ' --> ''"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "他指责执法部门和 其他政府机构调查他与俄罗斯的关系,作为参与党派恐怖袭击的深刻的国家阴谋,他经常对媒体愤怒作为敌人。尽管瑞士化学公司与法国建筑材料公司-达成协议以结束长期存在的法律纠纷,但 .的股价在指数水平上走势平稳,股价上涨8.7%至的顶部。",
|
24 |
+
"decoded_text": "他指责执法部门和 其他政府机构调查他与俄罗斯的关系,作为参与党派恐怖袭击的深刻的国家阴谋,他经常对媒体愤怒作为敌人。尽管瑞士化学公司与法国建筑材料公司-达成协议以结束长期存在的法律纠纷,但.的股价在指数水平上走势平稳,股价上涨8.7%至的顶部。",
|
25 |
+
"diff": [
|
26 |
+
"delete text[94:95] --> decoded_text[94:94] ' ' --> ''"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "简介:上期,本报《快乐老年》版推出了暑假期间老人“上岗”带娃的报道,不同老人有不同的带娃方式。有的老人深感带娃是一种甜蜜的负担。确实,现在孩子的教育问题越来越受到重视,很多老人与时俱进,吸收了好的育儿理念。 ...",
|
34 |
+
"decoded_text": "简介:上期,本报《快乐老年》版推出了暑假期间老人“上岗”带娃的报道,不同老人有不同的带娃方式。有的老人深感带娃是一种甜蜜的负担。确实,现在孩子的教育问题越来越受到重视,很多老人与时俱进,吸收了好的育儿理念。...",
|
35 |
+
"diff": [
|
36 |
+
"delete text[103:104] --> decoded_text[103:103] ' ' --> ''"
|
37 |
+
],
|
38 |
+
"n_oov_chars": 0,
|
39 |
+
"oov_ratio": 0.0,
|
40 |
+
"oov_charset": "[]"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"text": "导演:安德鲁・麦卡锡,玛吉・基莉 ,迈克・卡希尔,马克・托德莱",
|
44 |
+
"decoded_text": "导演:安德鲁・麦卡锡,玛吉・基莉,迈克・卡希尔,马克・托德莱",
|
45 |
+
"diff": [
|
46 |
+
"delete text[16:17] --> decoded_text[16:16] ' ' --> ''"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "龙茫接过来,他知道是什么,可是现在看不到任何东西的他拿着这些东西真的有用吗?全球华人的自由讨论天地 4 ~5 ?0 G' R5 f2 b) G. v",
|
54 |
+
"decoded_text": "龙茫接过来,他知道是什么,可是现在看不到任何东西的他拿着这些东西真的有用吗?全球华人的自由讨论天地 4 ~5?0 G' R5 f2 b) G. v",
|
55 |
+
"diff": [
|
56 |
+
"delete text[54:55] --> decoded_text[54:54] ' ' --> ''"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "如果在没有失明之前,他很可能能够看到写在手心上的字,但现在是完全看不到了,他只能感到手指在他的手心上不停地划来划去,却不知道写了什么上去。全球华人的自由讨论天地 ! _& N. f/ x6 @/ x2 v; \\\\6 I' Y6 q3 _",
|
64 |
+
"decoded_text": "如果在没有失明之前,他很可能能够看到写在手心上的字,但现在是完全看不到了,他只能感到手指在他的手心上不停地划来划去,却不知道写了什么上去。全球华人的自由讨论天地! _& N. f/ x6 @/ x2 v; \\\\6 I' Y6 q3 _",
|
65 |
+
"diff": [
|
66 |
+
"delete text[80:81] --> decoded_text[80:80] ' ' --> ''"
|
67 |
+
],
|
68 |
+
"n_oov_chars": 0,
|
69 |
+
"oov_ratio": 0.0,
|
70 |
+
"oov_charset": "[]"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"text": "是啊,对于一个刚刚恢复光明的人来说是多么想出去走走,看看这个世界啊!华人论坛0 [ b- q4 B b' [5 W7 ?$ K! M",
|
74 |
+
"decoded_text": "是啊,对于一个刚刚恢复光明的人来说是多么想出去走走,看看这个世界啊!华人论坛0 [ b- q4 B b' [5 W7?$ K! M",
|
75 |
+
"diff": [
|
76 |
+
"delete text[58:59] --> decoded_text[58:58] ' ' --> ''"
|
77 |
+
],
|
78 |
+
"n_oov_chars": 0,
|
79 |
+
"oov_ratio": 0.0,
|
80 |
+
"oov_charset": "[]"
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"text": ". ?. h& r( A8 u1 ]# L 如果是邻居的话也用不着给我发这么一条短信吧?我记得妈说过我7岁时就搬走了,那么说我还有跟他联系吗?可是为什么那个女人会叫我郭水?",
|
84 |
+
"decoded_text": ".?. h& r( A8 u1 ]# L 如果是邻居的话也用不着给我发这么一条短信吧?我记得妈说过我7岁时就搬走了,那么说我还有跟他联系吗?可是为什么那个女人会叫我郭水?",
|
85 |
+
"diff": [
|
86 |
+
"delete text[1:2] --> decoded_text[1:1] ' ' --> ''"
|
87 |
+
],
|
88 |
+
"n_oov_chars": 0,
|
89 |
+
"oov_ratio": 0.0,
|
90 |
+
"oov_charset": "[]"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"text": "8 d7 G! M B+ X9 l/ k\" {6 ?- cbb.a4.79ae.static.theplanet.com “算了,都过去了,你就不要再问了。”蒋成说话的声音有些奇怪,好像很恐惧一样,龙茫看到他的表情似乎很紧张。",
|
94 |
+
"decoded_text": "8 d7 G! M B+ X9 l/ k\" {6?- cbb.a4.79ae.static.theplanet.com “算了,都过去了,你就不要再问了。”蒋成说话的声音有些奇怪,好像很恐惧一样,龙茫看到他的表情似乎很紧张。",
|
95 |
+
"diff": [
|
96 |
+
"delete text[24:25] --> decoded_text[24:24] ' ' --> ''"
|
97 |
+
],
|
98 |
+
"n_oov_chars": 0,
|
99 |
+
"oov_ratio": 0.0,
|
100 |
+
"oov_charset": "[]"
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"text": "“好了老婆!你看离上学的时间都还有一个多月,你就让他玩吧,等到开学后我一定会严厉管教他的!所以啊,你就取消那个补习班吧。”郭水实在也不想小茫的童年在补习班上度过,这么枯燥的人生也不是有多少人能够承受的。1 ^ `) ?4 ]% f! _",
|
104 |
+
"decoded_text": "“好了老婆!你看离上学的时间都还有一个多月,你就让他玩吧,等到开学后我一定会严厉管教他的!所以啊,你就取消那个补习班吧。”郭水实在也不想小茫的童年在补习班上度过,这么枯燥的人生也不是有多少人能够承受的。1 ^ `)?4 ]% f! _",
|
105 |
+
"diff": [
|
106 |
+
"delete text[107:108] --> decoded_text[107:107] ' ' --> ''"
|
107 |
+
],
|
108 |
+
"n_oov_chars": 0,
|
109 |
+
"oov_ratio": 0.0,
|
110 |
+
"oov_charset": "[]"
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"text": "看到这些旧家具了吗, 跟前面设计的时候渲染的颜色是不是一毛一样? 基本都是黑色的... ...我手上那个白色的板子就是渲染的时候发出神秘光线的那个宜家的隔板",
|
114 |
+
"decoded_text": "看到这些旧家具了吗, 跟前面设计的时候渲染的颜色是不是一毛一样? 基本都是黑色的......我手上那个白色的板子就是渲染的时候发出神秘光线的那个宜家的隔板",
|
115 |
+
"diff": [
|
116 |
+
"delete text[43:44] --> decoded_text[43:43] ' ' --> ''"
|
117 |
+
],
|
118 |
+
"n_oov_chars": 0,
|
119 |
+
"oov_ratio": 0.0,
|
120 |
+
"oov_charset": "[]"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"text": "bash 支持一个特殊的变量 !$,永远保存着前一条命令的最后一个参数,例如:",
|
124 |
+
"decoded_text": "bash 支持一个特殊的变量!$,永远保存着前一条命令的最后一个参数,例如:",
|
125 |
+
"diff": [
|
126 |
+
"delete text[14:15] --> decoded_text[14:14] ' ' --> ''"
|
127 |
+
],
|
128 |
+
"n_oov_chars": 0,
|
129 |
+
"oov_ratio": 0.0,
|
130 |
+
"oov_charset": "[]"
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"text": "如果你想一次删除多个项目,还可以使用 !* ( 多谢 qt 补充 : )。",
|
134 |
+
"decoded_text": "如果你想一次删除多个项目,还可以使用!* ( 多谢 qt 补充 : )。",
|
135 |
+
"diff": [
|
136 |
+
"delete text[18:19] --> decoded_text[18:18] ' ' --> ''"
|
137 |
+
],
|
138 |
+
"n_oov_chars": 0,
|
139 |
+
"oov_ratio": 0.0,
|
140 |
+
"oov_charset": "[]"
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"text": "这家位于佛罗里达(Florida )的酒店分布在2栋建筑内,提供免费WiFi和2个室外游泳池,距离劳德代尔堡海滩公园(Fort Lauderdale Beach Park)3.6公里,距离Classic Gateway Theatre剧院3.9公里。 Tara Hotel酒店的所有客房均配有平板有线电视。每间套房和一室公寓均设有小冰箱和连接浴室。 Hotal... I believe the young mans name was chun. He was great . Helpful",
|
144 |
+
"decoded_text": "这家位于佛罗里达(Florida )的酒店分布在2栋建筑内,提供免费WiFi和2个室外游泳池,距离劳德代尔堡海滩公园(Fort Lauderdale Beach Park)3.6公里,距离Classic Gateway Theatre剧院3.9公里。 Tara Hotel酒店的所有客房均配有平板有线电视。每间套房和一室公寓均设有小冰箱和连接浴室。 Hotal... I believe the young mans name was chun. He was great. Helpful",
|
145 |
+
"diff": [
|
146 |
+
"delete text[236:237] --> decoded_text[236:236] ' ' --> ''"
|
147 |
+
],
|
148 |
+
"n_oov_chars": 0,
|
149 |
+
"oov_ratio": 0.0,
|
150 |
+
"oov_charset": "[]"
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"text": "陕国投作为陕西省国资委管理的一级企业、是国内首批上市的非银行金融机构, ,净资产79.31亿元,2017年管理的信托资产总额达到4532.22亿元,具备优秀的服务地方经济建设能力和资产管理经验。[详情]",
|
154 |
+
"decoded_text": "陕国投作为陕西省国资委管理的一级企业、是国内首批上市的非银行金融机构,,净资产79.31亿元,2017年管理的信托资产总额达到4532.22亿元,具备优秀的服务地方经济建设能力和资产管理经验。[详情]",
|
155 |
+
"diff": [
|
156 |
+
"delete text[35:36] --> decoded_text[35:35] ' ' --> ''"
|
157 |
+
],
|
158 |
+
"n_oov_chars": 0,
|
159 |
+
"oov_ratio": 0.0,
|
160 |
+
"oov_charset": "[]"
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"text": "或是黎明,或是一个巨大的、敞开的舞台上洁白的聚光灯,一切都处于显在之中,正如只有意大利歌剧的场景才能够做到的。诸多的嘴巴和巨大的、敞开的身体被部署着,以宣告空间的纯粹碎片——dinanzi al re ! davanti a lui ! 来吧,这里,让我们走,让我们来,让我们离去,让我们留下——声音从腹部浮现,众多合唱队,一首流行歌曲——让我们走,让我们看,我大笑,我哭泣,我活着,我死去。书写和思,也是如此,张大的嘴巴,身体的作品。",
|
164 |
+
"decoded_text": "或是黎明,或是一个巨大的、敞开的舞台上洁白的聚光灯,一切都处于显在之中,正如只有意大利歌剧的场景才能够做到的。诸多的嘴巴和巨大的、敞开的身体被部署着,以宣告空间的纯粹碎片——dinanzi al re! davanti a lui! 来吧,这里,让我们走,让我们来,让我们离去,让我们留下——声音从腹部浮现,众多合唱队,一首流行歌曲——让我们走,让我们看,我大笑,我哭泣,我活着,我死去。书写和思,也是如此,张大的嘴巴,身体的作品。",
|
165 |
+
"diff": [
|
166 |
+
"delete text[100:101] --> decoded_text[100:100] ' ' --> ''",
|
167 |
+
"delete text[116:117] --> decoded_text[115:115] ' ' --> ''"
|
168 |
+
],
|
169 |
+
"n_oov_chars": 0,
|
170 |
+
"oov_ratio": 0.0,
|
171 |
+
"oov_charset": "[]"
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"text": "不用找了 ,那架飞机是被都敏俊劫走了,你和我面对面,距离很近,但心的距离好远。;跟喜欢的人在一起以后,笑点会变低,泪点也会变低,就连智商也低了。。",
|
175 |
+
"decoded_text": "不用找了,那架飞机是被都敏俊劫走了,你和我面对面,距离很近,但心的距离好远。;跟喜欢的人在一起以后,笑点会变低,泪点也会变低,就连智商也低了。。",
|
176 |
+
"diff": [
|
177 |
+
"delete text[4:5] --> decoded_text[4:4] ' ' --> ''"
|
178 |
+
],
|
179 |
+
"n_oov_chars": 0,
|
180 |
+
"oov_ratio": 0.0,
|
181 |
+
"oov_charset": "[]"
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"text": "聚合物电解质燃料电池亦称为质子交换膜(PEM ) 燃料电池, 最初是美国通用电气公司于1960年以空间应用为目的而研制的。由于初期研制的PEM电池内阻较大, 功率不高, 特别是质子交换膜(聚苯乙烯磺酸膜) 不能承耐强烈的电氧化还原作用而影响电池寿命, 因此, 美国宇航局试用后没有选中。直到1983 年加拿大国防部与国家研究委员会确认, PEM电池可以满足特殊的军事要求并有良好的商业前景, 于1984 年委托巴拉德能源公司对该电池进行开发研制, PEM电池的研究工作再度受到重视。美国、日本、意大利、俄罗斯、比利时等国家先后成立专门机构研究开发PEM电池。随着杜邦公司Nafion 膜的改进和Dow 化学公司Dow 膜的成功, 各类PEM 演示电池陆续问世, PEM 电池研究取得了重大进展。催化剂铂载量从10 mg・cm-2 降至0.4mg・cm-2 以下, 功率从0.1W・cm-2升至2~ 6W・cm-2, 电池组电极面积可达500~1200 cm2, 功率达5~10 kW , 其优势明显上升, 在很多方面已优于碱性燃料电池。",
|
185 |
+
"decoded_text": "聚合物电解质燃料电池亦称为质子交换膜(PEM ) 燃料电池, 最初是美国通用电气公司于1960年以空间应用为目的而研制的。由于初期研制的PEM电池内阻较大, 功率不高, 特别是质子交换膜(聚苯乙烯磺酸膜) 不能承耐强烈的电氧化还原作用而影响电池寿命, 因此, 美国宇航局试用后没有选中。直到1983 年加拿大国防部与国家研究委员会确认, PEM电池可以满足特殊的军事要求并有良好的商业前景, 于1984 年委托巴拉德能源公司对该电池进行开发研制, PEM电池的研究工作再度受到重视。美国、日本、意大利、俄罗斯、比利时等国家先后成立专门机构研究开发PEM电池。随着杜邦公司Nafion 膜的改进和Dow 化学公司Dow 膜的成功, 各类PEM 演示电池陆续问世, PEM 电池研究取得了重大进展。催化剂铂载量从10 mg・cm-2 降至0.4mg・cm-2 以下, 功率从0.1W・cm-2升至2~ 6W・cm-2, 电池组电极面积可达500~1200 cm2, ���率达5~10 kW, 其优势明显上升, 在很多方面已优于碱性燃料电池。",
|
186 |
+
"diff": [
|
187 |
+
"delete text[441:442] --> decoded_text[441:441] ' ' --> ''"
|
188 |
+
],
|
189 |
+
"n_oov_chars": 0,
|
190 |
+
"oov_ratio": 0.0,
|
191 |
+
"oov_charset": "[]"
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"text": "福州人流网: 福州宫外孕的危险性大吗?宫外孕的威胁是非常大的,但许多女性因为不了解宫外孕的症状,没有及时的发现,往往导致了悲剧的发生。下面由福州福兴妇产医院医生为大家介绍 ...",
|
195 |
+
"decoded_text": "福州人流网: 福州宫外孕的危险性大吗?宫外孕的威胁是非常大的,但许多女性因为不了解宫外孕的症状,没有及时的发现,往往导致了悲剧的发生。下面由福州福兴妇产医院医生为大家介绍...",
|
196 |
+
"diff": [
|
197 |
+
"delete text[85:86] --> decoded_text[85:85] ' ' --> ''"
|
198 |
+
],
|
199 |
+
"n_oov_chars": 0,
|
200 |
+
"oov_ratio": 0.0,
|
201 |
+
"oov_charset": "[]"
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"text": "“四组,你们那边到底什么情况?”野牛点射刘彬彬锁定胜局,鲁能3-1胜延边,再看看伊布拉希莫维奇那并不庆祝,可实际上却相当打脸的表情,两人心里头那叫一个郁闷啊,简直就跟被人穿裆没两样啊!,对于像圣埃蒂安这样的球队来说,没卖掉球员,是没足够的资金去引进球员的。“让我们看看,重新回到左边锋的加雷斯?贝尔的表现会不会有所起色。”阿兰建功卡希尔点射,恒大2-1胜绿城“I cannot understand…. The connection… exists only . between your two wands….”,“你们三个打算辞职吗?”“好啦,快吃饭了,别洗太久!”,郭士强赛后否认辽宁男篮欲退赛克罗地亚中场拿球转身面对进攻方向,看到加雷斯?贝尔已经在左路举手了,直接轻轻一脚直塞球,让皮球从阿尔维斯和普约尔中间穿过。。",
|
205 |
+
"decoded_text": "“四组,你们那边到底什么情况?”野牛点射刘彬彬锁定胜局,鲁能3-1胜延边,再看看伊布拉希莫维奇那并不庆祝,可实际上却相当打脸的表情,两人心里头那叫一个郁闷啊,简直就跟被人穿裆没两样啊!,对于像圣埃蒂安这样的球队来说,没卖掉球员,是没足够的资金去引进球员的。“让我们看看,重新回到左边锋的加雷斯?贝尔的表现会不会有所起色。”阿兰建功卡希尔点射,恒大2-1胜绿城“I cannot understand…. The connection… exists only. between your two wands….”,“你们三个打算辞职吗?”“好啦,快吃饭了,别洗太久!”,郭士强赛后否认辽宁男篮欲退赛克罗地亚中场拿球转身面对进攻方向,看到加雷斯?贝尔已经在左路举手了,直接轻轻一脚直塞球,让皮球从阿尔维斯和普约尔中间穿过。。",
|
206 |
+
"diff": [
|
207 |
+
"delete text[229:230] --> decoded_text[229:229] ' ' --> ''"
|
208 |
+
],
|
209 |
+
"n_oov_chars": 0,
|
210 |
+
"oov_ratio": 0.0,
|
211 |
+
"oov_charset": "[]"
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"text": "21.某中学数学兴趣小组为了解本校学生对电视节目的喜爱情况,随机调查了部分学生最喜爱哪一类节目 (被调查的学生只选一类并且没有不选择的) ,并将调查结果制成了如下的两个统计图(不完整).请你根据图中所提供的信息,完成下列问题:",
|
215 |
+
"decoded_text": "21.某中学数学兴趣小组为了解本校学生对电视节目的喜爱情况,随机调查了部分学生最喜爱哪一类节目 (被调查的学生只选一类并且没有不选择的),并将调查结果制成了如下的两个统计图(不完整).请你根据图中所提供的信息,完成下列问题:",
|
216 |
+
"diff": [
|
217 |
+
"delete text[68:69] --> decoded_text[68:68] ' ' --> ''"
|
218 |
+
],
|
219 |
+
"n_oov_chars": 0,
|
220 |
+
"oov_ratio": 0.0,
|
221 |
+
"oov_charset": "[]"
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"text": "“我没听到他们说的一切。另一位747副驾驶Jonhan Harman在网站上回复道:“干得好的亚当,你应该仍然打他!”Corfield在第一次作为管家之后承认客户服务“不适合我”和“这个f *商务舱里的***几乎搞定了!! T ** t !!!!!“BA空中客车A320飞行员约翰林肯插话:”我现在已经完成了三次旅行。",
|
225 |
+
"decoded_text": "“我没听到他们说的一切。另一位747副驾驶Jonhan Harman在网站上回复道:“干得好的亚当,你应该仍然打他!”Corfield在第一次作为管家之后承认客户服务“不适合我”和“这个f *商务舱里的***几乎搞定了!! T ** t!!!!!“BA空中客车A320飞行员约翰林肯插话:”我现在已经完成了三次旅行。",
|
226 |
+
"diff": [
|
227 |
+
"delete text[118:119] --> decoded_text[118:118] ' ' --> ''"
|
228 |
+
],
|
229 |
+
"n_oov_chars": 0,
|
230 |
+
"oov_ratio": 0.0,
|
231 |
+
"oov_charset": "[]"
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"text": "这份经验总结并不适合手握大把实习经历的大牛们,但除了他们应该还有不少想求职产品经理的同学。大牛的经历只会徒增悲伤,我写的东西或许能够带来一些不一样的感���。 ...",
|
235 |
+
"decoded_text": "这份经验总结并不适合手握大把实习经历的大牛们,但除了他们应该还有不少想求职产品经理的同学。大牛的经历只会徒增悲伤,我写的东西或许能够带来一些不一样的感觉。...",
|
236 |
+
"diff": [
|
237 |
+
"delete text[77:78] --> decoded_text[77:77] ' ' --> ''"
|
238 |
+
],
|
239 |
+
"n_oov_chars": 0,
|
240 |
+
"oov_ratio": 0.0,
|
241 |
+
"oov_charset": "[]"
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"text": "根据西安网页设计的特点和制作网页的经验,一般应遵循以下原则:提纲挈领。主页的作用好比一个精美的广告,以下内容必不可少 ..",
|
245 |
+
"decoded_text": "根据西安网页设计的特点和制作网页的经验,一般应遵循以下原则:提纲挈领。主页的作用好比一个精美的广告,以下内容必不可少..",
|
246 |
+
"diff": [
|
247 |
+
"delete text[58:59] --> decoded_text[58:58] ' ' --> ''"
|
248 |
+
],
|
249 |
+
"n_oov_chars": 0,
|
250 |
+
"oov_ratio": 0.0,
|
251 |
+
"oov_charset": "[]"
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"text": "西安网站优化公司认为,每个月月底改标题,对优化影响并不会太大,因为月底的时候,正是搜索引擎大更新的时候,对网站进行改变的时候,改网站的标题时,虽然收录有所下降,但网站的整体排名还是比较好的 ..",
|
255 |
+
"decoded_text": "西安网站优化公司认为,每个月月底改标题,对优化影响并不会太大,因为月底的时候,正是搜索引擎大更新的时候,对网站进行改变的时候,改网站的标题时,虽然收录有所下降,但网站的整体排名还是比较好的..",
|
256 |
+
"diff": [
|
257 |
+
"delete text[94:95] --> decoded_text[94:94] ' ' --> ''"
|
258 |
+
],
|
259 |
+
"n_oov_chars": 0,
|
260 |
+
"oov_ratio": 0.0,
|
261 |
+
"oov_charset": "[]"
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"text": "客户懂网站之后,虽然对网站建设公司影响很大,但我们能够看到十分积极的一面,西安做网站公司比如说现在的网站越来越国际化,表现形式越来越丰富,技术越来越先进,服务质量越来越好等等 ..",
|
265 |
+
"decoded_text": "客户懂网站之后,虽然对网站建设公司影响很大,但我们能够看到十分积极的一面,西安做网站公司比如说现在的网站越来越国际化,表现形式越来越丰富,技术越来越先进,服务质量越来越好等等..",
|
266 |
+
"diff": [
|
267 |
+
"delete text[87:88] --> decoded_text[87:87] ' ' --> ''"
|
268 |
+
],
|
269 |
+
"n_oov_chars": 0,
|
270 |
+
"oov_ratio": 0.0,
|
271 |
+
"oov_charset": "[]"
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"text": "网站上线前期的准备工作很重要,只有在上线前做好各种准备工作,才能避免上线后出现各种问题,那么,西安做网站公司在上线前有需要做哪些工作? ..",
|
275 |
+
"decoded_text": "网站上线前期的准备工作很重要,只有在上线前做好各种准备工作,才能避免上线后出现各种问题,那么,西安做网站公司在上线前有需要做哪些工作?..",
|
276 |
+
"diff": [
|
277 |
+
"delete text[67:68] --> decoded_text[67:67] ' ' --> ''"
|
278 |
+
],
|
279 |
+
"n_oov_chars": 0,
|
280 |
+
"oov_ratio": 0.0,
|
281 |
+
"oov_charset": "[]"
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"text": "相对来说一个网站的好坏并不是在于网站的无效链接,重点还是网站的设计上和其他的细节,为什么这样说呢?西安网站建设公司认为作为一个网站来说给用户的第一眼很重要的,如果给用户第一眼感觉很差排版乱不合理 ..",
|
285 |
+
"decoded_text": "相对来说一个网站的好坏并不是在于网站的无效链接,重点还是网站的设计上和其他的细节,为什么这样说呢?西安网站建设公司认为作为一个网站来说给用户的第一眼很重要的,如果给用户第一眼感觉很差排版乱不合理..",
|
286 |
+
"diff": [
|
287 |
+
"delete text[97:98] --> decoded_text[97:97] ' ' --> ''"
|
288 |
+
],
|
289 |
+
"n_oov_chars": 0,
|
290 |
+
"oov_ratio": 0.0,
|
291 |
+
"oov_charset": "[]"
|
292 |
+
},
|
293 |
+
{
|
294 |
+
"text": "相信很多人都有一个疑问,就是西安建设网站,明明就是一个虚拟不存在的东西怎么会这么贵,少的要好几千,便宜的有几万甚至几十万!这到底是为什么呢? ..",
|
295 |
+
"decoded_text": "相信很多人都有一个疑问,就是西安建设网站,明明就是一个虚拟不存在的东西怎么会这么贵,少的要好几千,便宜的有几万甚至几十万!这到底是为什么呢?..",
|
296 |
+
"diff": [
|
297 |
+
"delete text[70:71] --> decoded_text[70:70] ' ' --> ''"
|
298 |
+
],
|
299 |
+
"n_oov_chars": 0,
|
300 |
+
"oov_ratio": 0.0,
|
301 |
+
"oov_charset": "[]"
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"text": "简介: 朱茵,女,1958年5月27日出生于上海,影视演员,戏剧家,上海戏剧家协会会员,中国民主同盟盟员,就职于上海话剧艺术中心。 曾出演多部话剧、电影以及电视剧作品,其中有话剧《日出》,电视剧《丈母娘来了》、《大男当婚》、《辣妈正传》、《老米家的婚事》 ... 查看更多>",
|
305 |
+
"decoded_text": "简介: 朱茵,女,1958年5月27日出生于上海,影视演员,戏剧���,上海戏剧家协会会员,中国民主同盟盟员,就职于上海话剧艺术中心。 曾出演多部话剧、电影以及电视剧作品,其中有话剧《日出》,电视剧《丈母娘来了》、《大男当婚》、《辣妈正传》、《老米家的婚事》... 查看更多>",
|
306 |
+
"diff": [
|
307 |
+
"delete text[127:128] --> decoded_text[127:127] ' ' --> ''"
|
308 |
+
],
|
309 |
+
"n_oov_chars": 0,
|
310 |
+
"oov_ratio": 0.0,
|
311 |
+
"oov_charset": "[]"
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"text": "5. .具有较强的产品、用户心理分析的能力与看法,对市场发展方向和动态有较强的分析能力,了解网站用户的服务需求,能够根据需求与市场变化迅速做出回应;",
|
315 |
+
"decoded_text": "5..具有较强的产品、用户心理分析的能力与看法,对市场发展方向和动态有较强的分析能力,了解网站用户的服务需求,能够根据需求与市场变化迅速做出回应;",
|
316 |
+
"diff": [
|
317 |
+
"delete text[2:3] --> decoded_text[2:2] ' ' --> ''"
|
318 |
+
],
|
319 |
+
"n_oov_chars": 0,
|
320 |
+
"oov_ratio": 0.0,
|
321 |
+
"oov_charset": "[]"
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"text": "简介:转载自:搜狐体育 作者:体育之星刘伟是中国男子拳击69公斤级名将,前中国拳击队队长。1987年出生的他,小时候却因为身高和臂展不占优势,一度被一些人认为并不适合从事专业拳击运动。但在湖北 ...",
|
325 |
+
"decoded_text": "简介:转载自:搜狐体育 作者:体育之星刘伟是中国男子拳击69公斤级名将,前中国拳击队队长。1987年出生的他,小时候却因为身高和臂展不占优势,一度被一些人认为并不适合从事专业拳击运动。但在湖北...",
|
326 |
+
"diff": [
|
327 |
+
"delete text[96:97] --> decoded_text[96:96] ' ' --> ''"
|
328 |
+
],
|
329 |
+
"n_oov_chars": 0,
|
330 |
+
"oov_ratio": 0.0,
|
331 |
+
"oov_charset": "[]"
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"text": "15 D. 10 2 2.若抛物线 y 2 ? 8x 上一点 P 到其焦点的距离为 9 ,则点 P 的坐标为( C ) 。",
|
335 |
+
"decoded_text": "15 D. 10 2 2.若抛物线 y 2? 8x 上一点 P 到其焦点的距离为 9 ,则点 P 的坐标为( C ) 。",
|
336 |
+
"diff": [
|
337 |
+
"delete text[21:22] --> decoded_text[21:21] ' ' --> ''"
|
338 |
+
],
|
339 |
+
"n_oov_chars": 0,
|
340 |
+
"oov_ratio": 0.0,
|
341 |
+
"oov_charset": "[]"
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"text": "这些是我的一些特殊服务: 用嘴亲吻, 异装癖和恋物癖, 脸部射精, 性幻想, 性玩具, 无套口交, 女同, 恋尿癖, 按摩, 脱衣舞, 三人群交, 色情淋浴, 爱经, 69 ...",
|
345 |
+
"decoded_text": "这些是我的一些特殊服务: 用嘴亲吻, 异装癖和恋物癖, 脸部射精, 性幻想, 性玩具, 无套口交, 女同, 恋尿癖, 按摩, 脱衣舞, 三人群交, 色情淋浴, 爱经, 69...",
|
346 |
+
"diff": [
|
347 |
+
"delete text[86:87] --> decoded_text[86:86] ' ' --> ''"
|
348 |
+
],
|
349 |
+
"n_oov_chars": 0,
|
350 |
+
"oov_ratio": 0.0,
|
351 |
+
"oov_charset": "[]"
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"text": "因本人另有发展,将正在营业中的班车转让,有意者请致电:13197523784 非诚勿扰,谢谢! 本贴不回复,有 ...",
|
355 |
+
"decoded_text": "因本人另有发展,将正在营业中的班车转让,有意者请致电:13197523784 非诚勿扰,谢谢! 本贴不回复,有...",
|
356 |
+
"diff": [
|
357 |
+
"delete text[55:56] --> decoded_text[55:55] ' ' --> ''"
|
358 |
+
],
|
359 |
+
"n_oov_chars": 0,
|
360 |
+
"oov_ratio": 0.0,
|
361 |
+
"oov_charset": "[]"
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"text": "现在孩子多了,想入手一架车代步。最好1.2的,因为想省油。大家都知道的现在油价突破天际。最好近二年的车 ...",
|
365 |
+
"decoded_text": "现在孩子多了,想入手一架车代步。最好1.2的,因为想省油。大家都知道的现在油价突破天际。最好近二年的车...",
|
366 |
+
"diff": [
|
367 |
+
"delete text[51:52] --> decoded_text[51:51] ' ' --> ''"
|
368 |
+
],
|
369 |
+
"n_oov_chars": 0,
|
370 |
+
"oov_ratio": 0.0,
|
371 |
+
"oov_charset": "[]"
|
372 |
+
}
|
373 |
+
]
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.ar.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.de.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.en.diff.json
ADDED
@@ -0,0 +1,1325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "and yeah im a boy,and no, im not g*y, im a nice guy. i dont love his songs or anything , but he's not that bad tbh.",
|
4 |
+
"decoded_text": "and yeah im a boy,and no, im not g*y, im a nice guy. i dont love his songs or anything, but he's not that bad tbh.",
|
5 |
+
"diff": [
|
6 |
+
"delete text[86:87] --> decoded_text[86:86] ' ' --> ''"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "Justin serenaded wonderful or better than a great I like popular songs, particularly as it is talented. all those who hate Justin are g**s because they feel jealous of him because he is handsome at the same time a rising singer and a small age. I myself appreciate the wonderful artist with this beautiful and talented .",
|
14 |
+
"decoded_text": "Justin serenaded wonderful or better than a great I like popular songs, particularly as it is talented. all those who hate Justin are g**s because they feel jealous of him because he is handsome at the same time a rising singer and a small age. I myself appreciate the wonderful artist with this beautiful and talented.",
|
15 |
+
"diff": [
|
16 |
+
"delete text[318:319] --> decoded_text[318:318] ' ' --> ''"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "Soften the landing zones with a pair of Rubber Mats , made from dyed rubber chips, heat compressed and available in dark green or brick red.",
|
24 |
+
"decoded_text": "Soften the landing zones with a pair of Rubber Mats, made from dyed rubber chips, heat compressed and available in dark green or brick red.",
|
25 |
+
"diff": [
|
26 |
+
"delete text[51:52] --> decoded_text[51:51] ' ' --> ''"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "We're not so rough and over the top these days, so they miiiiight survive ._.",
|
34 |
+
"decoded_text": "We're not so rough and over the top these days, so they miiiiight survive._.",
|
35 |
+
"diff": [
|
36 |
+
"delete text[73:74] --> decoded_text[73:73] ' ' --> ''"
|
37 |
+
],
|
38 |
+
"n_oov_chars": 0,
|
39 |
+
"oov_ratio": 0.0,
|
40 |
+
"oov_charset": "[]"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"text": "Just finished Hulse's \"Black River\" and simply adored the book. So pretty, overall, and much like the Kent Haruf novels, such as \"Plainsong\" that I've enjoyed over the years. \"Black River\" is surely one of the best five I've read this year. Solid Pulitzer choice, in my opinion. Side note: As I've mentioned before, I surely don't understand all of the hoopla surrounding \"The Sellout,\" with so many other worthy contenders. But, what do I know? I'm only a reader. :-) Read on ...",
|
44 |
+
"decoded_text": "Just finished Hulse's \"Black River\" and simply adored the book. So pretty, overall, and much like the Kent Haruf novels, such as \"Plainsong\" that I've enjoyed over the years. \"Black River\" is surely one of the best five I've read this year. Solid Pulitzer choice, in my opinion. Side note: As I've mentioned before, I surely don't understand all of the hoopla surrounding \"The Sellout,\" with so many other worthy contenders. But, what do I know? I'm only a reader. :-) Read on...",
|
45 |
+
"diff": [
|
46 |
+
"replace text[476:480] --> decoded_text[476:479] ' ...' --> '...'"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "I really don't understand all of the hoopla over THE SELLOUT. Just a so-so book, in my opinion. Minor work. I struggled through it, and can never get back the time spent on that tome. EILEEN and HONEYDEW are sooooooo much better, not to mention THE TURNER HOUSE, TSAR, DID YOU EVER, and others. I'm reading DELICIOUS FOODS right now, and think it's a major-serious contender as well. BLACK RIVER is next on my list, and I can't wait. But, what do I know? :-) Read on ...",
|
54 |
+
"decoded_text": "I really don't understand all of the hoopla over THE SELLOUT. Just a so-so book, in my opinion. Minor work. I struggled through it, and can never get back the time spent on that tome. EILEEN and HONEYDEW are sooooooo much better, not to mention THE TURNER HOUSE, TSAR, DID YOU EVER, and others. I'm reading DELICIOUS FOODS right now, and think it's a major-serious contender as well. BLACK RIVER is next on my list, and I can't wait. But, what do I know? :-) Read on...",
|
55 |
+
"diff": [
|
56 |
+
"replace text[466:470] --> decoded_text[466:469] ' ...' --> '...'"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "I have also read The Shore ,Alex, yes I agree its very good, maybe a chance. The last years I have just waited to last in the year to see who the genral public have been siding and gone for that, from a collectors point of view, it would be nice if something won which did not have a 100,000 in the first print run.",
|
64 |
+
"decoded_text": "I have also read The Shore,Alex, yes I agree its very good, maybe a chance. The last years I have just waited to last in the year to see who the genral public have been siding and gone for that, from a collectors point of view, it would be nice if something won which did not have a 100,000 in the first print run.",
|
65 |
+
"diff": [
|
66 |
+
"delete text[26:27] --> decoded_text[26:26] ' ' --> ''"
|
67 |
+
],
|
68 |
+
"n_oov_chars": 0,
|
69 |
+
"oov_ratio": 0.0,
|
70 |
+
"oov_charset": "[]"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"text": "Moving to K-W can be confusing for anybody: how can you explain King Street, that runs north, south, east and west ?! Or streets like King and Weber, that are sometimes parallel, and yet cross each other in two places ? For someone new to the country, adjusting to life here can be even much more confusing.",
|
74 |
+
"decoded_text": "Moving to K-W can be confusing for anybody: how can you explain King Street, that runs north, south, east and west?! Or streets like King and Weber, that are sometimes parallel, and yet cross each other in two places? For someone new to the country, adjusting to life here can be even much more confusing.",
|
75 |
+
"diff": [
|
76 |
+
"delete text[114:115] --> decoded_text[114:114] ' ' --> ''",
|
77 |
+
"delete text[217:218] --> decoded_text[216:216] ' ' --> ''"
|
78 |
+
],
|
79 |
+
"n_oov_chars": 0,
|
80 |
+
"oov_ratio": 0.0,
|
81 |
+
"oov_charset": "[]"
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"text": "Just in case you are getting the impression that it’s all work and no fun, let me remind you of the Multicultural Festival, which is held every year at Victoria Park during the Canada Day (July 1) weekend. For two fun-filled days, the whole family can enjoy crafts, traditional dancing and especially foods from around the world ! This event is something Kitchener-Waterloo always looks forward to.",
|
85 |
+
"decoded_text": "Just in case you are getting the impression that it’s all work and no fun, let me remind you of the Multicultural Festival, which is held every year at Victoria Park during the Canada Day (July 1) weekend. For two fun-filled days, the whole family can enjoy crafts, traditional dancing and especially foods from around the world! This event is something Kitchener-Waterloo always looks forward to.",
|
86 |
+
"diff": [
|
87 |
+
"delete text[328:329] --> decoded_text[328:328] ' ' --> ''"
|
88 |
+
],
|
89 |
+
"n_oov_chars": 0,
|
90 |
+
"oov_ratio": 0.0,
|
91 |
+
"oov_charset": "[]"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"text": "Centralized vacuum system can be used to clean production lines, floors and installations during or ..",
|
95 |
+
"decoded_text": "Centralized vacuum system can be used to clean production lines, floors and installations during or..",
|
96 |
+
"diff": [
|
97 |
+
"delete text[99:100] --> decoded_text[99:99] ' ' --> ''"
|
98 |
+
],
|
99 |
+
"n_oov_chars": 0,
|
100 |
+
"oov_ratio": 0.0,
|
101 |
+
"oov_charset": "[]"
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"text": "REFRIGERATION MECHANIC Employees in this job participate in and oversee the installation of refrigeration, air conditioning, chemicals, and electricity. Some jobs require an employee to some risk of sustaining illness and injury from the use of chemicals, high-pressure laboratory systems ... Visit Document",
|
105 |
+
"decoded_text": "REFRIGERATION MECHANIC Employees in this job participate in and oversee the installation of refrigeration, air conditioning, chemicals, and electricity. Some jobs require an employee to some risk of sustaining illness and injury from the use of chemicals, high-pressure laboratory systems... Visit Document",
|
106 |
+
"diff": [
|
107 |
+
"delete text[288:289] --> decoded_text[288:288] ' ' --> ''"
|
108 |
+
],
|
109 |
+
"n_oov_chars": 0,
|
110 |
+
"oov_ratio": 0.0,
|
111 |
+
"oov_charset": "[]"
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"text": "AMMONIA REFRIGERATION IN WAREHOUSES What is ammonia? Pure ammonia (NH3), also known as anhydrous ammonia, is a colorless gas, chemicals; how these chemicals can be detected (such as by monitoring devices, or by smell), what the employer is going to do to protect workers, including emergency ... Read Here",
|
115 |
+
"decoded_text": "AMMONIA REFRIGERATION IN WAREHOUSES What is ammonia? Pure ammonia (NH3), also known as anhydrous ammonia, is a colorless gas, chemicals; how these chemicals can be detected (such as by monitoring devices, or by smell), what the employer is going to do to protect workers, including emergency... Read Here",
|
116 |
+
"diff": [
|
117 |
+
"delete text[291:292] --> decoded_text[291:291] ' ' --> ''"
|
118 |
+
],
|
119 |
+
"n_oov_chars": 0,
|
120 |
+
"oov_ratio": 0.0,
|
121 |
+
"oov_charset": "[]"
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"text": "A company in Washington has agreed to pay $50,805 in federal penalties after releasing ammonia gas into the atmosphere. ... Read News",
|
125 |
+
"decoded_text": "A company in Washington has agreed to pay $50,805 in federal penalties after releasing ammonia gas into the atmosphere.... Read News",
|
126 |
+
"diff": [
|
127 |
+
"delete text[119:120] --> decoded_text[119:119] ' ' --> ''"
|
128 |
+
],
|
129 |
+
"n_oov_chars": 0,
|
130 |
+
"oov_ratio": 0.0,
|
131 |
+
"oov_charset": "[]"
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"text": "A scientist trying to find a cure for his wife's disease is kicked into a container of chemicals which results in his needing a refrigeration suit to survive ... View Video",
|
135 |
+
"decoded_text": "A scientist trying to find a cure for his wife's disease is kicked into a container of chemicals which results in his needing a refrigeration suit to survive... View Video",
|
136 |
+
"diff": [
|
137 |
+
"delete text[157:158] --> decoded_text[157:157] ' ' --> ''"
|
138 |
+
],
|
139 |
+
"n_oov_chars": 0,
|
140 |
+
"oov_ratio": 0.0,
|
141 |
+
"oov_charset": "[]"
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"text": "Beverages, chemicals and petrochemicals, pharmaceuticals, starch, sugar and biofuels. Refrigeration and air-conditioning consume a lot of energy. Therefore, it is essential that the technical solutions economize on the use of energy and fulfil their mission ... Doc Viewer",
|
145 |
+
"decoded_text": "Beverages, chemicals and petrochemicals, pharmaceuticals, starch, sugar and biofuels. Refrigeration and air-conditioning consume a lot of energy. Therefore, it is essential that the technical solutions economize on the use of energy and fulfil their mission... Doc Viewer",
|
146 |
+
"diff": [
|
147 |
+
"delete text[257:258] --> decoded_text[257:257] ' ' --> ''"
|
148 |
+
],
|
149 |
+
"n_oov_chars": 0,
|
150 |
+
"oov_ratio": 0.0,
|
151 |
+
"oov_charset": "[]"
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"text": "Electrical equipment, blunt instruments, chemicals, lubricants, or any other tools or equipment seen or implied in this video. Due to factors beyond the control of EricTheCarGuy, ... View Video",
|
155 |
+
"decoded_text": "Electrical equipment, blunt instruments, chemicals, lubricants, or any other tools or equipment seen or implied in this video. Due to factors beyond the control of EricTheCarGuy,... View Video",
|
156 |
+
"diff": [
|
157 |
+
"delete text[178:179] --> decoded_text[178:178] ' ' --> ''"
|
158 |
+
],
|
159 |
+
"n_oov_chars": 0,
|
160 |
+
"oov_ratio": 0.0,
|
161 |
+
"oov_charset": "[]"
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"text": "Summary Of Ammonia Accidents In The United States To Which ...",
|
165 |
+
"decoded_text": "Summary Of Ammonia Accidents In The United States To Which...",
|
166 |
+
"diff": [
|
167 |
+
"delete text[58:59] --> decoded_text[58:58] ' ' --> ''"
|
168 |
+
],
|
169 |
+
"n_oov_chars": 0,
|
170 |
+
"oov_ratio": 0.0,
|
171 |
+
"oov_charset": "[]"
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"text": "Under current conditions, moderate or slow-onset health effects of GM foods could take decades to become known, just as it took decades for the damaging effects of trans-fats (another type of artificial food) to be recognized. ‘Slow poison’ effects from trans-fats have caused millions of premature deaths across the world6 .",
|
175 |
+
"decoded_text": "Under current conditions, moderate or slow-onset health effects of GM foods could take decades to become known, just as it took decades for the damaging effects of trans-fats (another type of artificial food) to be recognized. ‘Slow poison’ effects from trans-fats have caused millions of premature deaths across the world6.",
|
176 |
+
"diff": [
|
177 |
+
"delete text[323:324] --> decoded_text[323:323] ' ' --> ''"
|
178 |
+
],
|
179 |
+
"n_oov_chars": 0,
|
180 |
+
"oov_ratio": 0.0,
|
181 |
+
"oov_charset": "[]"
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"text": "Nevertheless, there are signs that all is not well with the US food supply. A report by the US Centers for Disease Control shows that food-related illnesses increased 2- to 10-fold in the years between 1994 (just before GM food was commercialized) and 19997 . Is there a link with GM food? No one knows, because studies on humans have not been done.",
|
185 |
+
"decoded_text": "Nevertheless, there are signs that all is not well with the US food supply. A report by the US Centers for Disease Control shows that food-related illnesses increased 2- to 10-fold in the years between 1994 (just before GM food was commercialized) and 19997. Is there a link with GM food? No one knows, because studies on humans have not been done.",
|
186 |
+
"diff": [
|
187 |
+
"delete text[257:258] --> decoded_text[257:257] ' ' --> ''"
|
188 |
+
],
|
189 |
+
"n_oov_chars": 0,
|
190 |
+
"oov_ratio": 0.0,
|
191 |
+
"oov_charset": "[]"
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"text": "Engine . . . I think that's a pretty exotic engine, can't be cheap to rebuild.",
|
195 |
+
"decoded_text": "Engine... I think that's a pretty exotic engine, can't be cheap to rebuild.",
|
196 |
+
"diff": [
|
197 |
+
"delete text[6:7] --> decoded_text[6:6] ' ' --> ''",
|
198 |
+
"delete text[8:9] --> decoded_text[7:7] ' ' --> ''",
|
199 |
+
"delete text[10:11] --> decoded_text[8:8] ' ' --> ''"
|
200 |
+
],
|
201 |
+
"n_oov_chars": 0,
|
202 |
+
"oov_ratio": 0.0,
|
203 |
+
"oov_charset": "[]"
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"text": "Put some $$ aside for electrics and rust, if still within the budget ...do it! You'll love it.",
|
207 |
+
"decoded_text": "Put some $$ aside for electrics and rust, if still within the budget...do it! You'll love it.",
|
208 |
+
"diff": [
|
209 |
+
"delete text[68:69] --> decoded_text[68:68] ' ' --> ''"
|
210 |
+
],
|
211 |
+
"n_oov_chars": 0,
|
212 |
+
"oov_ratio": 0.0,
|
213 |
+
"oov_charset": "[]"
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"text": "I bought my M5 10 years ago when prices were still 'reasonable' and I wouldn't take less than $50k today.",
|
217 |
+
"decoded_text": "I bought my M5 10 years ago when prices were still'reasonable' and I wouldn't take less than $50k today.",
|
218 |
+
"diff": [
|
219 |
+
"delete text[50:51] --> decoded_text[50:50] ' ' --> ''"
|
220 |
+
],
|
221 |
+
"n_oov_chars": 0,
|
222 |
+
"oov_ratio": 0.0,
|
223 |
+
"oov_charset": "[]"
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"text": "Yes my clothes fit well...right now..since I lost weight ,they are a little big, but they are comfortable.",
|
227 |
+
"decoded_text": "Yes my clothes fit well...right now..since I lost weight,they are a little big, but they are comfortable.",
|
228 |
+
"diff": [
|
229 |
+
"delete text[56:57] --> decoded_text[56:56] ' ' --> ''"
|
230 |
+
],
|
231 |
+
"n_oov_chars": 0,
|
232 |
+
"oov_ratio": 0.0,
|
233 |
+
"oov_charset": "[]"
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"text": "Thanks to everyone who's posted so far, and I look forward to seeing more responses. It's really cool to get to 'meet' the people I've been hanging out with.",
|
237 |
+
"decoded_text": "Thanks to everyone who's posted so far, and I look forward to seeing more responses. It's really cool to get to'meet' the people I've been hanging out with.",
|
238 |
+
"diff": [
|
239 |
+
"delete text[111:112] --> decoded_text[111:111] ' ' --> ''"
|
240 |
+
],
|
241 |
+
"n_oov_chars": 0,
|
242 |
+
"oov_ratio": 0.0,
|
243 |
+
"oov_charset": "[]"
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"text": "Flickr's API tells us that a particular photo has not been added to a group with the a code error. For the case we are using it is number 5. More information about error codes can be found here (https://www.flickr.com/services/api/flickr.groups.pools.add.html) .",
|
247 |
+
"decoded_text": "Flickr's API tells us that a particular photo has not been added to a group with the a code error. For the case we are using it is number 5. More information about error codes can be found here (https://www.flickr.com/services/api/flickr.groups.pools.add.html).",
|
248 |
+
"diff": [
|
249 |
+
"replace text[260:262] --> decoded_text[260:261] ' .' --> '.'"
|
250 |
+
],
|
251 |
+
"n_oov_chars": 0,
|
252 |
+
"oov_ratio": 0.0,
|
253 |
+
"oov_charset": "[]"
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"text": "I called Chris to celebrate... .... ... and to ask him if he would help break it down so we could get it home.",
|
257 |
+
"decoded_text": "I called Chris to celebrate.......... and to ask him if he would help break it down so we could get it home.",
|
258 |
+
"diff": [
|
259 |
+
"delete text[30:31] --> decoded_text[30:30] ' ' --> ''",
|
260 |
+
"delete text[35:36] --> decoded_text[34:34] ' ' --> ''"
|
261 |
+
],
|
262 |
+
"n_oov_chars": 0,
|
263 |
+
"oov_ratio": 0.0,
|
264 |
+
"oov_charset": "[]"
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"text": "I thought these would be pretty and bright in an entry way... .. ... of that mansion we are someday going to own.",
|
268 |
+
"decoded_text": "I thought these would be pretty and bright in an entry way........ of that mansion we are someday going to own.",
|
269 |
+
"diff": [
|
270 |
+
"delete text[61:62] --> decoded_text[61:61] ' ' --> ''",
|
271 |
+
"delete text[64:65] --> decoded_text[63:63] ' ' --> ''"
|
272 |
+
],
|
273 |
+
"n_oov_chars": 0,
|
274 |
+
"oov_ratio": 0.0,
|
275 |
+
"oov_charset": "[]"
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"text": "The Bisons are careening to their 12th straight non-playoff season spanning three parent clubs. A team that started the season 13-5 and was still eight games over .500 in early June just put up the most losses in any month in its history.",
|
279 |
+
"decoded_text": "The Bisons are careening to their 12th straight non-playoff season spanning three parent clubs. A team that started the season 13-5 and was still eight games over.500 in early June just put up the most losses in any month in its history.",
|
280 |
+
"diff": [
|
281 |
+
"delete text[162:163] --> decoded_text[162:162] ' ' --> ''"
|
282 |
+
],
|
283 |
+
"n_oov_chars": 0,
|
284 |
+
"oov_ratio": 0.0,
|
285 |
+
"oov_charset": "[]"
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"text": "One disappointment that's really hurt at all levels is the wrist surgery for top outfield prospect Anthony Alford. He was lighting things up at Double-A New Hampshire batting .325 and the Blue Jays wanted to give him a brief look. He would have then come to Buffalo and ostensibly anchored the lineup. But he's yet to arrive and the season has gone down the drain.",
|
289 |
+
"decoded_text": "One disappointment that's really hurt at all levels is the wrist surgery for top outfield prospect Anthony Alford. He was lighting things up at Double-A New Hampshire batting.325 and the Blue Jays wanted to give him a brief look. He would have then come to Buffalo and ostensibly anchored the lineup. But he's yet to arrive and the season has gone down the drain.",
|
290 |
+
"diff": [
|
291 |
+
"delete text[174:175] --> decoded_text[174:174] ' ' --> ''"
|
292 |
+
],
|
293 |
+
"n_oov_chars": 0,
|
294 |
+
"oov_ratio": 0.0,
|
295 |
+
"oov_charset": "[]"
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"text": "Jamieson, an Ontario native, was a Double-A Southern League all-star in 2014, when he batted .298 at Mobile. He was the Metro Atlantic co-player of the year in 2011 and a three-time representative of Team Canada, including a gold medal at the Pan An Games in 2015.",
|
299 |
+
"decoded_text": "Jamieson, an Ontario native, was a Double-A Southern League all-star in 2014, when he batted.298 at Mobile. He was the Metro Atlantic co-player of the year in 2011 and a three-time representative of Team Canada, including a gold medal at the Pan An Games in 2015.",
|
300 |
+
"diff": [
|
301 |
+
"delete text[92:93] --> decoded_text[92:92] ' ' --> ''"
|
302 |
+
],
|
303 |
+
"n_oov_chars": 0,
|
304 |
+
"oov_ratio": 0.0,
|
305 |
+
"oov_charset": "[]"
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"text": "In this tutorial, we learn how to get big bouncy curls like Kim Kardashian. First, have straight and clean hair and heat up a 1.5\" curling iron. Now, section your hair off and start with the bottom half. Wrap your hair around the curling iron, then after 15 seconds release the ...more",
|
309 |
+
"decoded_text": "In this tutorial, we learn how to get big bouncy curls like Kim Kardashian. First, have straight and clean hair and heat up a 1.5\" curling iron. Now, section your hair off and start with the bottom half. Wrap your hair around the curling iron, then after 15 seconds release the...more",
|
310 |
+
"diff": [
|
311 |
+
"delete text[277:278] --> decoded_text[277:277] ' ' --> ''"
|
312 |
+
],
|
313 |
+
"n_oov_chars": 0,
|
314 |
+
"oov_ratio": 0.0,
|
315 |
+
"oov_charset": "[]"
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"text": "In this tutorial, we learn how to create a Kim Kardashian-inspired cat eye makeup look. Start by using a nude color eyeshadow on the eyes and tape to create an edge on the outer corner of the eye. After you do this, apply a white shadow underneath the eyebrows to create a high ...more",
|
319 |
+
"decoded_text": "In this tutorial, we learn how to create a Kim Kardashian-inspired cat eye makeup look. Start by using a nude color eyeshadow on the eyes and tape to create an edge on the outer corner of the eye. After you do this, apply a white shadow underneath the eyebrows to create a high...more",
|
320 |
+
"diff": [
|
321 |
+
"delete text[277:278] --> decoded_text[277:277] ' ' --> ''"
|
322 |
+
],
|
323 |
+
"n_oov_chars": 0,
|
324 |
+
"oov_ratio": 0.0,
|
325 |
+
"oov_charset": "[]"
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"text": "Love the makeup styles of Kim Kardashian? Then you might like this makeup tutorial, which shows you how to recreate her looks. This Kardashian-inspired smokey eyes look uses minimal product, so you don't need to load up on the cosmetics. It's a simple look, perfect for any occ ...more",
|
329 |
+
"decoded_text": "Love the makeup styles of Kim Kardashian? Then you might like this makeup tutorial, which shows you how to recreate her looks. This Kardashian-inspired smokey eyes look uses minimal product, so you don't need to load up on the cosmetics. It's a simple look, perfect for any occ...more",
|
330 |
+
"diff": [
|
331 |
+
"replace text[277:285] --> decoded_text[277:284] ' ...more' --> '...more'"
|
332 |
+
],
|
333 |
+
"n_oov_chars": 0,
|
334 |
+
"oov_ratio": 0.0,
|
335 |
+
"oov_charset": "[]"
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"text": "You've probably seen tutorials on YouTube before on how to achieve Kim Kardashian curls, or Victoria's Secret waves, or the hair look of the fashionable deviants on \"Pretty Little Liars,\" but why go through so many tutorials when they're all really the same thing? Check out t ...more",
|
339 |
+
"decoded_text": "You've probably seen tutorials on YouTube before on how to achieve Kim Kardashian curls, or Victoria's Secret waves, or the hair look of the fashionable deviants on \"Pretty Little Liars,\" but why go through so many tutorials when they're all really the same thing? Check out t...more",
|
340 |
+
"diff": [
|
341 |
+
"delete text[276:277] --> decoded_text[276:276] ' ' --> ''"
|
342 |
+
],
|
343 |
+
"n_oov_chars": 0,
|
344 |
+
"oov_ratio": 0.0,
|
345 |
+
"oov_charset": "[]"
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"text": "In the Age of the Smokey Eye, women who get glammed up with red lips and cat eyes are endangered species. Fifty years ago you would have seen nothing but, yet today the predominance of Kim Kardashian smokey eyes and nude lips has made anyone wearing red lips a wonder. Which i ...more",
|
349 |
+
"decoded_text": "In the Age of the Smokey Eye, women who get glammed up with red lips and cat eyes are endangered species. Fifty years ago you would have seen nothing but, yet today the predominance of Kim Kardashian smokey eyes and nude lips has made anyone wearing red lips a wonder. Which i...more",
|
350 |
+
"diff": [
|
351 |
+
"replace text[276:284] --> decoded_text[276:283] ' ...more' --> '...more'"
|
352 |
+
],
|
353 |
+
"n_oov_chars": 0,
|
354 |
+
"oov_ratio": 0.0,
|
355 |
+
"oov_charset": "[]"
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"text": "TGIF. What better day to take a break from the week's dwindling grind? Below, a video demo plus instructions for indulging in a little tech-aided vanity during your next water cooler hiatus. An iPhone is necessary, so if you don't have one, find a co-worker stat. 1. Buy & Down ...more",
|
359 |
+
"decoded_text": "TGIF. What better day to take a break from the week's dwindling grind? Below, a video demo plus instructions for indulging in a little tech-aided vanity during your next water cooler hiatus. An iPhone is necessary, so if you don't have one, find a co-worker stat. 1. Buy & Down...more",
|
360 |
+
"diff": [
|
361 |
+
"delete text[277:278] --> decoded_text[277:277] ' ' --> ''"
|
362 |
+
],
|
363 |
+
"n_oov_chars": 0,
|
364 |
+
"oov_ratio": 0.0,
|
365 |
+
"oov_charset": "[]"
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"text": "It's been a rough week for Instagram. First they came out with a new terms of service that suggested the right to let companies use people's photos for advertisements without the user's permission. Then everyone started freaking out and debating whether or not to leave the onl ...more",
|
369 |
+
"decoded_text": "It's been a rough week for Instagram. First they came out with a new terms of service that suggested the right to let companies use people's photos for advertisements without the user's permission. Then everyone started freaking out and debating whether or not to leave the onl...more",
|
370 |
+
"diff": [
|
371 |
+
"replace text[277:285] --> decoded_text[277:284] ' ...more' --> '...more'"
|
372 |
+
],
|
373 |
+
"n_oov_chars": 0,
|
374 |
+
"oov_ratio": 0.0,
|
375 |
+
"oov_charset": "[]"
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"text": "Kim Kardashian and Kanye West have fittingly named their second child \"Saint,\" but I'm sure you don't care about that—and neither do I. Unfortunately, regardless of our pop culture interests, we're forced to know these types of stupid facts because the internet is so densely p ...more",
|
379 |
+
"decoded_text": "Kim Kardashian and Kanye West have fittingly named their second child \"Saint,\" but I'm sure you don't care about that—and neither do I. Unfortunately, regardless of our pop culture interests, we're forced to know these types of stupid facts because the internet is so densely p...more",
|
380 |
+
"diff": [
|
381 |
+
"replace text[277:285] --> decoded_text[277:284] ' ...more' --> '...more'"
|
382 |
+
],
|
383 |
+
"n_oov_chars": 0,
|
384 |
+
"oov_ratio": 0.0,
|
385 |
+
"oov_charset": "[]"
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"text": "The war may be forgotten but the warrior will always be remembered !!!! All gave Some-Some gave All. Rest in peace Alden. :-(",
|
389 |
+
"decoded_text": "The war may be forgotten but the warrior will always be remembered!!!! All gave Some-Some gave All. Rest in peace Alden. :-(",
|
390 |
+
"diff": [
|
391 |
+
"delete text[66:67] --> decoded_text[66:66] ' ' --> ''"
|
392 |
+
],
|
393 |
+
"n_oov_chars": 0,
|
394 |
+
"oov_ratio": 0.0,
|
395 |
+
"oov_charset": "[]"
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"text": "“How about,” the boy suggested, “we go swimming, have a barbecue and watch the Indy 500 like we did last year . . . in honor of those who died for this great nation and our fun, American way of life.”",
|
399 |
+
"decoded_text": "“How about,” the boy suggested, “we go swimming, have a barbecue and watch the Indy 500 like we did last year... in honor of those who died for this great nation and our fun, American way of life.”",
|
400 |
+
"diff": [
|
401 |
+
"delete text[109:110] --> decoded_text[109:109] ' ' --> ''",
|
402 |
+
"delete text[111:112] --> decoded_text[110:110] ' ' --> ''",
|
403 |
+
"delete text[113:114] --> decoded_text[111:111] ' ' --> ''"
|
404 |
+
],
|
405 |
+
"n_oov_chars": 0,
|
406 |
+
"oov_ratio": 0.0,
|
407 |
+
"oov_charset": "[]"
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"text": "Description : Brandon does it again as he brings us the lovely redhead slut Mylie Moore. This babe has got some serious attitude when it comes to sucking cock, and Brandon can tell right away that this is going to ...",
|
411 |
+
"decoded_text": "Description : Brandon does it again as he brings us the lovely redhead slut Mylie Moore. This babe has got some serious attitude when it comes to sucking cock, and Brandon can tell right away that this is going to...",
|
412 |
+
"diff": [
|
413 |
+
"replace text[213:217] --> decoded_text[213:216] ' ...' --> '...'"
|
414 |
+
],
|
415 |
+
"n_oov_chars": 0,
|
416 |
+
"oov_ratio": 0.0,
|
417 |
+
"oov_charset": "[]"
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"text": "Description : Presley Maddox is in front of a chinese restaurant looking at the menu. \"What are you looking for?\" Brandon asks her. \"Creamofsomeyoungguy...\" she replies, a naughty look on her face. \"Oh, i've heard ...",
|
421 |
+
"decoded_text": "Description : Presley Maddox is in front of a chinese restaurant looking at the menu. \"What are you looking for?\" Brandon asks her. \"Creamofsomeyoungguy...\" she replies, a naughty look on her face. \"Oh, i've heard...",
|
422 |
+
"diff": [
|
423 |
+
"replace text[213:217] --> decoded_text[213:216] ' ...' --> '...'"
|
424 |
+
],
|
425 |
+
"n_oov_chars": 0,
|
426 |
+
"oov_ratio": 0.0,
|
427 |
+
"oov_charset": "[]"
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"text": "I simply want to tell you that I am just all new to blogging and honestly loved this blog. Probably I’m likely to bookmark your site . You really have really good writings. Bless you for revealing your web page.",
|
431 |
+
"decoded_text": "I simply want to tell you that I am just all new to blogging and honestly loved this blog. Probably I’m likely to bookmark your site. You really have really good writings. Bless you for revealing your web page.",
|
432 |
+
"diff": [
|
433 |
+
"delete text[132:133] --> decoded_text[132:132] ' ' --> ''"
|
434 |
+
],
|
435 |
+
"n_oov_chars": 0,
|
436 |
+
"oov_ratio": 0.0,
|
437 |
+
"oov_charset": "[]"
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"text": "I simply had to thank you very much once more. I’m not certain the things I might have sorted out without those methods documented by you relating to such a theme. It had been a real scary circumstance in my view, however , finding out your professional style you handled that took me to weep over delight. I’m thankful for your service and then sincerely hope you know what an amazing job you’re putting in educating some other people thru your websites. Probably you have never come across all of us.",
|
441 |
+
"decoded_text": "I simply had to thank you very much once more. I’m not certain the things I might have sorted out without those methods documented by you relating to such a theme. It had been a real scary circumstance in my view, however, finding out your professional style you handled that took me to weep over delight. I’m thankful for your service and then sincerely hope you know what an amazing job you’re putting in educating some other people thru your websites. Probably you have never come across all of us.",
|
442 |
+
"diff": [
|
443 |
+
"delete text[221:222] --> decoded_text[221:221] ' ' --> ''"
|
444 |
+
],
|
445 |
+
"n_oov_chars": 0,
|
446 |
+
"oov_ratio": 0.0,
|
447 |
+
"oov_charset": "[]"
|
448 |
+
},
|
449 |
+
{
|
450 |
+
"text": "I genuinely enjoy reading on this internet site , it contains good content . “Words are, of course, the most powerful drug used by mankind.” by Rudyard Kipling.",
|
451 |
+
"decoded_text": "I genuinely enjoy reading on this internet site, it contains good content. “Words are, of course, the most powerful drug used by mankind.” by Rudyard Kipling.",
|
452 |
+
"diff": [
|
453 |
+
"delete text[47:48] --> decoded_text[47:47] ' ' --> ''",
|
454 |
+
"delete text[74:75] --> decoded_text[73:73] ' ' --> ''"
|
455 |
+
],
|
456 |
+
"n_oov_chars": 0,
|
457 |
+
"oov_ratio": 0.0,
|
458 |
+
"oov_charset": "[]"
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"text": "Hiya very cool web site!! Man .. Excellent .. Wonderful .. I’ll bookmark your web site and take the feeds additionally¡KI am satisfied to search out a lot of useful info here within the post, we need develop extra strategies in this regard, thank you for sharing. . . . . .",
|
462 |
+
"decoded_text": "Hiya very cool web site!! Man.. Excellent.. Wonderful.. I’ll bookmark your web site and take the feeds additionally¡KI am satisfied to search out a lot of useful info here within the post, we need develop extra strategies in this regard, thank you for sharing......",
|
463 |
+
"diff": [
|
464 |
+
"delete text[29:30] --> decoded_text[29:29] ' ' --> ''",
|
465 |
+
"delete text[42:43] --> decoded_text[41:41] ' ' --> ''",
|
466 |
+
"delete text[55:56] --> decoded_text[53:53] ' ' --> ''",
|
467 |
+
"replace text[263:273] --> decoded_text[260:265] ' . . . . .' --> '.....'"
|
468 |
+
],
|
469 |
+
"n_oov_chars": 0,
|
470 |
+
"oov_ratio": 0.0,
|
471 |
+
"oov_charset": "[]"
|
472 |
+
},
|
473 |
+
{
|
474 |
+
"text": "I have been exploring for a little bit for any high quality articles or weblog posts on this sort of area . Exploring in Yahoo I ultimately stumbled upon this website. Studying this info So i am satisfied to convey that I have a very good uncanny feeling I discovered just what I needed. I so much indisputably will make sure to do not forget this web site and provides it a look regularly.",
|
475 |
+
"decoded_text": "I have been exploring for a little bit for any high quality articles or weblog posts on this sort of area. Exploring in Yahoo I ultimately stumbled upon this website. Studying this info So i am satisfied to convey that I have a very good uncanny feeling I discovered just what I needed. I so much indisputably will make sure to do not forget this web site and provides it a look regularly.",
|
476 |
+
"diff": [
|
477 |
+
"delete text[105:106] --> decoded_text[105:105] ' ' --> ''"
|
478 |
+
],
|
479 |
+
"n_oov_chars": 0,
|
480 |
+
"oov_ratio": 0.0,
|
481 |
+
"oov_charset": "[]"
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"text": "Howdy very nice web site!! Man .. Beautiful .. Superb .. I will bookmark your blog and take the feeds also¡KI’m glad to search out a lot of useful information right here in the post, we need develop more techniques in this regard, thank you for sharing. . . . . .",
|
485 |
+
"decoded_text": "Howdy very nice web site!! Man.. Beautiful.. Superb.. I will bookmark your blog and take the feeds also¡KI’m glad to search out a lot of useful information right here in the post, we need develop more techniques in this regard, thank you for sharing......",
|
486 |
+
"diff": [
|
487 |
+
"delete text[30:31] --> decoded_text[30:30] ' ' --> ''",
|
488 |
+
"delete text[43:44] --> decoded_text[42:42] ' ' --> ''",
|
489 |
+
"delete text[53:54] --> decoded_text[51:51] ' ' --> ''",
|
490 |
+
"replace text[253:263] --> decoded_text[250:255] ' . . . . .' --> '.....'"
|
491 |
+
],
|
492 |
+
"n_oov_chars": 0,
|
493 |
+
"oov_ratio": 0.0,
|
494 |
+
"oov_charset": "[]"
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"text": "I simply desired to say thanks all over again. I am not sure what I could possibly have worked on without these solutions documented by you directly on my industry. It had become a frustrating condition for me personally, nevertheless being able to view your skilled form you dealt with the issue forced me to cry for contentment. I’m just grateful for this guidance and as well , trust you find out what a powerful job that you’re putting in teaching some other people through the use of a blog. I know that you’ve never come across any of us.",
|
498 |
+
"decoded_text": "I simply desired to say thanks all over again. I am not sure what I could possibly have worked on without these solutions documented by you directly on my industry. It had become a frustrating condition for me personally, nevertheless being able to view your skilled form you dealt with the issue forced me to cry for contentment. I’m just grateful for this guidance and as well, trust you find out what a powerful job that you’re putting in teaching some other people through the use of a blog. I know that you’ve never come across any of us.",
|
499 |
+
"diff": [
|
500 |
+
"delete text[378:379] --> decoded_text[378:378] ' ' --> ''"
|
501 |
+
],
|
502 |
+
"n_oov_chars": 0,
|
503 |
+
"oov_ratio": 0.0,
|
504 |
+
"oov_charset": "[]"
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"text": "I simply had to thank you so much once again. I am not sure the things I could possibly have gone through without the type of points contributed by you regarding this topic. Entirely was a real challenging circumstance for me, however , noticing the very specialized style you solved the issue took me to cry over contentment. Now i am grateful for this help and then sincerely hope you know what a powerful job your are accomplishing educating the others using your blog. Most likely you’ve never encountered any of us.",
|
508 |
+
"decoded_text": "I simply had to thank you so much once again. I am not sure the things I could possibly have gone through without the type of points contributed by you regarding this topic. Entirely was a real challenging circumstance for me, however, noticing the very specialized style you solved the issue took me to cry over contentment. Now i am grateful for this help and then sincerely hope you know what a powerful job your are accomplishing educating the others using your blog. Most likely you’ve never encountered any of us.",
|
509 |
+
"diff": [
|
510 |
+
"delete text[234:235] --> decoded_text[234:234] ' ' --> ''"
|
511 |
+
],
|
512 |
+
"n_oov_chars": 0,
|
513 |
+
"oov_ratio": 0.0,
|
514 |
+
"oov_charset": "[]"
|
515 |
+
},
|
516 |
+
{
|
517 |
+
"text": "Terrific paintings! That is the kind of info that are supposed to be shared around the internet. Shame on the seek engines for no longer positioning this publish higher! Come on over and seek advice from my website . Thank you =)",
|
518 |
+
"decoded_text": "Terrific paintings! That is the kind of info that are supposed to be shared around the internet. Shame on the seek engines for no longer positioning this publish higher! Come on over and seek advice from my website. Thank you =)",
|
519 |
+
"diff": [
|
520 |
+
"delete text[214:215] --> decoded_text[214:214] ' ' --> ''"
|
521 |
+
],
|
522 |
+
"n_oov_chars": 0,
|
523 |
+
"oov_ratio": 0.0,
|
524 |
+
"oov_charset": "[]"
|
525 |
+
},
|
526 |
+
{
|
527 |
+
"text": "Hi there very nice site!! Guy .. Beautiful .. Wonderful .. I will bookmark your web site and take the feeds additionally¡KI’m glad to search out a lot of useful information right here in the put up, we need develop more strategies on this regard, thanks for sharing. . . . . .",
|
528 |
+
"decoded_text": "Hi there very nice site!! Guy.. Beautiful.. Wonderful.. I will bookmark your web site and take the feeds additionally¡KI’m glad to search out a lot of useful information right here in the put up, we need develop more strategies on this regard, thanks for sharing......",
|
529 |
+
"diff": [
|
530 |
+
"delete text[29:30] --> decoded_text[29:29] ' ' --> ''",
|
531 |
+
"delete text[42:43] --> decoded_text[41:41] ' ' --> ''",
|
532 |
+
"delete text[55:56] --> decoded_text[53:53] ' ' --> ''",
|
533 |
+
"replace text[266:276] --> decoded_text[263:268] ' . . . . .' --> '.....'"
|
534 |
+
],
|
535 |
+
"n_oov_chars": 0,
|
536 |
+
"oov_ratio": 0.0,
|
537 |
+
"oov_charset": "[]"
|
538 |
+
},
|
539 |
+
{
|
540 |
+
"text": "Whats up very cool website!! Man .. Excellent .. Amazing .. I will bookmark your website and take the feeds also¡KI’m glad to search out numerous useful information right here in the post, we need work out extra strategies on this regard, thank you for sharing. . . . . .",
|
541 |
+
"decoded_text": "Whats up very cool website!! Man.. Excellent.. Amazing.. I will bookmark your website and take the feeds also¡KI’m glad to search out numerous useful information right here in the post, we need work out extra strategies on this regard, thank you for sharing......",
|
542 |
+
"diff": [
|
543 |
+
"delete text[32:33] --> decoded_text[32:32] ' ' --> ''",
|
544 |
+
"delete text[45:46] --> decoded_text[44:44] ' ' --> ''",
|
545 |
+
"delete text[56:57] --> decoded_text[54:54] ' ' --> ''",
|
546 |
+
"replace text[261:271] --> decoded_text[258:263] ' . . . . .' --> '.....'"
|
547 |
+
],
|
548 |
+
"n_oov_chars": 0,
|
549 |
+
"oov_ratio": 0.0,
|
550 |
+
"oov_charset": "[]"
|
551 |
+
},
|
552 |
+
{
|
553 |
+
"text": "Thanks , I have recently been looking for information about this topic for a long time and yours is the greatest I’ve found out till now. But, what in regards to the conclusion? Are you sure about the source?",
|
554 |
+
"decoded_text": "Thanks, I have recently been looking for information about this topic for a long time and yours is the greatest I’ve found out till now. But, what in regards to the conclusion? Are you sure about the source?",
|
555 |
+
"diff": [
|
556 |
+
"delete text[6:7] --> decoded_text[6:6] ' ' --> ''"
|
557 |
+
],
|
558 |
+
"n_oov_chars": 0,
|
559 |
+
"oov_ratio": 0.0,
|
560 |
+
"oov_charset": "[]"
|
561 |
+
},
|
562 |
+
{
|
563 |
+
"text": "I simply desired to say thanks once again. I do not know the things that I would’ve accomplished in the absence of the ways contributed by you regarding my theme. It absolutely was a frightful situation for me personally, but viewing a specialised fashion you processed the issue forced me to weep over gladness. I will be happy for the assistance and as well , pray you recognize what a powerful job your are doing teaching the others through a blog. Probably you have never come across all of us.",
|
564 |
+
"decoded_text": "I simply desired to say thanks once again. I do not know the things that I would’ve accomplished in the absence of the ways contributed by you regarding my theme. It absolutely was a frightful situation for me personally, but viewing a specialised fashion you processed the issue forced me to weep over gladness. I will be happy for the assistance and as well, pray you recognize what a powerful job your are doing teaching the others through a blog. Probably you have never come across all of us.",
|
565 |
+
"diff": [
|
566 |
+
"delete text[359:360] --> decoded_text[359:359] ' ' --> ''"
|
567 |
+
],
|
568 |
+
"n_oov_chars": 0,
|
569 |
+
"oov_ratio": 0.0,
|
570 |
+
"oov_charset": "[]"
|
571 |
+
},
|
572 |
+
{
|
573 |
+
"text": "Why hedge funds are Japan's only sane, liquid asset class _ Alternatives _ AsianInvestor . By continuing to use our website, you accept our Privacy Policy and Terms & Conditions.",
|
574 |
+
"decoded_text": "Why hedge funds are Japan's only sane, liquid asset class _ Alternatives _ AsianInvestor. By continuing to use our website, you accept our Privacy Policy and Terms & Conditions.",
|
575 |
+
"diff": [
|
576 |
+
"delete text[88:89] --> decoded_text[88:88] ' ' --> ''"
|
577 |
+
],
|
578 |
+
"n_oov_chars": 0,
|
579 |
+
"oov_ratio": 0.0,
|
580 |
+
"oov_charset": "[]"
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"text": "i am seve months pregnant ,this is my first pregnancy i cant wait to hold my little prince",
|
584 |
+
"decoded_text": "i am seve months pregnant,this is my first pregnancy i cant wait to hold my little prince",
|
585 |
+
"diff": [
|
586 |
+
"delete text[25:26] --> decoded_text[25:25] ' ' --> ''"
|
587 |
+
],
|
588 |
+
"n_oov_chars": 0,
|
589 |
+
"oov_ratio": 0.0,
|
590 |
+
"oov_charset": "[]"
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"text": "im so scared because this is my first kid and im 18 and all my babys dady wants to do is be with other women and its like he dose not care about the baby he just cares about himself i don't have a clue on what to do or say to him ??????????????",
|
594 |
+
"decoded_text": "im so scared because this is my first kid and im 18 and all my babys dady wants to do is be with other women and its like he dose not care about the baby he just cares about himself i don't have a clue on what to do or say to him??????????????",
|
595 |
+
"diff": [
|
596 |
+
"replace text[229:244] --> decoded_text[229:243] ' ??????????????' --> '??????????????'"
|
597 |
+
],
|
598 |
+
"n_oov_chars": 0,
|
599 |
+
"oov_ratio": 0.0,
|
600 |
+
"oov_charset": "[]"
|
601 |
+
},
|
602 |
+
{
|
603 |
+
"text": "Yes there are some very strange women out there, scary thing is that they are pregnant these spiteful women and they are going to raise the children of our future, futures not lookin so bright with retarded parents !",
|
604 |
+
"decoded_text": "Yes there are some very strange women out there, scary thing is that they are pregnant these spiteful women and they are going to raise the children of our future, futures not lookin so bright with retarded parents!",
|
605 |
+
"diff": [
|
606 |
+
"delete text[214:215] --> decoded_text[214:214] ' ' --> ''"
|
607 |
+
],
|
608 |
+
"n_oov_chars": 0,
|
609 |
+
"oov_ratio": 0.0,
|
610 |
+
"oov_charset": "[]"
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"text": "dissect that reason because what you might find is that all that 'smoke and mirrors' talk about why you shouldn't be doing something",
|
614 |
+
"decoded_text": "dissect that reason because what you might find is that all that'smoke and mirrors' talk about why you shouldn't be doing something",
|
615 |
+
"diff": [
|
616 |
+
"delete text[64:65] --> decoded_text[64:64] ' ' --> ''"
|
617 |
+
],
|
618 |
+
"n_oov_chars": 0,
|
619 |
+
"oov_ratio": 0.0,
|
620 |
+
"oov_charset": "[]"
|
621 |
+
},
|
622 |
+
{
|
623 |
+
"text": "What we offer here is not a hack , but a marketing of our otherwise compensated services.",
|
624 |
+
"decoded_text": "What we offer here is not a hack, but a marketing of our otherwise compensated services.",
|
625 |
+
"diff": [
|
626 |
+
"delete text[32:33] --> decoded_text[32:32] ' ' --> ''"
|
627 |
+
],
|
628 |
+
"n_oov_chars": 0,
|
629 |
+
"oov_ratio": 0.0,
|
630 |
+
"oov_charset": "[]"
|
631 |
+
},
|
632 |
+
{
|
633 |
+
"text": "that ca n't be comprehended by me.",
|
634 |
+
"decoded_text": "that can't be comprehended by me.",
|
635 |
+
"diff": [
|
636 |
+
"delete text[7:8] --> decoded_text[7:7] ' ' --> ''"
|
637 |
+
],
|
638 |
+
"n_oov_chars": 0,
|
639 |
+
"oov_ratio": 0.0,
|
640 |
+
"oov_charset": "[]"
|
641 |
+
},
|
642 |
+
{
|
643 |
+
"text": "The easiest way to seek for it's with , the most important unclaimed money database on the planet.",
|
644 |
+
"decoded_text": "The easiest way to seek for it's with, the most important unclaimed money database on the planet.",
|
645 |
+
"diff": [
|
646 |
+
"delete text[37:38] --> decoded_text[37:37] ' ' --> ''"
|
647 |
+
],
|
648 |
+
"n_oov_chars": 0,
|
649 |
+
"oov_ratio": 0.0,
|
650 |
+
"oov_charset": "[]"
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"text": "Hi there, i believe that i noticed you seen my own blog thus my partner and i reached rewind this choose? . I am just wanting to to find items to strengthen this site! I guess their sufficient to make use of some of your own aspects!",
|
654 |
+
"decoded_text": "Hi there, i believe that i noticed you seen my own blog thus my partner and i reached rewind this choose?. I am just wanting to to find items to strengthen this site! I guess their sufficient to make use of some of your own aspects!",
|
655 |
+
"diff": [
|
656 |
+
"delete text[105:106] --> decoded_text[105:105] ' ' --> ''"
|
657 |
+
],
|
658 |
+
"n_oov_chars": 0,
|
659 |
+
"oov_ratio": 0.0,
|
660 |
+
"oov_charset": "[]"
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"text": "You managed to hit the nail upon the top and defined out the whole thing without having side effect , people could",
|
664 |
+
"decoded_text": "You managed to hit the nail upon the top and defined out the whole thing without having side effect, people could",
|
665 |
+
"diff": [
|
666 |
+
"delete text[99:100] --> decoded_text[99:99] ' ' --> ''"
|
667 |
+
],
|
668 |
+
"n_oov_chars": 0,
|
669 |
+
"oov_ratio": 0.0,
|
670 |
+
"oov_charset": "[]"
|
671 |
+
},
|
672 |
+
{
|
673 |
+
"text": "Sure, it was the name of a white abolitionist ' from Ali's own",
|
674 |
+
"decoded_text": "Sure, it was the name of a white abolitionist'from Ali's own",
|
675 |
+
"diff": [
|
676 |
+
"delete text[45:46] --> decoded_text[45:45] ' ' --> ''",
|
677 |
+
"delete text[47:48] --> decoded_text[46:46] ' ' --> ''"
|
678 |
+
],
|
679 |
+
"n_oov_chars": 0,
|
680 |
+
"oov_ratio": 0.0,
|
681 |
+
"oov_charset": "[]"
|
682 |
+
},
|
683 |
+
{
|
684 |
+
"text": "the fact that these programmes are full of 'stars' who want",
|
685 |
+
"decoded_text": "the fact that these programmes are full of'stars' who want",
|
686 |
+
"diff": [
|
687 |
+
"delete text[42:43] --> decoded_text[42:42] ' ' --> ''"
|
688 |
+
],
|
689 |
+
"n_oov_chars": 0,
|
690 |
+
"oov_ratio": 0.0,
|
691 |
+
"oov_charset": "[]"
|
692 |
+
},
|
693 |
+
{
|
694 |
+
"text": "You managed to hit the nail upon the top as smartly as outlined out the entire thing without having side-effects , folks can take a signal.",
|
695 |
+
"decoded_text": "You managed to hit the nail upon the top as smartly as outlined out the entire thing without having side-effects, folks can take a signal.",
|
696 |
+
"diff": [
|
697 |
+
"delete text[112:113] --> decoded_text[112:112] ' ' --> ''"
|
698 |
+
],
|
699 |
+
"n_oov_chars": 0,
|
700 |
+
"oov_ratio": 0.0,
|
701 |
+
"oov_charset": "[]"
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"text": "I've been exploring for a little bit for any high-quality articles or weblog posts in this sort of space . Exploring in Yahoo I finally stumbled upon this website. Reading this information So i am glad to convey that I've an incredibly excellent uncanny feeling I discovered exactly what I needed. I so much for sure will make sure to do not disregard this web site and give it a look regularly.",
|
705 |
+
"decoded_text": "I've been exploring for a little bit for any high-quality articles or weblog posts in this sort of space. Exploring in Yahoo I finally stumbled upon this website. Reading this information So i am glad to convey that I've an incredibly excellent uncanny feeling I discovered exactly what I needed. I so much for sure will make sure to do not disregard this web site and give it a look regularly.",
|
706 |
+
"diff": [
|
707 |
+
"delete text[104:105] --> decoded_text[104:104] ' ' --> ''"
|
708 |
+
],
|
709 |
+
"n_oov_chars": 0,
|
710 |
+
"oov_ratio": 0.0,
|
711 |
+
"oov_charset": "[]"
|
712 |
+
},
|
713 |
+
{
|
714 |
+
"text": "top and defined out the entire thing with no need side-effects , other folks can take a signal.",
|
715 |
+
"decoded_text": "top and defined out the entire thing with no need side-effects, other folks can take a signal.",
|
716 |
+
"diff": [
|
717 |
+
"delete text[62:63] --> decoded_text[62:62] ' ' --> ''"
|
718 |
+
],
|
719 |
+
"n_oov_chars": 0,
|
720 |
+
"oov_ratio": 0.0,
|
721 |
+
"oov_charset": "[]"
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"text": "Hi there, i believe that i personally noticed you actually frequented the blog as a result my spouse and i stumbled on return back your favor? . I'm just looking to to seek out what to enhance my website! I reckon that it's blog pendidikan adequate to work with several of your ideas!",
|
725 |
+
"decoded_text": "Hi there, i believe that i personally noticed you actually frequented the blog as a result my spouse and i stumbled on return back your favor?. I'm just looking to to seek out what to enhance my website! I reckon that it's blog pendidikan adequate to work with several of your ideas!",
|
726 |
+
"diff": [
|
727 |
+
"delete text[142:143] --> decoded_text[142:142] ' ' --> ''"
|
728 |
+
],
|
729 |
+
"n_oov_chars": 0,
|
730 |
+
"oov_ratio": 0.0,
|
731 |
+
"oov_charset": "[]"
|
732 |
+
},
|
733 |
+
{
|
734 |
+
"text": "Places To Stay On The Big Island is free HD wallpaper. This wallpaper was upload at December 12, 2018 upload by admin in .You can download it in your computer by clicking resolution image in Download by size:. Don't forget to rate and comment if you interest with this wallpaper.",
|
735 |
+
"decoded_text": "Places To Stay On The Big Island is free HD wallpaper. This wallpaper was upload at December 12, 2018 upload by admin in.You can download it in your computer by clicking resolution image in Download by size:. Don't forget to rate and comment if you interest with this wallpaper.",
|
736 |
+
"diff": [
|
737 |
+
"delete text[120:121] --> decoded_text[120:120] ' ' --> ''"
|
738 |
+
],
|
739 |
+
"n_oov_chars": 0,
|
740 |
+
"oov_ratio": 0.0,
|
741 |
+
"oov_charset": "[]"
|
742 |
+
},
|
743 |
+
{
|
744 |
+
"text": "LADY T.: ... But they are more respectable than the Socialists.",
|
745 |
+
"decoded_text": "LADY T.:... But they are more respectable than the Socialists.",
|
746 |
+
"diff": [
|
747 |
+
"delete text[8:9] --> decoded_text[8:8] ' ' --> ''"
|
748 |
+
],
|
749 |
+
"n_oov_chars": 0,
|
750 |
+
"oov_ratio": 0.0,
|
751 |
+
"oov_charset": "[]"
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"text": "LADY T.: You needn't be sarcastic with me. (The emphasis is on 'me')",
|
755 |
+
"decoded_text": "LADY T.: You needn't be sarcastic with me. (The emphasis is on'me')",
|
756 |
+
"diff": [
|
757 |
+
"delete text[62:63] --> decoded_text[62:62] ' ' --> ''"
|
758 |
+
],
|
759 |
+
"n_oov_chars": 0,
|
760 |
+
"oov_ratio": 0.0,
|
761 |
+
"oov_charset": "[]"
|
762 |
+
},
|
763 |
+
{
|
764 |
+
"text": "LADY C.: (counting) ... six, seven, eight, nine. Good gracious!",
|
765 |
+
"decoded_text": "LADY C.: (counting)... six, seven, eight, nine. Good gracious!",
|
766 |
+
"diff": [
|
767 |
+
"delete text[19:20] --> decoded_text[19:19] ' ' --> ''"
|
768 |
+
],
|
769 |
+
"n_oov_chars": 0,
|
770 |
+
"oov_ratio": 0.0,
|
771 |
+
"oov_charset": "[]"
|
772 |
+
},
|
773 |
+
{
|
774 |
+
"text": "a perfect fifth) which I take to be 'something to keep the wet out'.",
|
775 |
+
"decoded_text": "a perfect fifth) which I take to be'something to keep the wet out'.",
|
776 |
+
"diff": [
|
777 |
+
"delete text[35:36] --> decoded_text[35:35] ' ' --> ''"
|
778 |
+
],
|
779 |
+
"n_oov_chars": 0,
|
780 |
+
"oov_ratio": 0.0,
|
781 |
+
"oov_charset": "[]"
|
782 |
+
},
|
783 |
+
{
|
784 |
+
"text": "LADY C.: But you also represent, I take it, the ...",
|
785 |
+
"decoded_text": "LADY C.: But you also represent, I take it, the...",
|
786 |
+
"diff": [
|
787 |
+
"delete text[47:48] --> decoded_text[47:47] ' ' --> ''"
|
788 |
+
],
|
789 |
+
"n_oov_chars": 0,
|
790 |
+
"oov_ratio": 0.0,
|
791 |
+
"oov_charset": "[]"
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"text": "LADY C.: But I feel that it has. If I had foreseen ... If I ...",
|
795 |
+
"decoded_text": "LADY C.: But I feel that it has. If I had foreseen... If I...",
|
796 |
+
"diff": [
|
797 |
+
"delete text[50:51] --> decoded_text[50:50] ' ' --> ''",
|
798 |
+
"delete text[59:60] --> decoded_text[58:58] ' ' --> ''"
|
799 |
+
],
|
800 |
+
"n_oov_chars": 0,
|
801 |
+
"oov_ratio": 0.0,
|
802 |
+
"oov_charset": "[]"
|
803 |
+
},
|
804 |
+
{
|
805 |
+
"text": "especially if the American has superior advantages in the way of climate and other things. ,",
|
806 |
+
"decoded_text": "especially if the American has superior advantages in the way of climate and other things. ,",
|
807 |
+
"diff": [
|
808 |
+
"delete text[93:94] --> decoded_text[93:93] ' ' --> ''"
|
809 |
+
],
|
810 |
+
"n_oov_chars": 0,
|
811 |
+
"oov_ratio": 0.0,
|
812 |
+
"oov_charset": "[]"
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"text": "(a) The name, address and occupation of the applicant—Orland P. ,",
|
816 |
+
"decoded_text": "(a) The name, address and occupation of the applicant—Orland P.,",
|
817 |
+
"diff": [
|
818 |
+
"delete text[64:65] --> decoded_text[64:64] ' ' --> ''"
|
819 |
+
],
|
820 |
+
"n_oov_chars": 0,
|
821 |
+
"oov_ratio": 0.0,
|
822 |
+
"oov_charset": "[]"
|
823 |
+
},
|
824 |
+
{
|
825 |
+
"text": "is the first step towards the exploitation and .the subordination of",
|
826 |
+
"decoded_text": "is the first step towards the exploitation and.the subordination of",
|
827 |
+
"diff": [
|
828 |
+
"delete text[46:47] --> decoded_text[46:46] ' ' --> ''"
|
829 |
+
],
|
830 |
+
"n_oov_chars": 0,
|
831 |
+
"oov_ratio": 0.0,
|
832 |
+
"oov_charset": "[]"
|
833 |
+
},
|
834 |
+
{
|
835 |
+
"text": "Have you stopped to consider the saving of $ $ $ on the House Furnishing you require ?",
|
836 |
+
"decoded_text": "Have you stopped to consider the saving of $ $ $ on the House Furnishing you require?",
|
837 |
+
"diff": [
|
838 |
+
"delete text[84:85] --> decoded_text[84:84] ' ' --> ''"
|
839 |
+
],
|
840 |
+
"n_oov_chars": 0,
|
841 |
+
"oov_ratio": 0.0,
|
842 |
+
"oov_charset": "[]"
|
843 |
+
},
|
844 |
+
{
|
845 |
+
"text": "\"At the .very best, the whole matter of the agreement is an experiment— a foolish interference with",
|
846 |
+
"decoded_text": "\"At the.very best, the whole matter of the agreement is an experiment— a foolish interference with",
|
847 |
+
"diff": [
|
848 |
+
"delete text[7:8] --> decoded_text[7:7] ' ' --> ''"
|
849 |
+
],
|
850 |
+
"n_oov_chars": 0,
|
851 |
+
"oov_ratio": 0.0,
|
852 |
+
"oov_charset": "[]"
|
853 |
+
},
|
854 |
+
{
|
855 |
+
"text": "My 7 years old boys is autistic and we live in 3 bed rooms house in the best neighborhood in San Diego ... more",
|
856 |
+
"decoded_text": "My 7 years old boys is autistic and we live in 3 bed rooms house in the best neighborhood in San Diego... more",
|
857 |
+
"diff": [
|
858 |
+
"delete text[102:103] --> decoded_text[102:102] ' ' --> ''"
|
859 |
+
],
|
860 |
+
"n_oov_chars": 0,
|
861 |
+
"oov_ratio": 0.0,
|
862 |
+
"oov_charset": "[]"
|
863 |
+
},
|
864 |
+
{
|
865 |
+
"text": "take full responsibility of managing my kids time, extra activities, play dates, pick up and drop off to school, keep routine in place accompanying them, follow up school work ... more",
|
866 |
+
"decoded_text": "take full responsibility of managing my kids time, extra activities, play dates, pick up and drop off to school, keep routine in place accompanying them, follow up school work... more",
|
867 |
+
"diff": [
|
868 |
+
"delete text[175:176] --> decoded_text[175:175] ' ' --> ''"
|
869 |
+
],
|
870 |
+
"n_oov_chars": 0,
|
871 |
+
"oov_ratio": 0.0,
|
872 |
+
"oov_charset": "[]"
|
873 |
+
},
|
874 |
+
{
|
875 |
+
"text": "Skills Required: Perform a variety of cleaning activities such as washing, ironing, sweeping, mopping, dusting and polishing. Adhere strictly to rules regarding health and safety. Ability to work with little supervision and maintain ...",
|
876 |
+
"decoded_text": "Skills Required: Perform a variety of cleaning activities such as washing, ironing, sweeping, mopping, dusting and polishing. Adhere strictly to rules regarding health and safety. Ability to work with little supervision and maintain...",
|
877 |
+
"diff": [
|
878 |
+
"replace text[232:236] --> decoded_text[232:235] ' ...' --> '...'"
|
879 |
+
],
|
880 |
+
"n_oov_chars": 0,
|
881 |
+
"oov_ratio": 0.0,
|
882 |
+
"oov_charset": "[]"
|
883 |
+
},
|
884 |
+
{
|
885 |
+
"text": "Hello, We are small family husband and wife with little girl and expected to have new baby girl soon. We need housekeeper with experience. she is willing to learn cocking some of our dishes. No much work. We will provide accommodation, ... more",
|
886 |
+
"decoded_text": "Hello, We are small family husband and wife with little girl and expected to have new baby girl soon. We need housekeeper with experience. she is willing to learn cocking some of our dishes. No much work. We will provide accommodation,... more",
|
887 |
+
"diff": [
|
888 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
889 |
+
],
|
890 |
+
"n_oov_chars": 0,
|
891 |
+
"oov_ratio": 0.0,
|
892 |
+
"oov_charset": "[]"
|
893 |
+
},
|
894 |
+
{
|
895 |
+
"text": "I am attending Plovdiv medical school and needs someone to stay with my kids at my apartment until I finish my classes and come back home, appreciate quick response ... more",
|
896 |
+
"decoded_text": "I am attending Plovdiv medical school and needs someone to stay with my kids at my apartment until I finish my classes and come back home, appreciate quick response... more",
|
897 |
+
"diff": [
|
898 |
+
"delete text[164:165] --> decoded_text[164:164] ' ' --> ''"
|
899 |
+
],
|
900 |
+
"n_oov_chars": 0,
|
901 |
+
"oov_ratio": 0.0,
|
902 |
+
"oov_charset": "[]"
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"text": "I need for a personal assistant for our fashion manager at one of the biggest companies in Kuwait. If interested please ... more",
|
906 |
+
"decoded_text": "I need for a personal assistant for our fashion manager at one of the biggest companies in Kuwait. If interested please... more",
|
907 |
+
"diff": [
|
908 |
+
"delete text[119:120] --> decoded_text[119:119] ' ' --> ''"
|
909 |
+
],
|
910 |
+
"n_oov_chars": 0,
|
911 |
+
"oov_ratio": 0.0,
|
912 |
+
"oov_charset": "[]"
|
913 |
+
},
|
914 |
+
{
|
915 |
+
"text": "We are looking for Private Nurses(Basic Nursing Qualification required) for Elders, Parents, b Opening is in one of the royal family in Dammam. Taking care of old Parents(AGE 70 to 80) ,having knowledge of giving medicines. Traveling ...",
|
916 |
+
"decoded_text": "We are looking for Private Nurses(Basic Nursing Qualification required) for Elders, Parents, b Opening is in one of the royal family in Dammam. Taking care of old Parents(AGE 70 to 80),having knowledge of giving medicines. Traveling...",
|
917 |
+
"diff": [
|
918 |
+
"delete text[184:185] --> decoded_text[184:184] ' ' --> ''",
|
919 |
+
"replace text[233:237] --> decoded_text[232:235] ' ...' --> '...'"
|
920 |
+
],
|
921 |
+
"n_oov_chars": 0,
|
922 |
+
"oov_ratio": 0.0,
|
923 |
+
"oov_charset": "[]"
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"text": "It’s a long list To write it all down , but I will fill you up via face time interview or call . Mostly it’s two jobs Personal assistance to the employer and her children and light house keeping and it involve a lot off traveling .... more",
|
927 |
+
"decoded_text": "It’s a long list To write it all down, but I will fill you up via face time interview or call. Mostly it’s two jobs Personal assistance to the employer and her children and light house keeping and it involve a lot off traveling.... more",
|
928 |
+
"diff": [
|
929 |
+
"delete text[37:38] --> decoded_text[37:37] ' ' --> ''",
|
930 |
+
"delete text[94:95] --> decoded_text[93:93] ' ' --> ''",
|
931 |
+
"delete text[229:230] --> decoded_text[227:227] ' ' --> ''"
|
932 |
+
],
|
933 |
+
"n_oov_chars": 0,
|
934 |
+
"oov_ratio": 0.0,
|
935 |
+
"oov_charset": "[]"
|
936 |
+
},
|
937 |
+
{
|
938 |
+
"text": "I am looking for a helper who can start IMMEDIATELY for the next 2 weeks. We are a small family of 4 who will have guests staying with us for almost 2 weeks and the extra cleaning help will be needed for the time they are here. Live in ... more",
|
939 |
+
"decoded_text": "I am looking for a helper who can start IMMEDIATELY for the next 2 weeks. We are a small family of 4 who will have guests staying with us for almost 2 weeks and the extra cleaning help will be needed for the time they are here. Live in... more",
|
940 |
+
"diff": [
|
941 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
942 |
+
],
|
943 |
+
"n_oov_chars": 0,
|
944 |
+
"oov_ratio": 0.0,
|
945 |
+
"oov_charset": "[]"
|
946 |
+
},
|
947 |
+
{
|
948 |
+
"text": "We are living in Langata, Karen, Nairobi. Would provide your own room and bathroom. We are a very caring family, and looking for a very caring person to join our family. ... more",
|
949 |
+
"decoded_text": "We are living in Langata, Karen, Nairobi. Would provide your own room and bathroom. We are a very caring family, and looking for a very caring person to join our family.... more",
|
950 |
+
"diff": [
|
951 |
+
"delete text[169:170] --> decoded_text[169:169] ' ' --> ''"
|
952 |
+
],
|
953 |
+
"n_oov_chars": 0,
|
954 |
+
"oov_ratio": 0.0,
|
955 |
+
"oov_charset": "[]"
|
956 |
+
},
|
957 |
+
{
|
958 |
+
"text": "Hello, We are looking for a housemaid/nanny for our home. We have two children, aged 2 and 4. The children go to school from 8am to 3.30pk. We live in a small house as follows: First floor: 1 Living Room for Guests which is ... more",
|
959 |
+
"decoded_text": "Hello, We are looking for a housemaid/nanny for our home. We have two children, aged 2 and 4. The children go to school from 8am to 3.30pk. We live in a small house as follows: First floor: 1 Living Room for Guests which is... more",
|
960 |
+
"diff": [
|
961 |
+
"replace text[223:232] --> decoded_text[223:231] ' ... more' --> '... more'"
|
962 |
+
],
|
963 |
+
"n_oov_chars": 0,
|
964 |
+
"oov_ratio": 0.0,
|
965 |
+
"oov_charset": "[]"
|
966 |
+
},
|
967 |
+
{
|
968 |
+
"text": "So far we have 2 nurses and looking for an additional one + 1 care giver to work on 12 hours shift duties, one day off day when the team is complete. If not then we offer overtime for the extra days. We look forward to have you as a ... more",
|
969 |
+
"decoded_text": "So far we have 2 nurses and looking for an additional one + 1 care giver to work on 12 hours shift duties, one day off day when the team is complete. If not then we offer overtime for the extra days. We look forward to have you as a... more",
|
970 |
+
"diff": [
|
971 |
+
"replace text[232:241] --> decoded_text[232:240] ' ... more' --> '... more'"
|
972 |
+
],
|
973 |
+
"n_oov_chars": 0,
|
974 |
+
"oov_ratio": 0.0,
|
975 |
+
"oov_charset": "[]"
|
976 |
+
},
|
977 |
+
{
|
978 |
+
"text": "We 4 persons my wife, son, my mother in law's and me ... more",
|
979 |
+
"decoded_text": "We 4 persons my wife, son, my mother in law's and me... more",
|
980 |
+
"diff": [
|
981 |
+
"delete text[52:53] --> decoded_text[52:52] ' ' --> ''"
|
982 |
+
],
|
983 |
+
"n_oov_chars": 0,
|
984 |
+
"oov_ratio": 0.0,
|
985 |
+
"oov_charset": "[]"
|
986 |
+
},
|
987 |
+
{
|
988 |
+
"text": "I need an experienced housekeeper who know how to work with cleaning machines all kinds. Im about to start my cleaning company. for a start there will be a salary for first 6 months. if we pull it off and secceed. I will make salary by ... more",
|
989 |
+
"decoded_text": "I need an experienced housekeeper who know how to work with cleaning machines all kinds. Im about to start my cleaning company. for a start there will be a salary for first 6 months. if we pull it off and secceed. I will make salary by... more",
|
990 |
+
"diff": [
|
991 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
992 |
+
],
|
993 |
+
"n_oov_chars": 0,
|
994 |
+
"oov_ratio": 0.0,
|
995 |
+
"oov_charset": "[]"
|
996 |
+
},
|
997 |
+
{
|
998 |
+
"text": "We are a Kuwaiti couple with a baby on the way ,we would like in our housekeeper to assist us and be true to her words and work. We would love to have a respectful employer-employee relationship. We can have an arrangement that suit a... more",
|
999 |
+
"decoded_text": "We are a Kuwaiti couple with a baby on the way,we would like in our housekeeper to assist us and be true to her words and work. We would love to have a respectful employer-employee relationship. We can have an arrangement that suit a... more",
|
1000 |
+
"diff": [
|
1001 |
+
"delete text[46:47] --> decoded_text[46:46] ' ' --> ''"
|
1002 |
+
],
|
1003 |
+
"n_oov_chars": 0,
|
1004 |
+
"oov_ratio": 0.0,
|
1005 |
+
"oov_charset": "[]"
|
1006 |
+
},
|
1007 |
+
{
|
1008 |
+
"text": "Hello .. i want someone who like and love baby.. my baby is very friendly and he is love playing ... more",
|
1009 |
+
"decoded_text": "Hello.. i want someone who like and love baby.. my baby is very friendly and he is love playing... more",
|
1010 |
+
"diff": [
|
1011 |
+
"delete text[5:6] --> decoded_text[5:5] ' ' --> ''",
|
1012 |
+
"delete text[96:97] --> decoded_text[95:95] ' ' --> ''"
|
1013 |
+
],
|
1014 |
+
"n_oov_chars": 0,
|
1015 |
+
"oov_ratio": 0.0,
|
1016 |
+
"oov_charset": "[]"
|
1017 |
+
},
|
1018 |
+
{
|
1019 |
+
"text": "I require an assistant who speaks fluent English and Armenian. The job is to basically assign you with tasks to search for suppliers and contractors who can do specific tasks, negotiate with them, and report back to me the findings. In ... more",
|
1020 |
+
"decoded_text": "I require an assistant who speaks fluent English and Armenian. The job is to basically assign you with tasks to search for suppliers and contractors who can do specific tasks, negotiate with them, and report back to me the findings. In... more",
|
1021 |
+
"diff": [
|
1022 |
+
"replace text[235:244] --> decoded_text[235:243] ' ... more' --> '... more'"
|
1023 |
+
],
|
1024 |
+
"n_oov_chars": 0,
|
1025 |
+
"oov_ratio": 0.0,
|
1026 |
+
"oov_charset": "[]"
|
1027 |
+
},
|
1028 |
+
{
|
1029 |
+
"text": "Yeah ... no thanks. When I make an appointment next year I'll ask for Demerol/Versed, which worked fine the first time. My father, a surgeon, always emphasized the slim but serious risks of anesthesia. Never be put under unless it's a medical necessity, he told me. So skipping the propofol seems like a wise medical approach, as well as a money-saver.",
|
1030 |
+
"decoded_text": "Yeah... no thanks. When I make an appointment next year I'll ask for Demerol/Versed, which worked fine the first time. My father, a surgeon, always emphasized the slim but serious risks of anesthesia. Never be put under unless it's a medical necessity, he told me. So skipping the propofol seems like a wise medical approach, as well as a money-saver.",
|
1031 |
+
"diff": [
|
1032 |
+
"delete text[4:5] --> decoded_text[4:4] ' ' --> ''"
|
1033 |
+
],
|
1034 |
+
"n_oov_chars": 0,
|
1035 |
+
"oov_ratio": 0.0,
|
1036 |
+
"oov_charset": "[]"
|
1037 |
+
},
|
1038 |
+
{
|
1039 |
+
"text": "Carolinas HealthCare System is still offering financial aid to help some low-income patients pay insurance premiums for 2015. As I reported ...",
|
1040 |
+
"decoded_text": "Carolinas HealthCare System is still offering financial aid to help some low-income patients pay insurance premiums for 2015. As I reported...",
|
1041 |
+
"diff": [
|
1042 |
+
"delete text[139:140] --> decoded_text[139:139] ' ' --> ''"
|
1043 |
+
],
|
1044 |
+
"n_oov_chars": 0,
|
1045 |
+
"oov_ratio": 0.0,
|
1046 |
+
"oov_charset": "[]"
|
1047 |
+
},
|
1048 |
+
{
|
1049 |
+
"text": "He’ll be 27 in March and just came off the best season of his career, hitting .300/.389/.474 with 13 HR, and 11 triples.",
|
1050 |
+
"decoded_text": "He’ll be 27 in March and just came off the best season of his career, hitting.300/.389/.474 with 13 HR, and 11 triples.",
|
1051 |
+
"diff": [
|
1052 |
+
"delete text[77:78] --> decoded_text[77:77] ' ' --> ''"
|
1053 |
+
],
|
1054 |
+
"n_oov_chars": 0,
|
1055 |
+
"oov_ratio": 0.0,
|
1056 |
+
"oov_charset": "[]"
|
1057 |
+
},
|
1058 |
+
{
|
1059 |
+
"text": "You don’t freak out when 25 year old Alex Rodriguez strikes out 135 times because he’s hitting .318/.399/.622 with 52 bombs and 18 steals and he’s playing GG quality defense.",
|
1060 |
+
"decoded_text": "You don’t freak out when 25 year old Alex Rodriguez strikes out 135 times because he’s hitting.318/.399/.622 with 52 bombs and 18 steals and he’s playing GG quality defense.",
|
1061 |
+
"diff": [
|
1062 |
+
"delete text[94:95] --> decoded_text[94:94] ' ' --> ''"
|
1063 |
+
],
|
1064 |
+
"n_oov_chars": 0,
|
1065 |
+
"oov_ratio": 0.0,
|
1066 |
+
"oov_charset": "[]"
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"text": "46 of those players OPSed .800 or more.",
|
1070 |
+
"decoded_text": "46 of those players OPSed.800 or more.",
|
1071 |
+
"diff": [
|
1072 |
+
"delete text[25:26] --> decoded_text[25:25] ' ' --> ''"
|
1073 |
+
],
|
1074 |
+
"n_oov_chars": 0,
|
1075 |
+
"oov_ratio": 0.0,
|
1076 |
+
"oov_charset": "[]"
|
1077 |
+
},
|
1078 |
+
{
|
1079 |
+
"text": "From May 20th until June 29th Lee had an ERA of 5.68 and opposing batters hit over .300 off of him. During that almost six week stretch, which made up over 25% of his starts in ‘12, he was awful and run support was irrelevant as he put the Phillies back on their heels in start after start.",
|
1080 |
+
"decoded_text": "From May 20th until June 29th Lee had an ERA of 5.68 and opposing batters hit over.300 off of him. During that almost six week stretch, which made up over 25% of his starts in ‘12, he was awful and run support was irrelevant as he put the Phillies back on their heels in start after start.",
|
1081 |
+
"diff": [
|
1082 |
+
"delete text[82:83] --> decoded_text[82:82] ' ' --> ''"
|
1083 |
+
],
|
1084 |
+
"n_oov_chars": 0,
|
1085 |
+
"oov_ratio": 0.0,
|
1086 |
+
"oov_charset": "[]"
|
1087 |
+
},
|
1088 |
+
{
|
1089 |
+
"text": "There will be the first EVENT for 2nd CBT ! If you have any interest just join !",
|
1090 |
+
"decoded_text": "There will be the first EVENT for 2nd CBT! If you have any interest just join!",
|
1091 |
+
"diff": [
|
1092 |
+
"delete text[41:42] --> decoded_text[41:41] ' ' --> ''",
|
1093 |
+
"delete text[78:79] --> decoded_text[77:77] ' ' --> ''"
|
1094 |
+
],
|
1095 |
+
"n_oov_chars": 0,
|
1096 |
+
"oov_ratio": 0.0,
|
1097 |
+
"oov_charset": "[]"
|
1098 |
+
},
|
1099 |
+
{
|
1100 |
+
"text": "\"A film — it should be seen on a screen,\" she says. \"You should be able to witness it at the same proportion or bigger than life. ... I guess maybe it would make the job a little easier — I wouldn't have to worry about if the third button matched — but I don't want to do it that way.\"",
|
1101 |
+
"decoded_text": "\"A film — it should be seen on a screen,\" she says. \"You should be able to witness it at the same proportion or bigger than life.... I guess maybe it would make the job a little easier — I wouldn't have to worry about if the third button matched — but I don't want to do it that way.\"",
|
1102 |
+
"diff": [
|
1103 |
+
"delete text[129:130] --> decoded_text[129:129] ' ' --> ''"
|
1104 |
+
],
|
1105 |
+
"n_oov_chars": 0,
|
1106 |
+
"oov_ratio": 0.0,
|
1107 |
+
"oov_charset": "[]"
|
1108 |
+
},
|
1109 |
+
{
|
1110 |
+
"text": "The travel takes a toll; Dresser has two young children and he wants to watch them grow up. And it isn't just his kids who notice he's gone, he says: \"After I came back [shooting] in North Carolina ... my dry cleaner asked me, 'Where have you been? I haven't seen you in a very long time — did you go to another drycleaner?'\"",
|
1111 |
+
"decoded_text": "The travel takes a toll; Dresser has two young children and he wants to watch them grow up. And it isn't just his kids who notice he's gone, he says: \"After I came back [shooting] in North Carolina... my dry cleaner asked me, 'Where have you been? I haven't seen you in a very long time — did you go to another drycleaner?'\"",
|
1112 |
+
"diff": [
|
1113 |
+
"delete text[197:198] --> decoded_text[197:197] ' ' --> ''"
|
1114 |
+
],
|
1115 |
+
"n_oov_chars": 0,
|
1116 |
+
"oov_ratio": 0.0,
|
1117 |
+
"oov_charset": "[]"
|
1118 |
+
},
|
1119 |
+
{
|
1120 |
+
"text": "Artisans still had to paint the gun to look antique, but the 3-D printer lets the prop master duplicate the gun easily. \"We made two of them,\" Glenn says. \"Because with an action prop, if it breaks ... you lose a day of shooting.\" The gun isn't on screen for more than a few seconds but each one cost about $20,000.",
|
1121 |
+
"decoded_text": "Artisans still had to paint the gun to look antique, but the 3-D printer lets the prop master duplicate the gun easily. \"We made two of them,\" Glenn says. \"Because with an action prop, if it breaks... you lose a day of shooting.\" The gun isn't on screen for more than a few seconds but each one cost about $20,000.",
|
1122 |
+
"diff": [
|
1123 |
+
"delete text[197:198] --> decoded_text[197:197] ' ' --> ''"
|
1124 |
+
],
|
1125 |
+
"n_oov_chars": 0,
|
1126 |
+
"oov_ratio": 0.0,
|
1127 |
+
"oov_charset": "[]"
|
1128 |
+
},
|
1129 |
+
{
|
1130 |
+
"text": "My mother was very wary at first and now she's come around 180 degrees. She's like one of my biggest fans, now. Like, she'll come over to my house and she'll be like, \"OK, listen: I need two t-shirts from the comedy show and give me three DVDs. The neighbors are asking for them.\" ...",
|
1131 |
+
"decoded_text": "My mother was very wary at first and now she's come around 180 degrees. She's like one of my biggest fans, now. Like, she'll come over to my house and she'll be like, \"OK, listen: I need two t-shirts from the comedy show and give me three DVDs. The neighbors are asking for them.\"...",
|
1132 |
+
"diff": [
|
1133 |
+
"replace text[280:284] --> decoded_text[280:283] ' ...' --> '...'"
|
1134 |
+
],
|
1135 |
+
"n_oov_chars": 0,
|
1136 |
+
"oov_ratio": 0.0,
|
1137 |
+
"oov_charset": "[]"
|
1138 |
+
},
|
1139 |
+
{
|
1140 |
+
"text": "My thoughts go to two places: One is that when cops are attacked, they close ranks. I'm not talking about the blue wall of silence, but I think what happens is \"us versus them.\" I'm talking about incidents which the cops — like [in] Ferguson where an unarmed man was shot, when they get under attack with the media, they just close ranks. It's like buffalo when they see lions out there. ...",
|
1141 |
+
"decoded_text": "My thoughts go to two places: One is that when cops are attacked, they close ranks. I'm not talking about the blue wall of silence, but I think what happens is \"us versus them.\" I'm talking about incidents which the cops — like [in] Ferguson where an unarmed man was shot, when they get under attack with the media, they just close ranks. It's like buffalo when they see lions out there....",
|
1142 |
+
"diff": [
|
1143 |
+
"replace text[387:391] --> decoded_text[387:390] ' ...' --> '...'"
|
1144 |
+
],
|
1145 |
+
"n_oov_chars": 0,
|
1146 |
+
"oov_ratio": 0.0,
|
1147 |
+
"oov_charset": "[]"
|
1148 |
+
},
|
1149 |
+
{
|
1150 |
+
"text": "\"We ... want to capture the potential of unmanned aircraft and we have been working to develop the framework for the safe integration of this technology into our airspace,\" Department of Transportation Secretary Anthony Foxx said during a teleconference with journalists about the new proposed rules.",
|
1151 |
+
"decoded_text": "\"We... want to capture the potential of unmanned aircraft and we have been working to develop the framework for the safe integration of this technology into our airspace,\" Department of Transportation Secretary Anthony Foxx said during a teleconference with journalists about the new proposed rules.",
|
1152 |
+
"diff": [
|
1153 |
+
"delete text[3:4] --> decoded_text[3:3] ' ' --> ''"
|
1154 |
+
],
|
1155 |
+
"n_oov_chars": 0,
|
1156 |
+
"oov_ratio": 0.0,
|
1157 |
+
"oov_charset": "[]"
|
1158 |
+
},
|
1159 |
+
{
|
1160 |
+
"text": "The statement says that the proposed rules ensure \"that the Federal Government's use of UAS takes into account ... important concerns and in service of them, promotes better accountability and transparent use of this technology.\"",
|
1161 |
+
"decoded_text": "The statement says that the proposed rules ensure \"that the Federal Government's use of UAS takes into account... important concerns and in service of them, promotes better accountability and transparent use of this technology.\"",
|
1162 |
+
"diff": [
|
1163 |
+
"delete text[110:111] --> decoded_text[110:110] ' ' --> ''"
|
1164 |
+
],
|
1165 |
+
"n_oov_chars": 0,
|
1166 |
+
"oov_ratio": 0.0,
|
1167 |
+
"oov_charset": "[]"
|
1168 |
+
},
|
1169 |
+
{
|
1170 |
+
"text": "Context and Background Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools . Highly mobile (HM) or transient children attend two or more schools each school year and have been shown to be at high risk for dropping out (leaving early). There is no way to measure the size of this population.",
|
1171 |
+
"decoded_text": "Context and Background Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools. Highly mobile (HM) or transient children attend two or more schools each school year and have been shown to be at high risk for dropping out (leaving early). There is no way to measure the size of this population.",
|
1172 |
+
"diff": [
|
1173 |
+
"delete text[298:299] --> decoded_text[298:298] ' ' --> ''"
|
1174 |
+
],
|
1175 |
+
"n_oov_chars": 0,
|
1176 |
+
"oov_ratio": 0.0,
|
1177 |
+
"oov_charset": "[]"
|
1178 |
+
},
|
1179 |
+
{
|
1180 |
+
"text": "Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools .",
|
1181 |
+
"decoded_text": "Homeless (H) children may periodically live out of their parent’s car or sleep on a friend’s couch (sofa) – there are approximately 1.5 million homeless children in the United States. A recent study of homeless street youth showed that some had attended as many as 19 schools.",
|
1182 |
+
"diff": [
|
1183 |
+
"delete text[275:276] --> decoded_text[275:275] ' ' --> ''"
|
1184 |
+
],
|
1185 |
+
"n_oov_chars": 0,
|
1186 |
+
"oov_ratio": 0.0,
|
1187 |
+
"oov_charset": "[]"
|
1188 |
+
},
|
1189 |
+
{
|
1190 |
+
"text": "Seventeen educators – 9 administrators Representing: 1 homeless shelter , 1 charter high school (ages 14-18) 2 middle schools (ages 11-13), 1 multi level school (grades 4 - 19) 4 elementary schools (ages 4- 10) Participants:",
|
1191 |
+
"decoded_text": "Seventeen educators – 9 administrators Representing: 1 homeless shelter, 1 charter high school (ages 14-18) 2 middle schools (ages 11-13), 1 multi level school (grades 4 - 19) 4 elementary schools (ages 4- 10) Participants:",
|
1192 |
+
"diff": [
|
1193 |
+
"delete text[71:72] --> decoded_text[71:71] ' ' --> ''"
|
1194 |
+
],
|
1195 |
+
"n_oov_chars": 0,
|
1196 |
+
"oov_ratio": 0.0,
|
1197 |
+
"oov_charset": "[]"
|
1198 |
+
},
|
1199 |
+
{
|
1200 |
+
"text": "Schools that were in: rural (1) , small towns (3), suburban areas (3), urban (3)",
|
1201 |
+
"decoded_text": "Schools that were in: rural (1), small towns (3), suburban areas (3), urban (3)",
|
1202 |
+
"diff": [
|
1203 |
+
"delete text[31:32] --> decoded_text[31:31] ' ' --> ''"
|
1204 |
+
],
|
1205 |
+
"n_oov_chars": 0,
|
1206 |
+
"oov_ratio": 0.0,
|
1207 |
+
"oov_charset": "[]"
|
1208 |
+
},
|
1209 |
+
{
|
1210 |
+
"text": "The purpose of this paper is to make a new genre of action research accessible to readers ... participatory action research, ... (www.mmu.ac.uk/carn) ...",
|
1211 |
+
"decoded_text": "The purpose of this paper is to make a new genre of action research accessible to readers... participatory action research,... (www.mmu.ac.uk/carn)...",
|
1212 |
+
"diff": [
|
1213 |
+
"delete text[89:90] --> decoded_text[89:89] ' ' --> ''",
|
1214 |
+
"delete text[124:125] --> decoded_text[123:123] ' ' --> ''",
|
1215 |
+
"delete text[149:150] --> decoded_text[147:147] ' ' --> ''"
|
1216 |
+
],
|
1217 |
+
"n_oov_chars": 0,
|
1218 |
+
"oov_ratio": 0.0,
|
1219 |
+
"oov_charset": "[]"
|
1220 |
+
},
|
1221 |
+
{
|
1222 |
+
"text": "... Trust in Action Research’ on ... for Collaborative Action Research Networks (CARN) ... collaborative and participatory approaches to research.",
|
1223 |
+
"decoded_text": "... Trust in Action Research’ on... for Collaborative Action Research Networks (CARN)... collaborative and participatory approaches to research.",
|
1224 |
+
"diff": [
|
1225 |
+
"delete text[32:33] --> decoded_text[32:32] ' ' --> ''",
|
1226 |
+
"delete text[86:87] --> decoded_text[85:85] ' ' --> ''"
|
1227 |
+
],
|
1228 |
+
"n_oov_chars": 0,
|
1229 |
+
"oov_ratio": 0.0,
|
1230 |
+
"oov_charset": "[]"
|
1231 |
+
},
|
1232 |
+
{
|
1233 |
+
"text": "The Contribution of Action Research to Development in ...",
|
1234 |
+
"decoded_text": "The Contribution of Action Research to Development in...",
|
1235 |
+
"diff": [
|
1236 |
+
"delete text[53:54] --> decoded_text[53:53] ' ' --> ''"
|
1237 |
+
],
|
1238 |
+
"n_oov_chars": 0,
|
1239 |
+
"oov_ratio": 0.0,
|
1240 |
+
"oov_charset": "[]"
|
1241 |
+
},
|
1242 |
+
{
|
1243 |
+
"text": "Using Participatory Action Research in a Local Authority ...",
|
1244 |
+
"decoded_text": "Using Participatory Action Research in a Local Authority...",
|
1245 |
+
"diff": [
|
1246 |
+
"delete text[56:57] --> decoded_text[56:56] ' ' --> ''"
|
1247 |
+
],
|
1248 |
+
"n_oov_chars": 0,
|
1249 |
+
"oov_ratio": 0.0,
|
1250 |
+
"oov_charset": "[]"
|
1251 |
+
},
|
1252 |
+
{
|
1253 |
+
"text": "Using Participatory Action Research in a Local ... , I. ‘Participatory Action Research: ... http://www.uea.ac.uk/care/carn/conf97/PAPERS ...",
|
1254 |
+
"decoded_text": "Using Participatory Action Research in a Local..., I. ‘Participatory Action Research:... http://www.uea.ac.uk/care/carn/conf97/PAPERS...",
|
1255 |
+
"diff": [
|
1256 |
+
"delete text[46:47] --> decoded_text[46:46] ' ' --> ''",
|
1257 |
+
"delete text[50:51] --> decoded_text[49:49] ' ' --> ''",
|
1258 |
+
"delete text[87:88] --> decoded_text[85:85] ' ' --> ''",
|
1259 |
+
"delete text[136:137] --> decoded_text[133:133] ' ' --> ''"
|
1260 |
+
],
|
1261 |
+
"n_oov_chars": 0,
|
1262 |
+
"oov_ratio": 0.0,
|
1263 |
+
"oov_charset": "[]"
|
1264 |
+
},
|
1265 |
+
{
|
1266 |
+
"text": "Now, can we get all customers to go to \"smart alarms\" - nope !",
|
1267 |
+
"decoded_text": "Now, can we get all customers to go to \"smart alarms\" - nope!",
|
1268 |
+
"diff": [
|
1269 |
+
"delete text[60:61] --> decoded_text[60:60] ' ' --> ''"
|
1270 |
+
],
|
1271 |
+
"n_oov_chars": 0,
|
1272 |
+
"oov_ratio": 0.0,
|
1273 |
+
"oov_charset": "[]"
|
1274 |
+
},
|
1275 |
+
{
|
1276 |
+
"text": "You didn't (suggest \"faster\"). However, \"reading between the lines\", I thought your tech and associated use cases created an opening for faster roundtrips, which can have a huge impact on learning. So I guess that means business case. (Sorry previous note done from a phone included an \"?\" which kind of changed the meaning. . . .)",
|
1277 |
+
"decoded_text": "You didn't (suggest \"faster\"). However, \"reading between the lines\", I thought your tech and associated use cases created an opening for faster roundtrips, which can have a huge impact on learning. So I guess that means business case. (Sorry previous note done from a phone included an \"?\" which kind of changed the meaning....)",
|
1278 |
+
"diff": [
|
1279 |
+
"replace text[324:329] --> decoded_text[324:326] ' . . ' --> '..'"
|
1280 |
+
],
|
1281 |
+
"n_oov_chars": 0,
|
1282 |
+
"oov_ratio": 0.0,
|
1283 |
+
"oov_charset": "[]"
|
1284 |
+
},
|
1285 |
+
{
|
1286 |
+
"text": "Instead of having eight teams for eight products, we have one team for eight products.. . . Encapsulation, Abstraction, Inheritance, Polymorphism almost allow you to make a new product any day you like. The big chore is the terminology which has to change when you go from one industry/app to another - nothing to do with programming.",
|
1287 |
+
"decoded_text": "Instead of having eight teams for eight products, we have one team for eight products.... Encapsulation, Abstraction, Inheritance, Polymorphism almost allow you to make a new product any day you like. The big chore is the terminology which has to change when you go from one industry/app to another - nothing to do with programming.",
|
1288 |
+
"diff": [
|
1289 |
+
"replace text[87:90] --> decoded_text[87:88] ' . ' --> '.'"
|
1290 |
+
],
|
1291 |
+
"n_oov_chars": 0,
|
1292 |
+
"oov_ratio": 0.0,
|
1293 |
+
"oov_charset": "[]"
|
1294 |
+
},
|
1295 |
+
{
|
1296 |
+
"text": "But I definitely was surprised to see the game so close, but in division games, they historically are. Regardless of records. The crux of it is do you believe pete can bring this team back, or do you think he’s done. Well fought and I was impressed in the loss. But it all boils down to coaching and decision making for me , whether it’s betting on a unproven kicker/ punter AGAIN. And management on that last drive AGAIN.",
|
1297 |
+
"decoded_text": "But I definitely was surprised to see the game so close, but in division games, they historically are. Regardless of records. The crux of it is do you believe pete can bring this team back, or do you think he’s done. Well fought and I was impressed in the loss. But it all boils down to coaching and decision making for me, whether it’s betting on a unproven kicker/ punter AGAIN. And management on that last drive AGAIN.",
|
1298 |
+
"diff": [
|
1299 |
+
"delete text[322:323] --> decoded_text[322:322] ' ' --> ''"
|
1300 |
+
],
|
1301 |
+
"n_oov_chars": 0,
|
1302 |
+
"oov_ratio": 0.0,
|
1303 |
+
"oov_charset": "[]"
|
1304 |
+
},
|
1305 |
+
{
|
1306 |
+
"text": "a spider 'retreat' into his hole when he is trying to coax the flies?",
|
1307 |
+
"decoded_text": "a spider'retreat' into his hole when he is trying to coax the flies?",
|
1308 |
+
"diff": [
|
1309 |
+
"delete text[8:9] --> decoded_text[8:8] ' ' --> ''"
|
1310 |
+
],
|
1311 |
+
"n_oov_chars": 0,
|
1312 |
+
"oov_ratio": 0.0,
|
1313 |
+
"oov_charset": "[]"
|
1314 |
+
},
|
1315 |
+
{
|
1316 |
+
"text": "Does a hawk 'retreat' into the sky when he is looking out for",
|
1317 |
+
"decoded_text": "Does a hawk'retreat' into the sky when he is looking out for",
|
1318 |
+
"diff": [
|
1319 |
+
"delete text[11:12] --> decoded_text[11:11] ' ' --> ''"
|
1320 |
+
],
|
1321 |
+
"n_oov_chars": 0,
|
1322 |
+
"oov_ratio": 0.0,
|
1323 |
+
"oov_charset": "[]"
|
1324 |
+
}
|
1325 |
+
]
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.es.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.fa.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.fr.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.ja.diff.json
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "好きなことで生きていく人生って憧れますよね。自分のやりたいことだけやって生きていけたらどんなに幸せなんだろうって。 で、ふと思ったんですよ。『やりたいことやって成功してる人って\"やりたいことしかやって ...",
|
4 |
+
"decoded_text": "好きなことで生きていく人生って憧れますよね。自分のやりたいことだけやって生きていけたらどんなに幸せなんだろうって。 で、ふと思ったんですよ。『やりたいことやって成功してる人って\"やりたいことしかやって...",
|
5 |
+
"diff": [
|
6 |
+
"delete text[100:101] --> decoded_text[100:100] ' ' --> ''"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "生きていると色んな事がある。 その中でも人生を左右する大きな出来事がきっと生きているうちに何度かあると思う。 そんな時、自分はどう生きるか。 全てに時がある。 そして祈りの中で導かれる時がある。 その実感を得られる時もあれば振り返った時にそう感じる時もあるだろう。 ...",
|
14 |
+
"decoded_text": "生きていると色んな事がある。 その中でも人生を左右する大きな出来事がきっと生きているうちに何度かあると思う。 そんな時、自分はどう生きるか。 全てに時がある。 そして祈りの中で導かれる時がある。 その実感を得られる時もあれば振り返った時にそう感じる時もあるだろう。...",
|
15 |
+
"diff": [
|
16 |
+
"delete text[132:133] --> decoded_text[132:132] ' ' --> ''"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "久しぶりに投稿となりました。 ここ最近はお仕事もそれなりに忙しく、ドタバタ。 でも新しい業務、新しいメンバーと共に仕事をすることで脳が活性化されているのが分かります。 先月から始めた弁当生活もたまに休んでいますが、継続中です。 今日は妻が弁当を作ってくれました。 ...",
|
24 |
+
"decoded_text": "久しぶりに投稿となりました。 ここ最近はお仕事もそれなりに忙しく、ドタバタ。 でも新しい業務、新しいメンバーと共に仕事をすることで脳が活性化されているのが分かります。 先月から始めた弁当生活もたまに休んでいますが、継続中です。 今日は妻が弁当を作ってくれました。...",
|
25 |
+
"diff": [
|
26 |
+
"delete text[131:132] --> decoded_text[131:131] ' ' --> ''"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "彼氏の元カノがまだ彼氏に未練があるかもしれません…。 私は高校1年生で、 ...",
|
34 |
+
"decoded_text": "彼氏の元カノがまだ彼氏に未練があるかもしれません…。 私は高校1年生で、...",
|
35 |
+
"diff": [
|
36 |
+
"delete text[36:37] --> decoded_text[36:36] ' ' --> ''"
|
37 |
+
],
|
38 |
+
"n_oov_chars": 0,
|
39 |
+
"oov_ratio": 0.0,
|
40 |
+
"oov_charset": "[]"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"text": "どうもこんにちは。今まで当ブログを見てくださった方ならわかると思うのですが、今日からちょっと雰囲気が変わったことに気づきました?そうです。広告が貼られるようになったのです。この広告はGoogle Ad ...",
|
44 |
+
"decoded_text": "どうもこんにちは。今まで当ブログを見てくださった方ならわかると思うのですが、今日からちょっと雰囲気が変わったことに気づきました?そうです。広告が貼られるようになったのです。この広告はGoogle Ad...",
|
45 |
+
"diff": [
|
46 |
+
"delete text[100:101] --> decoded_text[100:100] ' ' --> ''"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "初めて生命保険に加入する人が保険選びに失敗しないためのポイントについて解説してい ...",
|
54 |
+
"decoded_text": "初めて生命保険に加入する人が保険選びに失敗しないためのポイントについて解説してい...",
|
55 |
+
"diff": [
|
56 |
+
"delete text[40:41] --> decoded_text[40:40] ' ' --> ''"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "生命保険を選ぶ際に保険会社はどのように選べばよいのでしょうか?選び方のポイントな ...",
|
64 |
+
"decoded_text": "生命保険を選ぶ際に保険会社はどのように選べばよいのでしょうか?選び方のポイントな...",
|
65 |
+
"diff": [
|
66 |
+
"delete text[40:41] --> decoded_text[40:40] ' ' --> ''"
|
67 |
+
],
|
68 |
+
"n_oov_chars": 0,
|
69 |
+
"oov_ratio": 0.0,
|
70 |
+
"oov_charset": "[]"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"text": "よく言われることですが、『コンポは105以上にしたほうが無難』という��があります。 これは果たしてどういうことなのか、説明していきます。 ...",
|
74 |
+
"decoded_text": "よく言われることですが、『コンポは105以上にしたほうが無難』という説があります。 これは果たしてどういうことなのか、説明していきます。...",
|
75 |
+
"diff": [
|
76 |
+
"delete text[68:69] --> decoded_text[68:68] ' ' --> ''"
|
77 |
+
],
|
78 |
+
"n_oov_chars": 0,
|
79 |
+
"oov_ratio": 0.0,
|
80 |
+
"oov_charset": "[]"
|
81 |
+
}
|
82 |
+
]
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.ko.diff.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
stats/compression_rate/tencent.Hunyuan-1.8B-Instruct @ cc100.zh-Hans.diff.json
ADDED
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"text": "佩奇大学健康科学学院在Kaposvár(高波什瓦尔), Pécs(佩奇), Szombathely(松博特海伊) ,Zalaegerszeg (扎洛埃格塞格) 有四所地区培训中心。这些培训中心都与当地的医院和社会机构保持着良好的关系。我院在健康科学领域是匈牙利提供专业数量最多,教师数量最多和校园数量最多的教育机构。",
|
4 |
+
"decoded_text": "佩奇大学健康科学学院在Kaposvár(高波什瓦尔), Pécs(佩奇), Szombathely(松博特海伊),Zalaegerszeg (扎洛埃格塞格) 有四所地区培训中心。这些培训中心都与当地的医院和社会机构保持着良好的关系。我院在健康科学领域是匈牙利提供专业数量最多,教师数量最多和校园数量最多的教育机构。",
|
5 |
+
"diff": [
|
6 |
+
"delete text[56:57] --> decoded_text[56:56] ' ' --> ''"
|
7 |
+
],
|
8 |
+
"n_oov_chars": 0,
|
9 |
+
"oov_ratio": 0.0,
|
10 |
+
"oov_charset": "[]"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"text": "支持和 虚拟机,以及 7 . / 和的屏幕截图,在三个不同的分区中有三个文档.最大的问题之一是硬件支持可能很棘手。 我参加过一些冷酷黑暗的联赛。",
|
14 |
+
"decoded_text": "支持和 虚拟机,以及 7. / 和的屏幕截图,在三个不同的分区中有三个文档.最大的问题之一是硬件支持可能很棘手。 我参加过一些冷酷黑暗的联赛。",
|
15 |
+
"diff": [
|
16 |
+
"delete text[12:13] --> decoded_text[12:12] ' ' --> ''"
|
17 |
+
],
|
18 |
+
"n_oov_chars": 0,
|
19 |
+
"oov_ratio": 0.0,
|
20 |
+
"oov_charset": "[]"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"text": "他指责执法部门和 其他政府机构调查他与俄罗斯的关系,作为参与党派恐怖袭击的深刻的国家阴谋,他经常对媒体愤怒作为敌人。尽管瑞士化学公司与法国建筑材料公司-达成协议以结束长期存在的法律纠纷,但 .的股价在指数水平上走势平稳,股价上涨8.7%至的顶部。",
|
24 |
+
"decoded_text": "他指责执法部门和 其他政府机构调查他与俄罗斯的关系,作为参与党派恐怖袭击的深刻的国家阴谋,他经常对媒体愤怒作为敌人。尽管瑞士化学公司与法国建筑材料公司-达成协议以结束长期存在的法律纠纷,但.的股价在指数水平上走势平稳,股价上涨8.7%至的顶部。",
|
25 |
+
"diff": [
|
26 |
+
"delete text[94:95] --> decoded_text[94:94] ' ' --> ''"
|
27 |
+
],
|
28 |
+
"n_oov_chars": 0,
|
29 |
+
"oov_ratio": 0.0,
|
30 |
+
"oov_charset": "[]"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"text": "简介:上期,本报《快乐老年》版推出了暑假期间老人“上岗”带娃的报道,不同老人有不同的带娃方式。有的老人深感带娃是一种甜蜜的负担。确实,现在孩子的教育问题越来越受到重视,很多老人与时俱进,吸收了好的育儿理念。 ...",
|
34 |
+
"decoded_text": "简介:上期,本报《快乐老年》版推出了暑假期间老人“上岗”带娃的报道,不同老人有不同的带娃方式。有的老人深感带娃是一种甜蜜的负担。确实,现在孩子的教育问题越来越受到重视,很多老人与时俱进,吸收了好的育儿理念。...",
|
35 |
+
"diff": [
|
36 |
+
"delete text[103:104] --> decoded_text[103:103] ' ' --> ''"
|
37 |
+
],
|
38 |
+
"n_oov_chars": 0,
|
39 |
+
"oov_ratio": 0.0,
|
40 |
+
"oov_charset": "[]"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"text": "导演:安德鲁・麦卡锡,玛吉・基莉 ,迈克・卡希尔,马克・托德莱",
|
44 |
+
"decoded_text": "导演:安德鲁・麦卡锡,玛吉・基莉,迈克・卡希尔,马克・托德莱",
|
45 |
+
"diff": [
|
46 |
+
"delete text[16:17] --> decoded_text[16:16] ' ' --> ''"
|
47 |
+
],
|
48 |
+
"n_oov_chars": 0,
|
49 |
+
"oov_ratio": 0.0,
|
50 |
+
"oov_charset": "[]"
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"text": "龙茫接过来,他知道是什么,可是现在看不到任何东西的他拿着这些东西真的有用吗?全球华人的自由讨论天地 4 ~5 ?0 G' R5 f2 b) G. v",
|
54 |
+
"decoded_text": "龙茫接过来,他知道是什么,可是现在看不到任何东西的他拿着这些东西真的有用吗?全球华人的自由讨论天地 4 ~5?0 G' R5 f2 b) G. v",
|
55 |
+
"diff": [
|
56 |
+
"delete text[54:55] --> decoded_text[54:54] ' ' --> ''"
|
57 |
+
],
|
58 |
+
"n_oov_chars": 0,
|
59 |
+
"oov_ratio": 0.0,
|
60 |
+
"oov_charset": "[]"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"text": "如果在没有失明之前,他很可能能够看到写在手心上的字,但现在是完全看不到了,他只能感到手指在他的手心上不停地划来划去,却不知道写了什么上去。全球华人的自由讨论天地 ! _& N. f/ x6 @/ x2 v; \\\\6 I' Y6 q3 _",
|
64 |
+
"decoded_text": "如果在没有失明之前,他很可能能够看到写在手心上的字,但现在是完全看不到了,他只能感到手指在他的手心上不停地划来划去,却不知道写了什么上去。全球华人的自由讨论天地! _& N. f/ x6 @/ x2 v; \\\\6 I' Y6 q3 _",
|
65 |
+
"diff": [
|
66 |
+
"delete text[80:81] --> decoded_text[80:80] ' ' --> ''"
|
67 |
+
],
|
68 |
+
"n_oov_chars": 0,
|
69 |
+
"oov_ratio": 0.0,
|
70 |
+
"oov_charset": "[]"
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"text": "是啊,对于一个刚刚恢复光明的人来说是多么想出去走走,看看这个世界啊!华人论坛0 [ b- q4 B b' [5 W7 ?$ K! M",
|
74 |
+
"decoded_text": "是啊,对于一个刚刚恢复光明的人来说是多么想出去走走,看看这个世界啊!华人论坛0 [ b- q4 B b' [5 W7?$ K! M",
|
75 |
+
"diff": [
|
76 |
+
"delete text[58:59] --> decoded_text[58:58] ' ' --> ''"
|
77 |
+
],
|
78 |
+
"n_oov_chars": 0,
|
79 |
+
"oov_ratio": 0.0,
|
80 |
+
"oov_charset": "[]"
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"text": ". ?. h& r( A8 u1 ]# L 如果是邻居的话也用不着给我发这么一条短信吧?我记得妈说过我7岁时就搬走了,那么说我还有跟他联系吗?可是为什么那个女人会叫我郭水?",
|
84 |
+
"decoded_text": ".?. h& r( A8 u1 ]# L 如果是邻居的话也用不着给我发这么一条短信吧?我记得妈说过我7岁时就搬走了,那么说我还有跟他联系吗?可是为什么那个女人会叫我郭水?",
|
85 |
+
"diff": [
|
86 |
+
"delete text[1:2] --> decoded_text[1:1] ' ' --> ''"
|
87 |
+
],
|
88 |
+
"n_oov_chars": 0,
|
89 |
+
"oov_ratio": 0.0,
|
90 |
+
"oov_charset": "[]"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"text": "8 d7 G! M B+ X9 l/ k\" {6 ?- cbb.a4.79ae.static.theplanet.com “算了,都过去了,你就不要再问了。”蒋成说话的声音有些奇怪,好像很恐惧一样,龙茫看到他的表情似乎很紧张。",
|
94 |
+
"decoded_text": "8 d7 G! M B+ X9 l/ k\" {6?- cbb.a4.79ae.static.theplanet.com “算了,都过去了,你就不要再问了。”蒋成说话的声音有些奇怪,好像很恐惧一样,龙茫看到他的表情似乎很紧张。",
|
95 |
+
"diff": [
|
96 |
+
"delete text[24:25] --> decoded_text[24:24] ' ' --> ''"
|
97 |
+
],
|
98 |
+
"n_oov_chars": 0,
|
99 |
+
"oov_ratio": 0.0,
|
100 |
+
"oov_charset": "[]"
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"text": "“好了老婆!你看离上学的时间都还有一个多月,你就让他玩吧,等到开学后我一定会严厉管教他的!所以啊,你就取消那个补习班吧。”郭水实在也不想小茫的童年在补习班上度过,这么枯燥的人生也不是有多少人能够承受的。1 ^ `) ?4 ]% f! _",
|
104 |
+
"decoded_text": "“好了老婆!你看离上学的时间都还有一个多月,你就让他玩吧,等到开学后我一定会严厉管教他的!所以啊,你就取消那个补习班吧。”郭水实在也不想小茫的童年在补习班上度过,这么枯燥的人生也不是有多少人能够承受的。1 ^ `)?4 ]% f! _",
|
105 |
+
"diff": [
|
106 |
+
"delete text[107:108] --> decoded_text[107:107] ' ' --> ''"
|
107 |
+
],
|
108 |
+
"n_oov_chars": 0,
|
109 |
+
"oov_ratio": 0.0,
|
110 |
+
"oov_charset": "[]"
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"text": "看到这些旧家具了吗, 跟前面设计的时候渲染的颜色是不是一毛一样? 基本都是黑色的... ...我手上那个白色的板子就是渲染的时候发出神秘光线的那个宜家的隔板",
|
114 |
+
"decoded_text": "看到这些旧家具了吗, 跟前面设计的时候渲染的颜色是不是一毛一样? 基本都是黑色的......我手上那个白色的板子就是渲染的时候发出神秘光线的那个宜家的隔板",
|
115 |
+
"diff": [
|
116 |
+
"delete text[43:44] --> decoded_text[43:43] ' ' --> ''"
|
117 |
+
],
|
118 |
+
"n_oov_chars": 0,
|
119 |
+
"oov_ratio": 0.0,
|
120 |
+
"oov_charset": "[]"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"text": "bash 支持一个特殊的变量 !$,永远保存着前一条命令的最后一个参数,例如:",
|
124 |
+
"decoded_text": "bash 支持一个特殊的变量!$,永远保存着前一条命令的最后一个参数,例如:",
|
125 |
+
"diff": [
|
126 |
+
"delete text[14:15] --> decoded_text[14:14] ' ' --> ''"
|
127 |
+
],
|
128 |
+
"n_oov_chars": 0,
|
129 |
+
"oov_ratio": 0.0,
|
130 |
+
"oov_charset": "[]"
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"text": "如果你想一次删除多个项目,还可以使用 !* ( 多谢 qt 补充 : )。",
|
134 |
+
"decoded_text": "如果你想一次删除多个项目,还可以使用!* ( 多谢 qt 补充 : )。",
|
135 |
+
"diff": [
|
136 |
+
"delete text[18:19] --> decoded_text[18:18] ' ' --> ''"
|
137 |
+
],
|
138 |
+
"n_oov_chars": 0,
|
139 |
+
"oov_ratio": 0.0,
|
140 |
+
"oov_charset": "[]"
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"text": "这家位于佛罗里达(Florida )的酒店分布在2栋建筑内,提供免费WiFi和2个室外游泳池,距离劳德代尔堡海滩公园(Fort Lauderdale Beach Park)3.6公里,距离Classic Gateway Theatre剧院3.9公里。 Tara Hotel酒店的所有客房均配有平板有线电视。每间套房和一室公寓均设有小冰箱和连接浴室。 Hotal... I believe the young mans name was chun. He was great . Helpful",
|
144 |
+
"decoded_text": "这家位于佛罗里达(Florida )的酒店分布在2栋建筑内,提供免费WiFi和2个室外游泳池,距离劳德代尔堡海滩公园(Fort Lauderdale Beach Park)3.6公里,距离Classic Gateway Theatre剧院3.9公里。 Tara Hotel酒店的所有客房均配有平板有线电视。每间套房和一室公寓均设有小冰箱和连接浴室。 Hotal... I believe the young mans name was chun. He was great. Helpful",
|
145 |
+
"diff": [
|
146 |
+
"delete text[236:237] --> decoded_text[236:236] ' ' --> ''"
|
147 |
+
],
|
148 |
+
"n_oov_chars": 0,
|
149 |
+
"oov_ratio": 0.0,
|
150 |
+
"oov_charset": "[]"
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"text": "陕国投作为陕西省国资委管理的一级企业、是国内首批上市的非银行金融机构, ,净资产79.31亿元,2017年管理的信托资产总额达到4532.22亿元,具备优秀的服务地方经济建设能力和资产管理经验。[详情]",
|
154 |
+
"decoded_text": "陕国投作为陕西省国资委管理的一级企业、是国内首批上市的非银行金融机构,,净资产79.31亿元,2017年管理的信托资产总额达到4532.22亿元,具备优秀的服务地方经济建设能力和资产管理经验。[详情]",
|
155 |
+
"diff": [
|
156 |
+
"delete text[35:36] --> decoded_text[35:35] ' ' --> ''"
|
157 |
+
],
|
158 |
+
"n_oov_chars": 0,
|
159 |
+
"oov_ratio": 0.0,
|
160 |
+
"oov_charset": "[]"
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"text": "或是黎明,或是一个巨大的、敞开的舞台上洁白的聚光灯,一切都处于显在之中,正如只有意大利歌剧的场景才能够做到的。诸多的嘴巴和巨大的、敞开的身体被部署着,以宣告空间的纯粹碎片——dinanzi al re ! davanti a lui ! 来吧,这里,让我们走,让我们来,让我们离去,让我们留下——声音从腹部浮现,众多合唱队,一首流行歌曲——让我们走,让我们看,我大笑,我哭泣,我活着,我死去。书写和思,也是如此,张大的嘴巴,身体的作品。",
|
164 |
+
"decoded_text": "或是黎明,或是一个巨大的、敞开的舞台上洁白的聚光灯,一切都处于显在之中,正如只有意大利歌剧的场景才能够做到的。诸多的嘴巴和巨大的、敞开的身体被部署着,以宣告空间的纯粹碎片——dinanzi al re! davanti a lui! 来吧,这里,让我们走,让我们来,让我们离去,让我们留下——声音从腹部浮现,众多合唱队,一首流行歌曲——让我们走,让我们看,我大笑,我哭泣,我活着,我死去。书写和思,也是如此,张大的嘴巴,身体的作品。",
|
165 |
+
"diff": [
|
166 |
+
"delete text[100:101] --> decoded_text[100:100] ' ' --> ''",
|
167 |
+
"delete text[116:117] --> decoded_text[115:115] ' ' --> ''"
|
168 |
+
],
|
169 |
+
"n_oov_chars": 0,
|
170 |
+
"oov_ratio": 0.0,
|
171 |
+
"oov_charset": "[]"
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"text": "不用找了 ,那架飞机是被都敏俊劫走了,你和我面对面,距离很近,但心的距离好远。;跟喜欢的人在一起以后,笑点会变低,泪点也会变低,就连智商也低了。。",
|
175 |
+
"decoded_text": "不用找了,那架飞机是被都敏俊劫走了,你和我面对面,距离很近,但心的距离好远。;跟喜欢的人在一起以后,笑点会变低,泪点也会变低,就连智商也低了。。",
|
176 |
+
"diff": [
|
177 |
+
"delete text[4:5] --> decoded_text[4:4] ' ' --> ''"
|
178 |
+
],
|
179 |
+
"n_oov_chars": 0,
|
180 |
+
"oov_ratio": 0.0,
|
181 |
+
"oov_charset": "[]"
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"text": "聚合物电解质燃料电池亦称为质子交换膜(PEM ) 燃料电池, 最初是美国通用电气公司于1960年以空间应用为目的而研制的。由于初期研制的PEM电池内阻较大, 功率不高, 特别是质子交换膜(聚苯乙烯磺酸膜) 不能承耐强烈的电氧化还原作用而影响电池寿命, 因此, 美国宇航局试用后没有选中。直到1983 年加拿大国防部与国家研究委员会确认, PEM电池可以满足特殊的军事要求并有良好的商业前景, 于1984 年委托巴拉德能源公司对该电池进行开发研制, PEM电池的研究工作再度受到重视。美国、日本、意大利、俄罗斯、比利时等国家先后成立专门机构研究开发PEM电池。随着杜邦公司Nafion 膜的改进和Dow 化学公司Dow 膜的成功, 各类PEM 演示电池陆续问世, PEM 电池研究取得了重大进展。催化剂铂载量从10 mg・cm-2 降至0.4mg・cm-2 以下, 功率从0.1W・cm-2升至2~ 6W・cm-2, 电池组电极面积可达500~1200 cm2, 功率达5~10 kW , 其优势明显上升, 在很多方面已优于碱性燃料电池。",
|
185 |
+
"decoded_text": "聚合物电解质燃料电池亦称为质子交换膜(PEM ) 燃料电池, 最初是美国通用电气公司于1960年以空间应用为目的而研制的。由于初期研制的PEM电池内阻较大, 功率不高, 特别是质子交换膜(聚苯乙烯磺酸膜) 不能承耐强烈的电氧化还原作用而影响电池寿命, 因此, 美国宇航局试用后没有选中。直到1983 年加拿大国防部与国家研究委员会确认, PEM电池可以满足特殊的军事要求并有良好的商业前景, 于1984 年委托巴拉德能源公司对该电池进行开发研制, PEM电池的研究工作再度受到重视。美国、日本、意大利、俄罗斯、比利时等国家先后成立专门机构研究开发PEM电池。随着杜邦公司Nafion 膜的改进和Dow 化学公司Dow 膜的成功, 各类PEM 演示电池陆续问世, PEM 电池研究取得了重大进展。催化剂铂载量从10 mg・cm-2 降至0.4mg・cm-2 以下, 功率从0.1W・cm-2升至2~ 6W・cm-2, 电池组电极面积可达500~1200 cm2, ���率达5~10 kW, 其优势明显上升, 在很多方面已优于碱性燃料电池。",
|
186 |
+
"diff": [
|
187 |
+
"delete text[441:442] --> decoded_text[441:441] ' ' --> ''"
|
188 |
+
],
|
189 |
+
"n_oov_chars": 0,
|
190 |
+
"oov_ratio": 0.0,
|
191 |
+
"oov_charset": "[]"
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"text": "福州人流网: 福州宫外孕的危险性大吗?宫外孕的威胁是非常大的,但许多女性因为不了解宫外孕的症状,没有及时的发现,往往导致了悲剧的发生。下面由福州福兴妇产医院医生为大家介绍 ...",
|
195 |
+
"decoded_text": "福州人流网: 福州宫外孕的危险性大吗?宫外孕的威胁是非常大的,但许多女性因为不了解宫外孕的症状,没有及时的发现,往往导致了悲剧的发生。下面由福州福兴妇产医院医生为大家介绍...",
|
196 |
+
"diff": [
|
197 |
+
"delete text[85:86] --> decoded_text[85:85] ' ' --> ''"
|
198 |
+
],
|
199 |
+
"n_oov_chars": 0,
|
200 |
+
"oov_ratio": 0.0,
|
201 |
+
"oov_charset": "[]"
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"text": "“四组,你们那边到底什么情况?”野牛点射刘彬彬锁定胜局,鲁能3-1胜延边,再看看伊布拉希莫维奇那并不庆祝,可实际上却相当打脸的表情,两人心里头那叫一个郁闷啊,简直就跟被人穿裆没两样啊!,对于像圣埃蒂安这样的球队来说,没卖掉球员,是没足够的资金去引进球员的。“让我们看看,重新回到左边锋的加雷斯?贝尔的表现会不会有所起色。”阿兰建功卡希尔点射,恒大2-1胜绿城“I cannot understand…. The connection… exists only . between your two wands….”,“你们三个打算辞职吗?”“好啦,快吃饭了,别洗太久!”,郭士强赛后否认辽宁男篮欲退赛克罗地亚中场拿球转身面对进攻方向,看到加雷斯?贝尔已经在左路举手了,直接轻轻一脚直塞球,让皮球从阿尔维斯和普约尔中间穿过。。",
|
205 |
+
"decoded_text": "“四组,你们那边到底什么情况?”野牛点射刘彬彬锁定胜局,鲁能3-1胜延边,再看看伊布拉希莫维奇那并不庆祝,可实际上却相当打脸的表情,两人心里头那叫一个郁闷啊,简直就跟被人穿裆没两样啊!,对于像圣埃蒂安这样的球队来说,没卖掉球员,是没足够的资金去引进球员的。“让我们看看,重新回到左边锋的加雷斯?贝尔的表现会不会有所起色。”阿兰建功卡希尔点射,恒大2-1胜绿城“I cannot understand…. The connection… exists only. between your two wands….”,“你们三个打算辞职吗?”“好啦,快吃饭了,别洗太久!”,郭士强赛后否认辽宁男篮欲退赛克罗地亚中场拿球转身面对进攻方向,看到加雷斯?贝尔已经在左路举手了,直接轻轻一脚直塞球,让皮球从阿尔维斯和普约尔中间穿过。。",
|
206 |
+
"diff": [
|
207 |
+
"delete text[229:230] --> decoded_text[229:229] ' ' --> ''"
|
208 |
+
],
|
209 |
+
"n_oov_chars": 0,
|
210 |
+
"oov_ratio": 0.0,
|
211 |
+
"oov_charset": "[]"
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"text": "21.某中学数学兴趣小组为了解本校学生对电视节目的喜爱情况,随机调查了部分学生最喜爱哪一类节目 (被调查的学生只选一类并且没有不选择的) ,并将调查结果制成了如下的两个统计图(不完整).请你根据图中所提供的信息,完成下列问题:",
|
215 |
+
"decoded_text": "21.某中学数学兴趣小组为了解本校学生对电视节目的喜爱情况,随机调查了部分学生最喜爱哪一类节目 (被调查的学生只选一类并且没有不选择的),并将调查结果制成了如下的两个统计图(不完整).请你根据图中所提供的信息,完成下列问题:",
|
216 |
+
"diff": [
|
217 |
+
"delete text[68:69] --> decoded_text[68:68] ' ' --> ''"
|
218 |
+
],
|
219 |
+
"n_oov_chars": 0,
|
220 |
+
"oov_ratio": 0.0,
|
221 |
+
"oov_charset": "[]"
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"text": "“我没听到他们说的一切。另一位747副驾驶Jonhan Harman在网站上回复道:“干得好的亚当,你应该仍然打他!”Corfield在第一次作为管家之后承认客户服务“不适合我”和“这个f *商务舱里的***几乎搞定了!! T ** t !!!!!“BA空中客车A320飞行员约翰林肯插话:”我现在已经完成了三次旅行。",
|
225 |
+
"decoded_text": "“我没听到他们说的一切。另一位747副驾驶Jonhan Harman在网站上回复道:“干得好的亚当,你应该仍然打他!”Corfield在第一次作为管家之后承认客户服务“不适合我”和“这个f *商务舱里的***几乎搞定了!! T ** t!!!!!“BA空中客车A320飞行员约翰林肯插话:”我现在已经完成了三次旅行。",
|
226 |
+
"diff": [
|
227 |
+
"delete text[118:119] --> decoded_text[118:118] ' ' --> ''"
|
228 |
+
],
|
229 |
+
"n_oov_chars": 0,
|
230 |
+
"oov_ratio": 0.0,
|
231 |
+
"oov_charset": "[]"
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"text": "这份经验总结并不适合手握大把实习经历的大牛们,但除了他们应该还有不少想求职产品经理的同学。大牛的经历只会徒增悲伤,我写的东西或许能够带来一些不一样的感���。 ...",
|
235 |
+
"decoded_text": "这份经验总结并不适合手握大把实习经历的大牛们,但除了他们应该还有不少想求职产品经理的同学。大牛的经历只会徒增悲伤,我写的东西或许能够带来一些不一样的感觉。...",
|
236 |
+
"diff": [
|
237 |
+
"delete text[77:78] --> decoded_text[77:77] ' ' --> ''"
|
238 |
+
],
|
239 |
+
"n_oov_chars": 0,
|
240 |
+
"oov_ratio": 0.0,
|
241 |
+
"oov_charset": "[]"
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"text": "根据西安网页设计的特点和制作网页的经验,一般应遵循以下原则:提纲挈领。主页的作用好比一个精美的广告,以下内容必不可少 ..",
|
245 |
+
"decoded_text": "根据西安网页设计的特点和制作网页的经验,一般应遵循以下原则:提纲挈领。主页的作用好比一个精美的广告,以下内容必不可少..",
|
246 |
+
"diff": [
|
247 |
+
"delete text[58:59] --> decoded_text[58:58] ' ' --> ''"
|
248 |
+
],
|
249 |
+
"n_oov_chars": 0,
|
250 |
+
"oov_ratio": 0.0,
|
251 |
+
"oov_charset": "[]"
|
252 |
+
},
|
253 |
+
{
|
254 |
+
"text": "西安网站优化公司认为,每个月月底改标题,对优化影响并不会太大,因为月底的时候,正是搜索引擎大更新的时候,对网站进行改变的时候,改网站的标题时,虽然收录有所下降,但网站的整体排名还是比较好的 ..",
|
255 |
+
"decoded_text": "西安网站优化公司认为,每个月月底改标题,对优化影响并不会太大,因为月底的时候,正是搜索引擎大更新的时候,对网站进行改变的时候,改网站的标题时,虽然收录有所下降,但网站的整体排名还是比较好的..",
|
256 |
+
"diff": [
|
257 |
+
"delete text[94:95] --> decoded_text[94:94] ' ' --> ''"
|
258 |
+
],
|
259 |
+
"n_oov_chars": 0,
|
260 |
+
"oov_ratio": 0.0,
|
261 |
+
"oov_charset": "[]"
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"text": "客户懂网站之后,虽然对网站建设公司影响很大,但我们能够看到十分积极的一面,西安做网站公司比如说现在的网站越来越国际化,表现形式越来越丰富,技术越来越先进,服务质量越来越好等等 ..",
|
265 |
+
"decoded_text": "客户懂网站之后,虽然对网站建设公司影响很大,但我们能够看到十分积极的一面,西安做网站公司比如说现在的网站越来越国际化,表现形式越来越丰富,技术越来越先进,服务质量越来越好等等..",
|
266 |
+
"diff": [
|
267 |
+
"delete text[87:88] --> decoded_text[87:87] ' ' --> ''"
|
268 |
+
],
|
269 |
+
"n_oov_chars": 0,
|
270 |
+
"oov_ratio": 0.0,
|
271 |
+
"oov_charset": "[]"
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"text": "网站上线前期的准备工作很重要,只有在上线前做好各种准备工作,才能避免上线后出现各种问题,那么,西安做网站公司在上线前有需要做哪些工作? ..",
|
275 |
+
"decoded_text": "网站上线前期的准备工作很重要,只有在上线前做好各种准备工作,才能避免上线后出现各种问题,那么,西安做网站公司在上线前有需要做哪些工作?..",
|
276 |
+
"diff": [
|
277 |
+
"delete text[67:68] --> decoded_text[67:67] ' ' --> ''"
|
278 |
+
],
|
279 |
+
"n_oov_chars": 0,
|
280 |
+
"oov_ratio": 0.0,
|
281 |
+
"oov_charset": "[]"
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"text": "相对来说一个网站的好坏并不是在于网站的无效链接,重点还是网站的设计上和其他的细节,为什么这样说呢?西安网站建设公司认为作为一个网站来说给用户的第一眼很重要的,如果给用户第一眼感觉很差排版乱不合理 ..",
|
285 |
+
"decoded_text": "相对来说一个网站的好坏并不是在于网站的无效链接,重点还是网站的设计上和其他的细节,为什么这样说呢?西安网站建设公司认为作为一个网站来说给用户的第一眼很重要的,如果给用户第一眼感觉很差排版乱不合理..",
|
286 |
+
"diff": [
|
287 |
+
"delete text[97:98] --> decoded_text[97:97] ' ' --> ''"
|
288 |
+
],
|
289 |
+
"n_oov_chars": 0,
|
290 |
+
"oov_ratio": 0.0,
|
291 |
+
"oov_charset": "[]"
|
292 |
+
},
|
293 |
+
{
|
294 |
+
"text": "相信很多人都有一个疑问,就是西安建设网站,明明就是一个虚拟不存在的东西怎么会这么贵,少的要好几千,便宜的有几万甚至几十万!这到底是为什么呢? ..",
|
295 |
+
"decoded_text": "相信很多人都有一个疑问,就是西安建设网站,明明就是一个虚拟不存在的东西怎么会这么贵,少的要好几千,便宜的有几万甚至几十万!这到底是为什么呢?..",
|
296 |
+
"diff": [
|
297 |
+
"delete text[70:71] --> decoded_text[70:70] ' ' --> ''"
|
298 |
+
],
|
299 |
+
"n_oov_chars": 0,
|
300 |
+
"oov_ratio": 0.0,
|
301 |
+
"oov_charset": "[]"
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"text": "简介: 朱茵,女,1958年5月27日出生于上海,影视演员,戏剧家,上海戏剧家协会会员,中国民主同盟盟员,就职于上海话剧艺术中心。 曾出演多部话剧、电影以及电视剧作品,其中有话剧《日出》,电视剧《丈母娘来了》、《大男当婚》、《辣妈正传》、《老米家的婚事》 ... 查看更多>",
|
305 |
+
"decoded_text": "简介: 朱茵,女,1958年5月27日出生于上海,影视演员,戏剧���,上海戏剧家协会会员,中国民主同盟盟员,就职于上海话剧艺术中心。 曾出演多部话剧、电影以及电视剧作品,其中有话剧《日出》,电视剧《丈母娘来了》、《大男当婚》、《辣妈正传》、《老米家的婚事》... 查看更多>",
|
306 |
+
"diff": [
|
307 |
+
"delete text[127:128] --> decoded_text[127:127] ' ' --> ''"
|
308 |
+
],
|
309 |
+
"n_oov_chars": 0,
|
310 |
+
"oov_ratio": 0.0,
|
311 |
+
"oov_charset": "[]"
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"text": "5. .具有较强的产品、用户心理分析的能力与看法,对市场发展方向和动态有较强的分析能力,了解网站用户的服务需求,能够根据需求与市场变化迅速做出回应;",
|
315 |
+
"decoded_text": "5..具有较强的产品、用户心理分析的能力与看法,对市场发展方向和动态有较强的分析能力,了解网站用户的服务需求,能够根据需求与市场变化迅速做出回应;",
|
316 |
+
"diff": [
|
317 |
+
"delete text[2:3] --> decoded_text[2:2] ' ' --> ''"
|
318 |
+
],
|
319 |
+
"n_oov_chars": 0,
|
320 |
+
"oov_ratio": 0.0,
|
321 |
+
"oov_charset": "[]"
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"text": "简介:转载自:搜狐体育 作者:体育之星刘伟是中国男子拳击69公斤级名将,前中国拳击队队长。1987年出生的他,小时候却因为身高和臂展不占优势,一度被一些人认为并不适合从事专业拳击运动。但在湖北 ...",
|
325 |
+
"decoded_text": "简介:转载自:搜狐体育 作者:体育之星刘伟是中国男子拳击69公斤级名将,前中国拳击队队长。1987年出生的他,小时候却因为身高和臂展不占优势,一度被一些人认为并不适合从事专业拳击运动。但在湖北...",
|
326 |
+
"diff": [
|
327 |
+
"delete text[96:97] --> decoded_text[96:96] ' ' --> ''"
|
328 |
+
],
|
329 |
+
"n_oov_chars": 0,
|
330 |
+
"oov_ratio": 0.0,
|
331 |
+
"oov_charset": "[]"
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"text": "15 D. 10 2 2.若抛物线 y 2 ? 8x 上一点 P 到其焦点的距离为 9 ,则点 P 的坐标为( C ) 。",
|
335 |
+
"decoded_text": "15 D. 10 2 2.若抛物线 y 2? 8x 上一点 P 到其焦点的距离为 9 ,则点 P 的坐标为( C ) 。",
|
336 |
+
"diff": [
|
337 |
+
"delete text[21:22] --> decoded_text[21:21] ' ' --> ''"
|
338 |
+
],
|
339 |
+
"n_oov_chars": 0,
|
340 |
+
"oov_ratio": 0.0,
|
341 |
+
"oov_charset": "[]"
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"text": "这些是我的一些特殊服务: 用嘴亲吻, 异装癖和恋物癖, 脸部射精, 性幻想, 性玩具, 无套口交, 女同, 恋尿癖, 按摩, 脱衣舞, 三人群交, 色情淋浴, 爱经, 69 ...",
|
345 |
+
"decoded_text": "这些是我的一些特殊服务: 用嘴亲吻, 异装癖和恋物癖, 脸部射精, 性幻想, 性玩具, 无套口交, 女同, 恋尿癖, 按摩, 脱衣舞, 三人群交, 色情淋浴, 爱经, 69...",
|
346 |
+
"diff": [
|
347 |
+
"delete text[86:87] --> decoded_text[86:86] ' ' --> ''"
|
348 |
+
],
|
349 |
+
"n_oov_chars": 0,
|
350 |
+
"oov_ratio": 0.0,
|
351 |
+
"oov_charset": "[]"
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"text": "因本人另有发展,将正在营业中的班车转让,有意者请致电:13197523784 非诚勿扰,谢谢! 本贴不回复,有 ...",
|
355 |
+
"decoded_text": "因本人另有发展,将正在营业中的班车转让,有意者请致电:13197523784 非诚勿扰,谢谢! 本贴不回复,有...",
|
356 |
+
"diff": [
|
357 |
+
"delete text[55:56] --> decoded_text[55:55] ' ' --> ''"
|
358 |
+
],
|
359 |
+
"n_oov_chars": 0,
|
360 |
+
"oov_ratio": 0.0,
|
361 |
+
"oov_charset": "[]"
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"text": "现在孩子多了,想入手一架车代步。最好1.2的,因为想省油。大家都知道的现在油价突破天际。最好近二年的车 ...",
|
365 |
+
"decoded_text": "现在孩子多了,想入手一架车代步。最好1.2的,因为想省油。大家都知道的现在油价突破天际。最好近二年的车...",
|
366 |
+
"diff": [
|
367 |
+
"delete text[51:52] --> decoded_text[51:51] ' ' --> ''"
|
368 |
+
],
|
369 |
+
"n_oov_chars": 0,
|
370 |
+
"oov_ratio": 0.0,
|
371 |
+
"oov_charset": "[]"
|
372 |
+
}
|
373 |
+
]
|
vocab.py
CHANGED
@@ -400,6 +400,13 @@ _all_tokenizer_config = [
|
|
400 |
TokenizerConfig("openai/gpt-oss-120b", org="OpenAI"),
|
401 |
TokenizerConfig("Qwen/Qwen3-235B-A22B-Thinking-2507", org="Alibaba"),
|
402 |
TokenizerConfig("Qwen/Qwen3-30B-A3B-Thinking-2507", org="Alibaba"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
|
404 |
|
405 |
|
|
|
400 |
TokenizerConfig("openai/gpt-oss-120b", org="OpenAI"),
|
401 |
TokenizerConfig("Qwen/Qwen3-235B-A22B-Thinking-2507", org="Alibaba"),
|
402 |
TokenizerConfig("Qwen/Qwen3-30B-A3B-Thinking-2507", org="Alibaba"),
|
403 |
+
TokenizerConfig("Qwen/Qwen3-4B-Instruct-2507", org="Alibaba"),
|
404 |
+
TokenizerConfig("Qwen/Qwen3-Embedding-0.6B", org="Alibaba"),
|
405 |
+
TokenizerConfig("tencent/Hunyuan-1.8B-Instruct", org="Tencent"),
|
406 |
+
TokenizerConfig("tencent/Hunyuan-0.5B-Instruct", org="Tencent"),
|
407 |
+
TokenizerConfig("zai-org/GLM-4.5", org="Zai"),
|
408 |
+
TokenizerConfig("openbmb/MiniCPM-V-4", org="OpenBMB"),
|
409 |
+
# TokenizerConfig("moonshotai/Kimi-K2-Instruct", org="MoonshotAI"), # 依赖 blobfile
|
410 |
|
411 |
|
412 |
|