niobures commited on
Commit
3533d15
·
verified ·
1 Parent(s): 95be345

yumoxu-detnet

Browse files
.gitattributes CHANGED
@@ -1,35 +1,58 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # Image files - compressed
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
55
+ *.webp filter=lfs diff=lfs merge=lfs -text
56
+ # Video files - compressed
57
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
58
+ *.webm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Weakly Supervised Domain Detection
2
+ This repository releases the code and data for weakly supervised domain detection. Please cite the following paper [[bib](https://www.mitpressjournals.org/action/showCitFormats?doi=10.1162/tacl_a_00287)] if you find our code or data useful:
3
+
4
+ > In this paper we introduce domain detection as a new natural language processing task. We argue that the ability to detect textual segments which are domain-heavy, i.e., sentences or phrases which are representative of and provide evidence for a given domain would enable the development of domain aware tools and increase the domain coverage for practical applications. We propose an encoder-detector framework for domain detection and bootstrap classifiers with multiple instance learning (MIL). The models are hierarchically organized and suited to multilabel classification. We demonstrate that despite learning from minimal supervision, our models can be applied to text spans of different granularities, languages, and genres. We also explore the potential of domain detection for text summarization.
5
+
6
+ Should you have any questions, please contact me at [[email protected]](mailto:[email protected]).
7
+
8
+ ## Project Structure
9
+
10
+ ```bash
11
+ DomainDetection
12
+ │ README.md
13
+ │ spec-file.txt
14
+ └───src
15
+ │ └───frame # DetNet framework
16
+ │ │ encoder.py
17
+ │ │ detector.py
18
+ │ │ ...
19
+ │ └───config # configuration files
20
+ │ └───data # dataset parsing, building and piping
21
+ │ └───utils # miscellaneous utils
22
+ └───dataset
23
+ │ └───en # English dataset
24
+ │ └───...
25
+ │ └───zh # Chinese dataset
26
+ │ └───...
27
+ └───res # resources (vocabulary)
28
+ │ └───vocab
29
+ │ └───en # English vocabulary
30
+ │ │ vocab
31
+ │ └───zh # Chinese vocabulary
32
+ │ │ vocab
33
+ └───model # trained models
34
+ │ └───en # English models
35
+ │ │ DetNet
36
+ │ │ ...
37
+ │ └───zh # Chinese models
38
+ │ │ DetNet
39
+ │ │ ...
40
+ └───log
41
+
42
+ ```
43
+
44
+ ## Environment Setup
45
+
46
+ You can check the `spec-file.txt` provided in this project for the list of packages required.
47
+
48
+ To create a suitable environment conveniently with `conda`, run:
49
+
50
+ ```bash
51
+ conda create --name myenv --file spec-file.txt
52
+ ```
53
+
54
+ Alternatively, you can install the required packages into an existing environment:
55
+
56
+ ```bash
57
+ conda install --name myenv --file spec-file.txt
58
+ ```
59
+
60
+ ## Dataset
61
+
62
+ You can download our datasets for both English and Chinese via [Google Drive](https://drive.google.com/drive/folders/1K5TdwoezGzzb19_2QjTuNipOX9kf1tUY?usp=sharing).
63
+
64
+ After uncompressing the `*.zip` files, put them under `dataset/en` or `dataset/zh`, respectively. These include data for model training, development and testing. Note that `test` is for document-level testing, and `syn_docs` is for sentence-level testing with synthesized contexts (check the algorithm proposed in our paper for details).
65
+
66
+ `*.json` files include documents sampled from Wikipedia (in both `en` and `zh`) and NYT (in `en`); these documents are manually labeled via MTurk at both the sentence level and the word level for testing purposes.
detnet-master [yumoxu].zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e3de6cbd2e04d5598f82dcf5f4f16f9f1d37ef9e8ccc9447eec966a1b360120
3
+ size 3704884
en_dev.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2805a1ab5f57eac983fb6560eba159d8197b22c1a04f6f439dead6ad74c62736
3
+ size 21839312
en_nyt_mturk_docs.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"sent_labels": [[4], [4], [4], [4], [4], [4], [4], [4], [4], [4]], "word_labels": [[3, 6, 8, 10, 11, 14], [17, 9, 11], [8], [16, 3, 6, 7, 22], [12, 16, 18, 20, 21, 23, 24, 28, 29], [19, 20, 7, 9, 18, 15], [16, 6, 7, 24, 10, 23, 28, 30, 15], [5, 12, 22, 50, 51, 63], [4, 6, 9, 13], [17, 15]], "sents": ["Far too many patients leave the doctor's office or medical clinic with a prescription that borders on the illegible.", "They may have been told the category of the drug -- antibiotic, for example, or painkiller -- but not its name. ", "And they may have at best a vague memory of how to take it -- how much, when and with what. ", "Rarely are they warned of possible adverse effects and what to do if they notice no improvement or a downturn in their health.", "You can help protect yourself by maintaining a list of all the drugs you take -- prescription and nonprescription, vitamin-mineral supplements and herbal remedies, including the dosing schedule and amount. ", "Bring this list with you whenever you visit the doctor and make sure it is reviewed by the health care provider.", "If you have ever experienced an allergic reaction to a medication, or have a food allergy, be sure to tell your health care provider before any medication is prescribed.", "When you are given a prescription, ask the name of the drug, what it is supposed to do for your condition, how much to take and how often it should be taken, whether it should be taken with food or on an empty stomach, what side effects are possible and what effects warrant a prompt call to the doctor. ", "Also ask how the medication might interact with other remedies you take or foods you eat.", "And, of course, write down the answers while you are still in the doctor's office."], "doc_labels": [4], "doc_id": "1815886_HEA"}
2
+ {"sent_labels": [[0], [0], [0], [0], [0], [1], [2], [0, 1, 2], [0], [0, 6], [0], [1]], "word_labels": [[32, 1, 35, 24, 25, 12], [32, 2, 3, 37, 18, 31], [32, 3, 17, 38, 39, 8, 18, 24, 29, 31], [3], [16, 20, 5, 6, 24, 25, 28, 13, 29], [35, 26], [16, 3, 4, 7, 8, 15], [3, 38, 7, 8, 10, 43, 29, 19, 9], [7, 8, 11, 13, 15, 16, 27, 31, 32, 38, 39, 40, 41, 43, 44, 47, 48], [11], [19, 4, 38, 10], [7, 9, 16, 19, 22, 24, 25, 28, 32, 33, 35]], "sents": ["''The presidency is out of the question at this point because of Bush fatigue,'' said Peter Schweizer, a fellow at the Hoover Institution at Stanford who wrote ''The Bushes: A Dynasty'' with his wife, Rochelle. ", "''But the vice presidential slot is something that's very much in play. He's a successful governor of an important state, he helps shore up relations with the social conservatives and he has the Bush money machine.''", "One of Mr. Bush's former chiefs of staff has gone to work for Mr. McCain's exploratory committee, but several other former aides have signed up with Gov. Mitt Romney of Massachusetts, another probable Republican contender.", "''Jeb is a policy-driven guy,'' Mr. Schweizer said. ", "''If he can be a vice president that plays some kind of a policy role as Cheney has, as Gore did in the Clinton administration, then Jeb Bush will be interested.''", "Many assume that for now -- at least partly at the urging of his wife, described as shy and eager to be out of the public eye -- Mr. Bush will return to the private sector. ", "He reported a net worth of $1.4 million in 2005, down from $2.4 million in 1998.", "He was a partner in a major real estate development firm here until his first, unsuccessful run for governor in 1994, but Mr. Schweizer predicted that Mr. 
Bush might now seek out work involving the bioscience industry or the Latin American economy, both of which ''he seems particularly animated by.''", "All indications notwithstanding, ardent admirers like Grover Norquist, the president of Americans for Tax Reform, are not giving up on the prospect of Mr. Bush jumping into the presidential race next year, especially if Senator Hillary Rodham Clinton of New York becomes the Democratic candidate.", "''He could step in later than anybody else,'' Mr. Norquist said. ", "''You can run for president with the last name of Bush, even though there is and will be Bush fatigue, in a year that you're likely to be running against someone whose last name is Clinton.''", "For the time being, Mr. Bush bought a car, a Chrysler 300C, and rented a $5,500-per-month, 3,949-square-foot condominium in Segovia Tower, a luxury building overlooking a golf course in lush Coral Gables."], "doc_labels": [0], "doc_id": "1815916_GOV"}
3
+ {"sent_labels": [[2], [2], [2], [2], [2], [2], [2], [2], [2], [0, 2], [0]], "word_labels": [[5, 16, 8, 26, 30], [24, 5, 7], [35, 17, 20, 27, 28], [9], [0, 3, 6, 7], [9, 16, 25, 29, 30], [1, 2, 20, 21, 28, 30], [1], [5, 8], [0, 16, 6], [0, 9, 18]], "sents": ["He also noted that European consumer groups, regulators and legislators have made Apple the biggest target of their efforts to open up the digital music market, so that consumers can play songs from online sites on any portable music device.", "''Much of the concern over D.R.M. systems has arisen in European countries,'' Mr. Jobs wrote in an essay posted Tuesday on Apple's Web site. ", "He said these groups might want to look ''right in their backyard,'' and instead lobby the music companies to agree to drop digital rights management software, which restricts the ability of consumers to copy songs or to move them from one digital device to another.", "Mr. Jobs's comments drew a lukewarm reception in Europe on Wednesday.", "Analysts, music industry representatives and consumer organizations praised his apparent willingness to discuss the copy-protection issue but said the proposal seemed to be an effort to shift the focus of the debate.", "''He's trying to move the responsibility away from Apple and turn this into a trans-Atlantic trade issue,'' said Torgeir Waterhouse, senior adviser to the Norwegian Consumer Council.", "Like consumer organizations in several other European countries, the Norwegian group is not happy that songs bought from the Apple iTunes Store can be played only on Apple's iPods. ", "A consumer ombudsman in Norway agreed last month with a complaint filed on the matter and gave Apple until March 1 to respond and until Oct. 
1 to do something about the situation.", "While Norway is a tiny market, the ombudsman's decision was followed by similar complaints from groups in other Nordic countries, France, Germany and the Netherlands.", "Analysts say the moves could encourage regulators to take bolder steps to try to break the iTunes/iPod link.", "Legislators in France tried last year to pass a law opening the system to competition, but the proposal was watered down."], "doc_labels": [3], "doc_id": "1824654_LAW"}
4
+ {"sent_labels": [[0, 3], [0, 3], [0, 3], [0, 3], [0, 3], [0, 3], [0, 3], [0, 3], [3], [0, 3]], "word_labels": [[6, 7, 8, 10, 13, 15, 16, 21, 26, 27, 31, 34, 35, 41, 43], [32, 33, 4, 5, 6, 9, 10, 13, 14, 21, 22, 27, 28], [2, 6, 8, 9, 16, 17, 19, 20, 25, 26], [6, 7, 11, 15], [1, 2, 6, 8, 13, 17, 18, 21, 23, 24], [0, 33, 1, 13, 15, 26], [9, 10, 21, 25, 26, 27, 29, 30, 36, 39, 44, 48, 50], [4, 5, 6, 17, 12, 13], [0, 3, 9, 12, 13], [4, 37, 33, 9, 10, 7, 17, 36, 26]], "sents": ["In its ruling Tuesday, the United States Court of Appeals for the District of Columbia Circuit found that the new law did not violate the constitutional provision that bars the government from suspending habeas corpus except in ''cases of rebellion or invasion.'' ", "Two of the three appeals court judges, citing Supreme Court and other historical precedent, held that the right of habeas corpus did not extend to foreign citizens detained outside the United States.", "The majority decision was written by Judge A. Raymond Randolph, whose two earlier opinions on habeas corpus and Guant\u00e1namo prisoners had also favored the Bush administration. ", "Those opinions were reversed by the Supreme Court, but on statutory grounds rather than constitutional ones.", "The dissenting judge on Tuesday, Judith W. Rogers, said the new law did violate the constitutional provision restricting the suspension of habeas corpus.", "Administration officials welcomed the decision as a vindication of its position on the rights of detainees, after years of its halting efforts to create a legal process that would withstand tests in court.", "''The decision,'' said Erik Ablin, a Justice Department spokesman, ''reaffirms the validity of the framework that Congress established in the Military Commissions Act permitting Guant\u00e1namo detainees to challenge their detention through combatant status review tribunals with the opportunity for judicial review before the D.C. 
Circuit.''", "Tony Snow, the White House spokesman, said at his daily news briefing, ''The court decided the position that we put forward.'' He declined to say more.", "Lawyers representing the detainees vowed to seek a new review by the Supreme Court.", "Shayana Kadidal, a lawyer at the Center for Constitutional Rights, which represents many of the detainees, said, ''This decision empowers the president to do whatever he wishes to prisoners without any legal limitation as long as he does it offshore.''"], "doc_labels": [3, 5], "doc_id": "1827796_LAW_MIL"}
5
+ {"sent_labels": [[6], [4, 6], [6], [6], [6], [4], [3], [0, 3], [4], [6], [2, 4, 6], [6]], "word_labels": [[1, 5], [27, 34, 36], [3, 6, 12], [14, 17, 22], [4, 14], [15, 46, 47, 49, 50, 52, 53], [37, 38, 26, 27], [11, 13, 7], [18, 12, 20], [3], [10, 13, 15], [8, 18]], "sents": ["Its inventor called it the cardio-pneumo-psychograph.", "To a clutch of coeds in Berkeley, Calif., in 1921, it was a newfangled magic box that was somehow going to look into their minds and find out who was pilfering cash and jewelry at their college boardinghouse. ", "To the newspaper-reading public and future generations, it was the lie detector, a contraption with dubious scientific credentials, a shady ethical aura and, as it turned out, amazing longevity.", "In ''The Measure of All Things'' Ken Alder, a professor of history and the humanities at Northwestern University, chronicled the quest of two French scientists to calibrate the meter. ", "In ''The Lie Detectors'' he tells a similar tale of obsession and self-delusion, this time with a purely American setting.", "In an era that gave birth to scientific industrial management, time-motion studies and the I.Q. test, a small group of American scientists, inventors and social reformers pursued the dream of a mechanical device that would separate truth from deception by recording involuntary bodily responses like blood pressure and pulse rate.", "The lie detector, billed as ''a mechanical instrument of the future'' by one of its earliest proponents, would in theory replace traditional police interrogations (heavily dependent on the third degree) and jury deliberations. ", "It would allow private companies and the government to weed out thieves and spies.", "It would shine a high-intensity beam into the deepest recesses of the psyche, advancing the work of psychologists and psychiatrists. ", "That was the promise.", "But toward the end of his life John Larson, inventor of the machine, despaired. 
", "He called his work ''a Frankenstein's monster, which I have spent over 40 years in combating.''"], "doc_labels": [6], "doc_id": "1829843_GEN"}
6
+ {"sent_labels": [[2], [2], [2], [0, 2], [2], [2], [2], [2]], "word_labels": [[3, 5, 14, 19, 23, 36], [32, 2, 5, 28, 21, 22], [5, 10, 11], [9, 20, 21, 23, 27, 28], [34, 17, 26, 27, 28], [1, 2, 9, 12, 14, 21, 29, 31], [17, 2, 6, 22], [6, 40, 11, 18, 39, 29]], "sents": ["Swisscom offered 47 euros ($61.67) for each FastWeb share, 12 percent more than the Italian company's closing share price Friday, when the shares jumped 6.5 percent on speculation that a buyout offer might be made over the weekend.", "FastWeb's shares rose 6.30 euros, or 15 percent, to close at 48.34 euros on Monday, above the offer price, an indication that the market is expecting a counterbid higher than Swisscom's offer. ", "The FastWeb board endorsed the bid Monday and said the tender offer would begin March 22.", "Swisscom, which is 55 percent owned by the government, described Italy as ''one of the most attractive broadband markets in Europe with significant expected growth potential.'' ", "FastWeb has grown quickly, but it remains a distant second to Telecom Italia, the former monopoly, which commands 80 percent of the Italian broadband Internet market compared with FastWeb's 15 percent.", "Second-tier telecommunications companies like FastWeb are facing pressure to merge or be bought to absorb the high costs of introducing new infrastructure and to take competitors out of the market, analysts said.", "FastWeb has grown quickly since its founding in 1999, but it has yet to make a profit, and its break-even target date has been pushed back several times.", "FastWeb, which will keep its brand name if the Swisscom deal goes through, has forecast that sales would rise 30 percent this year and that gross operating profit would advance 40 percent as it posts its first net profit."], "doc_labels": [2], "doc_id": "1832651_BUS"}
7
+ {"sent_labels": [[1], [1], [1], [1], [1], [1], [1, 6], [1], [1]], "word_labels": [[0], [1, 3, 4, 12, 14, 17, 19, 22], [1, 2, 5, 24, 27, 13, 14], [3], [1, 7, 9, 11], [2, 37, 39, 41, 43, 45, 48, 17, 19, 23], [3], [1], [7, 8, 10, 12, 14, 15, 17, 19, 20, 22, 24, 25, 26, 32, 34, 35, 37, 38, 40, 41, 44]], "sents": ["Salads were good, but missing something that might have made them great. ", "Duck salad with chili paste was well balanced, matched to bell peppers, scallions, red onions, pineapple and ground peanuts. ", "Green papaya salad needed Jersey tomatoes in season, as did the sliced beef salad, which was nevertheless pleasantly bulked up with crisp cucumber and bell peppers.", "The ubiquitous pad Thai was a touch too sweet and a touch too pale.", "The lad-nar, with its fried flat noodles and vegetables and squid, was more balanced.", "Some main dishes, in a presentation similar to that of other Thai restaurants, begin with vegetables, nuts, flavorings and sauces and make the diner figure out which would work best with them: chicken, pork, beef, shrimp, squid or mixed seafood. ", "There are no hints.", "Other dishes are preordained.", "Among our favorites: a tender, spicy-yet-sweet deep-fried red snapper with vegetables and tamarind sauce; duck with coconut milk, pineapple and green curry sauce; a steamed whole striped bass with lemon grass and lime juice; spicy seafood in red curry."], "doc_labels": [1], "doc_id": "1835492_LIF"}
8
+ {"sent_labels": [[6], [2], [2], [1], [1], [0], [1], [1], [0, 1], [1]], "word_labels": [[17, 14], [5], [2], [1, 2], [3, 4], [8, 12], [15, 22, 26, 27, 29], [20, 24, 9, 12], [5, 26, 14], [3, 12, 13, 19]], "sents": ["As former cane fields go, Canadaville's is beautiful, studded with pecan trees and live oaks. ", "One shallow pond is for raising crawfish; another is for catfish.", "Tidy fences pen the chickens and some goats.", "The roomy homes, tan or grey rectangles, all have porches and foundation plantings. ", "Some have little decorative shutters.", "It is a far cry from the bleak government trailer parks for evacuees.", "In a field on the road to downtown Simmesport, a shiny sign promises the community center, a baseball diamond, tennis courts, a swimming pool and sports fields. ", "But the park is limited so far to a basketball court with lights, and there is not much for children to do, residents say.", "Of course, as the mayor pointed out, there is not much for adults to do, either, at least not by New Orleans standards. ", "The choices including visiting a wildlife management area north of town, attending church and stopping in for a snack of pickled pigs lips at Bordelon's Superette."], "doc_labels": [1], "doc_id": "1843468_LIF"}
9
+ {"sent_labels": [[2], [2], [2], [2], [2], [2], [2], [2], [2]], "word_labels": [[8, 9, 19, 29], [6, 7, 15, 17, 18], [25, 26, 31], [17, 8, 22, 28], [17, 18, 12], [3, 6], [0, 1], [12, 22, 26, 27], [13, 19, 24, 27, 29]], "sents": ["Burt's Bees' ability to expand into mass-market retail chains while maintaining its0 mom-and-pop storyline has made the company a force to be reckoned with in the beauty industry.", "According to analysts who track national cosmetics sales, Burt's Bees is the best-selling mass-market natural personal-care brand.", "''Burt's Bees is like lightning in a bottle,'' said Leigh Anne Rowinski, director of client solutions at Information Resources, a market-research company that covers the personal-care industry. ", "''Everyone would like to capture that word-of-mouth viral marketing that spreads like wildfire, attracting a young consumer who appreciates authentic niche brands and stays away from mainstream advertising techniques.''", "In the last five years, Burt's Bees has doubled its sales outlets and quadrupled its retail sales. ", "According to the company, the products are now sold in more than 20,000 locations nationwide, up from 10,000 in 2001. ", "Retail sales increased to $250 million in 2006 from $60 million in 2001.", "Its executives said that two major changes since 2005 account for the brand's expanding bottom line: a revamped line of products and a wider retail distribution network.", "Two years ago, Burt's Bees, which had previously relied on sales from thousands of independent gift stores and pharmacies, began selling nationally at CVS and Walgreens."], "doc_labels": [2], "doc_id": "1847760_BUS"}
10
+ {"sent_labels": [[0, 5], [5], [0], [0], [0], [0], [0], [0], [0], [0], [0]], "word_labels": [[20, 6, 24, 27, 28], [18, 22, 11, 12], [11, 3, 4], [1, 6, 15, 21, 22, 25, 26, 32, 33, 35, 41, 45, 46, 47, 51], [19, 4, 8, 10, 28, 14, 15], [1, 36, 38, 10, 43, 19, 20, 27], [17, 23, 11, 12, 13], [17, 5], [16, 18, 5], [33, 36, 5, 15, 17, 18, 30, 37], [18, 4, 10, 29]], "sents": ["Enhanced interrogation techniques, which Mr. Romney said he would support, refer to methods outside those allowed by the Army's code of justice or the Geneva Conventions. ", "The most publicly discussed technique that has reportedly been used on terrorism suspects involves what is known as water-boarding, where a prisoner is strapped down, head beneath his feet, as water is poured repeatedly on a cloth covering the mouth until the person thinks he is about to drown.", "The moderators from Fox News allowed and at times encouraged the candidates to mix it up among themselves.", "Mr. Romney sought to skewer Mr. McCain by association, noting his sponsorship of two bills that were particularly unpopular among conservative Republicans: an immigration bill that he was negotiating with Senator Edward M. Kennedy and the landmark campaign finance bill he drafted with Senator Russ Feingold, the Wisconsin Democrat.", "''My fear is that McCain-Kennedy would do to immigration what McCain-Feingold has done to campaign finance and money in politics, and that's bad,'' Mr. Romney said.", "Mr. McCain responded with a reference to criticism of Mr. Romney for switching positions on issues like abortion and gay rights as he has moved from the political arena of Massachusetts, where he ran for governor and senator, to running for president. ", "''Well, I've taken and kept a consistent position on campaign finance reform,'' Mr. McCain said in response to Mr. Romney. ", "''I have kept a consistent position on right to life. 
And I haven't changed my position even on even-numbered years or have changed because of the different offices that I may be running for.''", "On several occasions, the candidates sought to divert problematic questions by trying to turn their attacks on Democrats. ", "Mike Huckabee, the former governor of Arkansas, invoked John Edwards, the former senator from North Carolina, as part of an overall wave of criticism by this Republican field of Congress and the White House for spending too much money.", "''We've had a Congress that has spent money like Edwards at a beauty shop,'' Mr. Huckabee said to roars of laughter at the allusion to Mr. Edwards's paying $400 for a haircut."], "doc_labels": [0, 5], "doc_id": "1847825_GOV_MIL"}
11
+ {"sent_labels": [[6], [6], [6], [6], [6], [6], [6], [0, 6], [6]], "word_labels": [[3, 4, 6], [32, 4, 24], [5, 12], [4, 23], [9, 7], [13], [13, 17, 26], [33, 24], [12, 13]], "sents": ["Norwegian and French scientists analyzed the DNA of more than 4,000 samples of nine flowering plant species from Svalbard, a group of islands between the Scandinavian mainland and the North Pole. ", "They said they found genetic patterns that could be explained only by the repeated re-establishment of plant communities after the arrival of seeds or plant fragments from Russia, Greenland or other Arctic regions hundreds of miles away.", "The wide dispersal of the plants presumably occurs through a combination of Arctic winds, driftwood or dirt carried in floating ice and bird droppings, the scientists said.", "Julie Brigham-Grette, a geosciences professor at the University of Massachusetts, said the findings were consistent with research from Alaska showing that forests had extended farther north during a period, warmer than the present, that peaked around 11,000 years ago.", "''As the proper habitat is available, plants will survive,'' she said. ", "''I have not seen this demonstrated so clearly as it is in this paper.", "If dispersal is not a limiting factor, then maybe the rate of warming ongoing in the Arctic will not be a limiting factor in plant survival in distant places.''", "Inger Greve Alsos, the study's lead author, said natural adaptability in the plants might be tested if the projections for rapid Arctic warming from the United Nations Intergovernmental Panel on Climate Change came to pass. ", "She also cautioned that the evidence for resilience and long-distance mobility in Arctic plants could be the exception, not the rule."], "doc_labels": [6], "doc_id": "1854557_GEN"}
en_syn_docs.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f7093d11fa4f83a37f6f0bd47703f1fe99287f50eea6e37a6639d01407ad8c5
3
+ size 14593444
en_test.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fd05bc9d58cc1b5278907b4a6ea464d48643b0e07f65e387acba941c5f96831
3
+ size 22848778
en_train.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1424047442b1f646bf424fdf2f66e9d7a3371c14bfd1afd1512fa7d7a1876dd
3
+ size 188808597
en_wiki_mturk_docs.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"sent_labels": [[1], [1], [1], [1], [1], [1], [1, 4], [1, 4], [1, 4], [4], [6], [1]], "word_labels": [[23], [6], [15], [30], [32], [8, 3], [16], [13], [11, 25], [6, 14, 15, 16], [6], [32]], "sents": ["In One of Us , after returning to the developmental stage of approximately two years old she resumes life as a contented and happy toddler with no memory of her adult life . ", "Richard , her father , is happy . ", "However , her mother Lily Tyler reappears in Isabelle ' s and Richard ' s lives . ", "While she doesn ' t know how she got there she is unhappy about how Richard took away Isabelle ' s life as a young adult and all she had learned .", "Lily is really a projection into his mind by another 4400 , Byron Lillibridge who vanished on February 28 , 1966 and Kyle Baldwin in Promise City , although she appears and feels real both to herself and to Richard . ", "She soon is returned to whence she came .", "Richard , realizing it was wrong to take away Isabelle ' s adult life reverses the youth process with the help of another 4400 , Cora Tomkins who disappeared on November 2 , 1950 ( making her a contemporary of Richard who disappeared seven months later in May 1951 ) . ", "She was the one who produced the water like substance that reverses the aging process .", "She also has the ability to restore the person to the age that person was before and returns her to her early 20 ' s age .", "She and her father are soon captured by Tom Baldwin , who is really under the influence of an enigmatic anti - 4400 faction from the future known as the \" Marked \" ", "( which Matthew Ross was a member of ) .", "After regaining her powers and being strong - armed into helping the Marked , she resumes her bloody campaign against the 4400 and kidnaps Collier , but ultimately rebels once again , sacrificing herself to free Collier and freed Tom , who had come to Collier ' s aid beforehand . "], "doc_labels": [4], "doc_id": "1011274_HEA"}
2
+ {"sent_labels": [[6], [6], [6], [6], [6], [6], [6], [6]], "word_labels": [[2, 3], [1, 3, 11], [2, 26, 29], [0, 3, 7], [0, 2, 3, 8, 23, 25], [1], [3, 15], [1, 7, 8, 9, 10]], "sents": ["Observations of atmospheric chemistry are essential to our understanding .", "Routine observations of chemical composition tell us about changes in atmospheric composition over time .", "One important example of this is the Keeling Curve - a series of measurements from 1958 to today which show a steady rise in of the concentration of carbon dioxide .", "Observations of atmospheric chemistry are made in observatories such as that on Mauna Loa and on mobile platforms such as aircraft ( e . g . the UK ' s Facility for Airborne Atmospheric Measurements ) , ships and balloons . ", "Observations of atmospheric composition are increasingly made by satellites with important instruments such as GOME and MOPITT giving a global picture of air pollution and chemistry .", "Surface observations have the advantage that they provide long term records at high time resolution but are limited in the vertical and horizontal space they provide observations from .", "Some surface based instruments e . g . LIDAR can provide concentration profiles of chemical compounds and aerosol but are still restricted in the horizontal region they can cover .", "Many observations are available on line in Atmospheric Chemistry Observational Databases ."], "doc_labels": [6], "doc_id": "209912_GEN"}
3
+ {"sent_labels": [[3], [3], [3], [1, 3], [3], [1, 3], [1, 3], [1, 3], [1, 3]], "word_labels": [[1, 5, 8, 14, 18, 27], [24, 21, 14], [20, 40, 30, 24], [3], [20, 11], [1, 9, 38], [3, 11], [34, 17, 26], [6]], "sents": ["In cases of hudud , punishments for serious crimes , 12th - century Maliki jurist Averroes wrote that jurists disagree about the status of women ' s testimony . ", "According to Averroes , most scholars say that in this case women ' s testimony is unacceptable regardless of whether they testify alongside male witnesses . . ", "However , he writes that the school of thought known as the Zahiris believe that if two or more women testify alongside a male witness , then ( as in cases regarding financial transactions , discussed below ) , their testimony is acceptable . . ", "In case of witnesses for financial documents , the Qur ' an asks for two men or one man and two women . ", "It is disputed whether this means that a woman ' s testimony worth half that of a man either in disputes about financial transactions or as a general matter .", "Many Muslims believe that the legal value of the testimony of women is half that of a man . [ citation needed ] On the other hand , Javed Ahmed Ghamidi writes that Islam asks for two women witnesses against one male because this responsibility is not very suited to their temperament , sphere of interest , and usual environment . ", "He argues that Islam makes no claim that woman ' s testimony is half in other cases .", "Ibn al - Qayyim also argues that the verse referred to relates to the heavy responsibility of testifying by which an owner of wealth protects his rights , not with the decision of a court ; the two are completely different from each other . ", "It is also argued that this command shows that Qur ' an does not want to make difficulties for women ."], "doc_labels": [3], "doc_id": "1994386_LAW"}
4
+ {"sent_labels": [[1], [1], [1], [1], [1], [1, 6], [1], [1]], "word_labels": [[4], [5, 7], [5, 13], [1, 18, 12], [5], [2, 4, 5, 8, 11, 14, 16, 17, 19], [2], [8]], "sents": ["Like many Japanese New Religions the group is syncretistic .", "The primary connections are likely Buddhism and Taoism .", "Although the idea of \" Kyososoma \" as God made flesh is directly Christian in origin . ", "The sect is dualistic in that it believes in a material and spiritual division , but apparently it values both . ", "The group strongly believes in miracles .", "They also believe the deceased faithful go to heaven through Sublime Transmigration , leaving bodies without rigor mortis or putrefaction . ", "Also that miracles can occur to animals , plants , and even inanimate objects . ", "That in fact the world is full of miracles ."], "doc_labels": [6], "doc_id": "1924871_GEN"}
5
+ {"sent_labels": [[0], [0], [0], [0], [0], [0], [0], [0]], "word_labels": [[10], [12, 28], [22, 36], [32, 6, 8, 42, 18, 7, 29], [6, 12, 15, 18, 27], [24, 26, 29], [6, 9], [7]], "sents": ["There is debate about who should be considered the first President of Mongolia .", "The title does not actually date back to before Mongolia ' s democratisation , but the office itself is seen as extending through Mongolia ' s period of communist rule . ", "Sometimes , the Bogd Khan ( seen as the reincarnations of senior lama ) are seen as Mongolia ' s first \" presidents \" , but more commonly , the title is given to the secular leaders who followed them . ", "Balingiin Tserendorj , who was acting head of state in 1924 , is sometimes seen as the first president , but it was not until Navaandorjiin Jadambaa was appointed Chairman of the State Great Hural in November that there was an official leader . ", "Only a day later , the leadership role was reorganised as the Chairman of the Presidium of the State Little Khural ( the Little Hural being the executive committee of the Great Hural ) . ", "Later , the Little Hural was abolished , and its powers were returned to the Great Hural as such , the title of the president became Chairman of the Presidium of the State Great Hural . ", "This was shortly afterwards changed to Chairman of the Presidium of the People ' s Great Hural , following a change in nomenclature . ", "Finally , in 1990 , the title President of Mongolia was adopted . "], "doc_labels": [0], "doc_id": "1712216_GOV"}
6
+ {"sent_labels": [[5], [1, 2], [5], [5], [1, 2], [1, 2], [1, 2], [1, 6]], "word_labels": [[1], [3, 5, 12], [3, 10, 11, 12], [2, 3], [17, 9, 13], [1, 6], [3], [20]], "sents": ["Phoenix Force ' s demolitions expert .", "Manning grew up hunting and working for his uncle ' s demolitions company .", "During the Vietnam War , Manning was attached to a US Special Forces team as an \" observer \" ", "( much like McCarter ) .", "Following the war , Manning settled down , got married and became an executive of a major import - export firm . ", "The marriage failed , but the business prospered . ", "Because of his wealth , Manning is the most unlikely of the Phoenix Force members . ", "He seems more down to earth than the others , annoyed at McCarter ' s actions although the two are friends . "], "doc_labels": [5], "doc_id": "1671591_MIL"}
7
+ {"sent_labels": [[0], [0], [2], [2], [2], [0], [0, 2], [2], [0]], "word_labels": [[5, 21], [4, 9], [19, 7], [1, 5, 10], [18, 4], [17, 22, 6, 25, 26], [16, 1, 4, 5], [32, 1, 5, 11, 17, 28], [5, 8]], "sents": ["On 25 May 2006 the Minister of Communications of South Africa Dr Ivy Matsepe - Casaburri established the Local Loop Unbundling Committee chaired by Professor Tshilidzi Marwala to recommend the appropriate local loop unbundling models .", "The Local Loop Unbundling Committee submitted a report to Minister Matsepe - Casaburri on 25 May 2007 .", "This report recommends that models that permit customers to access both voice and data be offered by many different companies .", "The models recommended are Full Unbundling , Line Sharing and Bitstream Access . ", "It is recommended that customers should exercise carrier pre - selection and thus be able to switch between service providers .", "It is also recommended that an organisation be created to manage the local loop and that this organisation should be under the guidance of the regulator Icasa and that Icasa be capacitated in terms of resources .", "The committee recommended that service providers approved by Icasa should have access to the telephone exchange infrastructure whenever necessary .", "The committee recommended that a regulatory guideline be established and be managed by Icasa to guarantee that strategic issues like quality of the local loop be optimised for regulation and delivery of services .", "Based on this report the Minister has issued policy directives to Icasa to move swiftly with the unbundling process ."], "doc_labels": [1], "doc_id": "1303297_LIF"}
8
+ {"sent_labels": [[2], [2], [2], [2], [2], [2, 6], [2], [2], [2, 6], [2]], "word_labels": [[1, 2, 11, 13, 14], [16, 1, 4, 9, 11, 14, 15], [1, 19, 4, 21, 23, 14], [9, 13, 14], [7, 9, 15, 17, 18, 20, 21], [3, 7, 12, 14, 25, 26, 29, 30, 33, 35], [19, 5], [21, 22, 23, 12], [32, 1, 6, 33, 18, 19, 21, 31], [32, 33, 34, 27, 5, 6, 40, 11, 12, 18, 20, 21, 39, 37]], "sents": ["The control chart was invented by Walter A . Shewhart while working for Bell Labs in the 1920s .", "The company ' s engineers had been seeking to improve the reliability of their telephony transmission systems .", "Because amplifiers and other equipment had to be buried underground , there was a business need to reduce the frequency of failures and repairs . ", "By 1920 they had already realized the importance of reducing variation in a manufacturing process .", "Moreover , they had realized that continual process - adjustment in reaction to non - conformance actually increased variation and degraded quality . ", "Shewhart framed the problem in terms of Common - and special - causes of variation and , on May 16 , 1924 , wrote an internal memo introducing the control chart as a tool for distinguishing between the two . ", "Dr . Shewhart ' s boss , George Edwards , recalled : \" Dr . Shewhart prepared a little memorandum only about a page in length . ", "About a third of that page was given over to a simple diagram which we would all recognize today as a schematic control chart .", "That diagram , and the short text which preceded and followed it , set forth all of the essential principles and considerations which are involved in what we know today as process quality control . \" ", "Shewhart stressed that bringing a production process into a state of statistical control , where there is only common - cause variation , and keeping it in control , is necessary to predict future output and to manage a process economically . "], "doc_labels": [2], "doc_id": "492420_BUS"}
9
+ {"sent_labels": [[2], [2], [2], [2], [2], [2], [2], [2]], "word_labels": [[1, 4, 14, 17, 24, 27], [2, 35, 36, 10, 11, 14, 18, 21, 29], [2, 5, 12, 18, 21, 24, 30], [12, 5], [4, 9, 11, 19, 23], [21, 7, 10, 14, 15], [18, 21, 6, 7, 9, 12], [16, 18, 9, 13, 15]], "sents": ["A tariff is a tax placed on a specific good or set of goods exported from or imported to a country , creating an economic barrier to trade . ", "Usually the tactic is used when a country ' s domestic output of the good is falling and imports from foreign competitors are rising , particularly if there exist strategic reasons for retaining a domestic production capability . ", "Some failing industries receive a protection with an effect similar to a subsidies in that by placing the tariff on the industry , the industry is less enticed to produce goods in a quicker , cheaper , and more productive fashion . ", "The third reason for a tariff involves skirting of what is called dumping .", "Dumping curtails a country producing highly excessive amounts of goods and dumping the goods on another foreign country , producing the effect of prices that are \" too low \" . ", "Too low can refer to either the price of the good on from the foreign market being lower than the domestic market .", "The other reference refers to the producer selling the product at a price in which there is no profit or a loss .", "The purpose ( and expected outcome ) of the tariff is to encourage spending on domestic goods and services ."], "doc_labels": [2], "doc_id": "703042_BUS"}
10
+ {"sent_labels": [[2], [2], [2], [2], [2], [2], [2], [2]], "word_labels": [[1, 9], [16, 17, 31], [8], [0, 1, 12], [3, 5, 6], [0, 19, 20, 11], [2, 3, 9, 11, 23], [2, 5, 17, 13]], "sents": ["Heavy industry was always the focus of the Soviet economy , even in its later years . ", "The fact that it received special attention from the planners , combined with the fact that industrial production was relatively easy to plan even without minute feedback , led to significant growth in that sector . ", "The Soviet Union became one of the leading industrial nations of the world .", "Industrial production was disproportionately high in the Soviet Union compared to Western economies .", "However , the production of consumer goods was disproportionately low . ", "Economic planners made little effort to determine the wishes of household consumers , resulting in severe shortages of many consumer goods . ", "Whenever these consumer goods would become available on the market , consumers routinely had to stand in long lines ( queues ) to buy them . ", "A black market developed for goods that were particularly sought after but constantly underproduced ( such as cigarettes ) ."], "doc_labels": [0], "doc_id": "638233_GOV"}
11
+ {"sent_labels": [[1], [1], [1], [1], [1], [1], [1, 6], [1], [1]], "word_labels": [[4], [0, 16], [7], [1, 9], [8, 13], [33, 37, 42, 12, 29, 16, 21, 25], [1, 19, 23, 10, 26], [11, 36, 5, 38, 39, 25, 13, 15], [1, 19, 4, 7, 20, 10]], "sents": ["An even number of players from four to ten sit alternating around in a circle .", "Players take turns as the \" giver , \" who attempts to prompt his or her teammates to guess as many keywords as possible in the allotted time . ", "However , each card also has \" taboo \" ", "( forbidden ) words listed which may not be spoken .", "Should the giver say one , a \" censor \" on the opposing team hits the buzzer and the giver must move on to the next word . ", "For example , the giver might have to get his or her team to say \" baseball \" without saying \" sport , \" \" game , \" \" pastime , \" \" hitter , \" \" pitcher , \" nor \" baseball . \" ", "The giver may NOT say a part of a \" taboo \" word ; for example , using \" base \" in \" baseball \" is taboo . ", "The giver may only use speech to prompt his or her teammates ; gestures , sounds ( e . g . barking ) , or drawings are not allowed ( though modifications may be made for deaf or mute players ) . ", "The giver ' s hints may not rhyme with a taboo word , or be an abbreviation of a taboo word . "], "doc_labels": [1], "doc_id": "2050812_LIF"}
12
+ {"sent_labels": [[2], [2], [2], [2], [2], [2], [5], [2, 5]], "word_labels": [[17, 10, 3], [1, 15], [4], [4], [1, 2], [1, 4, 5], [4, 15], [4, 15]], "sents": ["According to the Company ' s website , they provide jobs under 6 month - 1 year contracts in the Middle East . ", "The jobs consist of personal guards , drivers , static guards , static supervisors , logistics supporters , and English teachers . ", "For most of these jobs at least \" level 3 or 4 \" ", "English speaking abilities are required .", "The company recruits citizens that have experience with battle .", "The company ' s administrative manager , Alejandro Fernandez , says \" A Peruvian in Baghdad will not panic if he has to face a blast or a blackout because he has already experienced that on the streets of Lima \" . ", "Although most of the training is done on site , the Peruvian Army is training recruited mercenaries in Huachipa . ", "The Peruvian Army was paid $ 127,690 , or 435,840 soles by Triple Canopy for training total of 678 people . "], "doc_labels": [5], "doc_id": "566645_MIL"}
13
+ {"sent_labels": [[1], [5], [3], [3], [0, 1, 3], [1, 2, 3], [1], [3], [3]], "word_labels": [[1, 7, 11], [12, 19, 23], [7, 9, 10, 15, 27, 28, 29], [8, 11, 13, 20, 22], [2, 5, 8, 9, 12, 13, 16, 17, 18, 20], [1, 3, 6, 9, 10], [4, 5, 18], [12, 14, 18], [7, 9, 12, 15, 19, 20, 21, 22]], "sents": ["Cound received his B . A . degree from George Washington University .", "He took a short break during his undergrad years at GWU to serve in the U . S . Army , months before WWII came to an end . ", "He earned an LL . B . degree from Harvard Law School , where he graduated , magna cum laude , and was Note Editor of the Harvard Law Review . ", "Upon completion of his LL . B . degree , Cound clerked for Judge Learned Hand on the United States Court of Appeals for the Second Circuit . ", "He then served as an attorney for the Appellate Section of the Civil Division of the United States Department of Justice .", "Cound joined the faculty of the University of Minnesota Law School in 1956 .", "He has been a Visiting Professor at the University of California at Los Angeles Law School and has taught at the University of North Carolina School of Law , University of Texas School of Law , University of Georgia School of Law , University of Kentucky College of Law , Washington University School of Law , Hamline University School of Law , and the Christian Albrechts Universitat in Kiel , Germany . ", "He also has served on the faculty of the Association of American Law Schools Orientation Program in American Law .", "Cound is a coauthor of a leading casebook on civil procedure and served as a Reporter for the Minnesota Criminal Jury Institution Guides from 1971 to 1977 ."], "doc_labels": [3], "doc_id": "1081411_LAW"}
14
+ {"sent_labels": [[1, 4], [2], [1, 4], [1, 4], [2], [2], [2], [4, 6]], "word_labels": [[0, 3, 8], [2, 21, 22], [0, 6, 10], [2], [1, 10, 2], [7], [5], [5]], "sents": ["Zumba is a fitness program inspired by Latin dance .", "It was founded by Miami - based dancer and choreographer Alberto ' Beto ' Perez , who teamed up with two local entrepreneurs : Alberto Perlman and Alberto Aghion . ", "Zumba combines Latin rhythms with cardiovascular exercise to create an aerobic routine that is fun and easy to follow .", "The name Zumba is derived from a Colombian word meaning to move fast and have fun .", "Zumba sells DVDs / videos through its website and via infomercials .", "More than 3 million DVDs have been sold in over 30 countries .", "Zumba currently has over 9,000 instructors worldwide . ", "On October 15 , 2007 Zumba was showcased on the Today Show . "], "doc_labels": [4], "doc_id": "2382431_HEA"}
zh_dev.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62fc24b7f15131290c11814865d80f7b4555c7997c2955780ecc944e4141ba4c
3
+ size 3612936
zh_syn_docs.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ebedb5b2d749eb30eee60699cb1ecb51e1500908217d30416b9cf4e407cf4b7
3
+ size 1585390
zh_test.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:546873044013a717c964d8330170c4f3806b1fc40879c1e012491b805603b826
3
+ size 3706453
zh_train.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9031ea2854112a3addc958f16c8a183ae5de42f386629cab5276a492a9eaed7
3
+ size 40580101
zh_wiki_mturk_docs.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"doc_id": "\u7537\u6027\u6050\u61fc\u75c7_\u751f\u6d3b_\u5065\u5eb7", "word_labels": [[2, 3, 5, 10], [2, 13, 17, 20, 27, 38], [2, 3, 5, 12, 13], [6, 7, 8, 13], [3, 5, 7, 10, 12, 18, 20, 21, 22], [2, 3, 8], [1, 3, 4, 6, 7, 9, 10, 12, 19, 23], [3, 4, 7, 8, 13, 15, 17, 19, 21], [4, 5, 7]], "doc_labels": [1, 4], "sents": ["\u53ef\u80fd\u548c\u60a3\u8005\u65e9\u5e74\u6642\u7236\u6bcd\u8981\u6c42\u592a\u9ad8\u53d7\u6307\u8cac\u592a\u591a\u6709\u95dc\u3002", "\u5728\u9019\u7a2e\u6559\u80b2\u74b0\u5883\u4e0b\uff0c\u4ed6\u5011\u5f80\u5f80\u6703\u63d0\u9ad8\u5c0d\u81ea\u6211\u7684\u8981\u6c42\uff0c\u6703\u53bb\u63e3\u6469\u5225\u4eba\u7684\u611f\u53d7\uff0c\u5f9e\u800c\u80fd\u8b93\u81ea\u5df1\u8868\u73fe\u5f97\u66f4\u597d\uff0c\u800c\u8d8a\u662f\u9019\u6a23\u4fbf\u6703\u8d8a\u7dca\u5f35\u3002", "\u53ef\u80fd\u548c\u60a3\u8005\u65e9\u5e74\u7f3a\u4e4f\u95dc\u611b\u6709\u95dc\uff0c\u5c24\u5176\u662f\u4f86\u81ea\u7537\u6027\u89aa\u4eba\u7684\u611b\u3002", "\u4ed6\u5011\u56e0\u6b64\u7121\u5f9e\u77e5\u9053\u5982\u4f55\u548c\u964c\u751f\u7537\u6027\u5f80\u4f86\u3001\u5982\u4f55\u63a5\u53d7\u4f86\u81ea\u7537\u6027\u7684\u95dc\u611b\u3002", "\u800c\u5018\u82e5\u5176\u7236\u89aa\u904e\u65bc\u56b4\u53b2\u548c\u5147\u608d\u800c\u7f3a\u4e4f\u6eab\u548c\u548c\u7d30\u81a9\uff0c\u5247\u66f4\u5bb9\u6613\u5c0e\u81f4\u5973\u5b69\u5b50\u7522\u751f\u7537\u6027\u6050\u61fc\u5fc3\u7406\u3002", "\u53ef\u80fd\u7531\u60a3\u8005\u65e9\u5e74\u53d7\u5230\u7684\u4e0d\u7576\u6559\u80b2\u5c0e\u81f4\u3002", "\u4f8b\u5982\u5f9e\u5c0f\u53d7\u5230\u55ae\u8eab\u6bcd\u89aa\u7684\u6575\u8996\u7537\u6027\u7684\u5ba3\u50b3\u6559\u80b2\u7684\u5973\u5152\uff0c\u53ef\u80fd\u6703\u540c\u6a23\u5c0d\u964c\u751f\u7537\u6027\u7522\u751f\u5f37\u70c8\u7684\u53cd\u611f\u3002", "\u4ea6\u53ef\u80fd\u548c\u60a3\u8005\u65e9\u5e74\u53d7\u5230\u7684\u5fc3\u7406\u50b7\u5bb3\u6709\u95dc\uff0c\u4f8b\u5982\u5e7c\u5e74\u53d7\u5230\u7236\u5144\u6216\u7537\u5b69\u5b50\u7684\u6b3a\u4fae\u6216\u8650\u5f85\u3002", 
"\u4ea6\u53ef\u80fd\u662f\u53d7\u904e\u6027\u4fb5\u72af\u3001\u6027\u9a37\u64fe\u7b49\u3002"], "sent_labels": [[1, 4], [1, 4], [1, 4], [1, 4], [1, 4], [4], [1, 4], [1, 4], [4]]}
2
+ {"doc_id": "\u516c\u7406_\u666e\u901a", "word_labels": [[0, 1, 3, 5, 6, 8, 10, 11, 14, 15, 16, 18, 19, 21], [1, 2, 3, 7, 8], [0, 1, 3, 5, 6], [0, 1, 2], [3, 4, 6, 8, 13, 15], [1, 2, 5, 7, 8, 9, 12, 15, 17, 19, 21, 23, 25, 31, 34, 39, 44, 46, 55, 58], [3, 4, 6, 8, 10, 14, 16, 17, 19, 21], [0]], "doc_labels": [6], "sents": ["\u5171\u540c\u6982\u5ff5\u7b49\u540c\u65bc\u76f8\u540c\u4e8b\u7269\u7684\u4e8b\u7269\u6703\u76f8\u4e92\u7b49\u540c\u82e5\u7b49\u540c\u7269\u52a0\u4e0a\u7b49\u540c\u7269\uff0c\u5247\u6574\u9ad4\u6703\u76f8\u7b49\u3002", "\u82e5\u7b49\u540c\u7269\u6e1b\u53bb\u7b49\u540c\u7269\uff0c\u5247\u5176\u5dee\u6703\u76f8\u7b49\u3002", "\u76f8\u4e92\u91cd\u5408\u7684\u4e8b\u7269\u6703\u76f8\u4e92\u7b49\u540c\u3002", "\u6574\u9ad4\u5927\u65bc\u90e8\u5206\u3002", "\uff08\u8a3b\uff1a\u7576\u96c6\u5408\u5167\u6709\u7121\u9650\u500b\u5143\u7d20\u7684\u6642\u5019\uff0c\u8a72\u516c\u7406\u7684\u6b63\u78ba\u6027\u6709\u5f85\u8a0e\u8ad6\u3002", "\u4f8b\u5982\u4e09\u89d2\u5f62\u5e95\u908a\u4e0a\u7684\u9ede\u7684\u96c6\u5408\u540c\u5e95\u908a\u4e0a\u7684\u4e2d\u4f4d\u7dda\u4e0a\u7684\u9ede\u7684\u96c6\u5408\uff1a\u4e2d\u4f4d\u7dda\u7684\u9577\u5ea6\u70ba\u5e95\u908a\u7684\u4e00\u534a\uff0c\u5373\u80fd\u88ab\u201c\u5305\u542b\u201d\u5728\u5e95\u908a\u4e0a\uff1b\u4f46\u662f\u5728\u5e95\u908a\u4e0a\u9078\u64c7\u4efb\u610f\u4e00\u9ede\u8207\u9802\u9ede\u9023\u63a5\uff0c\u5747\u6703\u5f97\u5230\u5c0d\u61c9\u7684\u4e2d\u4f4d\u7dda\u4e0a\u7684\u9ede\u3002", "\u5373\uff0c\u96d6\u7136\u4e2d\u4f4d\u7dda\u7684\u9577\u5ea6\u70ba\u5e95\u908a\u7684\u4e00\u534a\uff0c\u4f46\u662f\u5176\u96c6\u5408\u5167\u7684\u5143\u7d20\u500b\u6578\u548c\u5e95\u908a\u7684\u7b49\u52e2\u3002", "\uff09"], "sent_labels": [[6], [6], [6], [6], [6], [6], [6], [6]]}
3
+ {"doc_id": "\u80e1\u4f5b\u6c34\u58e9_\u653f\u5e9c_\u666e\u901a", "word_labels": [[1, 2, 8, 9, 12, 16, 20, 23, 25, 26, 27, 29, 30, 31, 32, 33], [5, 8, 9, 16, 24, 25], [3, 5, 16, 17, 20, 28, 34], [2, 3, 5, 9], [0, 2, 5, 6, 8, 9, 10, 14, 15], [3, 6, 8, 9, 11, 14, 27, 32], [7, 9, 13, 17, 26], [0, 1, 2, 5, 6, 14, 16, 18, 19], [3, 4], [4, 5, 13, 14, 18, 21, 24, 27, 28, 36, 37, 39, 40, 41, 43, 47, 49, 51, 52, 54]], "doc_labels": [0, 6], "sents": ["\u5728\u5efa\u9020\u6c34\u58e9\u4e4b\u524d\uff0c\u5fc5\u9808\u5148\u628a\u79d1\u7f85\u62c9\u591a\u6cb3\u5206\u6d41\uff0c\u4f46\u6cb3\u6d41\u5169\u65c1\u6eff\u4f48\u61f8\u5d16\uff0c\u56e0\u6b64\u60df\u4e00\u65b9\u6cd5\u662f\u5728\u5cfd\u8c37\u5169\u908a\u947d\u6316\u7206\u7834\uff0c\u958b\u95e2\u56db\u689d\u5206\u6d41\u96a7\u9053\u3002", "\u7136\u800c\u958b\u95e2\u5206\u6d41\u96a7\u9053\u7684\u5de5\u4eba\u751f\u6d3b\u548c\u5de5\u4f5c\u74b0\u5883\u6bcf\u6cc1\u6108\u4e0b\uff0c\u4ee4\u8a31\u591a\u5de5\u4eba\u5c0d\u9ad8\u723e\u8d8a\u4f86\u8d8a\u4e0d\u6eff\uff0c\u751a\u81f3\u7b56\u5283\u7f77\u5de5\u3002", "8\u67087\u65e5\uff0c\u5de5\u4eba\u6b63\u5f0f\u7f77\u5de5\uff0c\u7576\u6642\u4ecd\u6709\u5927\u91cf\u6709\u8cc7\u683c\u53d6\u4ee3\u4ed6\u5011\u7684\u5931\u696d\u4eba\u58eb\uff0c\u56e0\u6b64\u5de5\u4eba\u662f\u5192\u4e00\u500b\u5f88\u5927\u7684\u98a8\u96aa\uff0c\u751a\u81f3\u6709\u6a5f\u6703\u5931\u53bb\u5de5\u4f5c\u3002", "\u9ad8\u723e\u9078\u64c7\u958b\u9664\u7f77\u5de5\u7684\u5de5\u4eba\uff0c\u7136\u5f8c\u91cd\u65b0\u62db\u8058\u3002", "1932\u5e74\uff0c\u6cb3\u5167\u9996\u6b21\u6d41\u5165\u96a7\u9053\uff0c\u5206\u6d41\u5de5\u7a0b\u6210\u529f\uff0c\u80fd\u5920\u6b63\u5f0f\u5efa\u9020\u6c34\u58e9\u3002", "\u9918\u4e0b\u7684\u5de5\u7a0b\u53ea\u662f\u5229\u7528\u6df7\u51dd\u571f\u53bb\u5efa\u8a2d\u6c34\u58e9\uff0c\u653f\u5e9c\u7d66\u4e88\u7684\u9650\u671f\u70ba4\u5e74\u534a\uff0c\u6642\u9593\u96d6\u591a\uff0c\u4f46\u9ad8\u723e\u6b32\u63d0\u65e9\u5b8c\u5de5\uff0c\u4ee5\u7372\u5f97\u5927\u7b46\u734e\u91d1\u3002", 
"1933\u5e74\uff0c\u7e3d\u5171\u50be\u6ce8\u4e86\u4e00\u767e\u842c\u7acb\u65b9\u78bc\u7684\u6df7\u51dd\u571f\uff0c1935\u5e74\uff0c\u6c34\u58e9\u63d0\u65e9\u4e86\u5169\u5e74\u5b8c\u5de5\uff0c\u800c\u9ad8\u723e\u4ea6\u7372\u5f97\u4e00\u7b46\u734e\u91d1\u3002", "\u80e1\u4f5b\u6c34\u58e9\u5de5\u7a0b\u6d69\u5927\uff0c\u5efa\u8a2d\u5de5\u7a0b\u4e2d\u6709112\u540d\u5de5\u4eba\u56e0\u5404\u7a2e\u4e8b\u6545\u5931\u53bb\u6027\u547d\u6c34\u58e9\u9ad8\u5ea6\uff1a221.", "4\u7c73\u6c34\u58e9\u9577\u5ea6\uff1a379.", "2\u7c73\u6c34\u58e9\u539a\u5ea6\uff1a\u5168\u539a200\u7c73\u6df7\u51dd\u571f\u6578\u91cf\uff1a333\u842c\u7acb\u65b9\u7c73\u5e74\u767c\u96fb\u91cf\uff1a2080\u5146\u74e6\u6642\u4ea4\u901a\u6d41\u91cf\uff1a\u6bcf\u65e5\u7d0413000\u81f316000\u4eba\u6a6b\u904e\u6c34\u58e9\u7c73\u5fb7\u6e56\u7d71\u8a08\u8cc7\u6599\uff1a\u9762\u7a4d157900\u82f1\u755d\uff0c\u6df1152\u7c73\uff0c\u6e56\u5cb8\u7dda\u9577885\u516c\u91cc"], "sent_labels": [[6], [2], [2], [2], [2, 6], [0, 2], [0, 2, 6], [6], [6], [6]]}
4
+ {"doc_id": "\u611f\u60c5\u7ebd\u5e26_\u751f\u6d3b", "word_labels": [[3, 4, 13, 15, 18], [2, 3, 4, 5, 7], [3, 4, 9, 10], [0, 1, 3, 8, 10, 12], [7, 16], [0, 11, 18, 20, 21], [4, 5, 13], [3, 10, 11, 15, 18], [6, 8, 12], [2, 3, 5, 6]], "doc_labels": [1], "sents": ["1958\u5e74\uff0c\u82f1\u56fd\u53d1\u5c55\u5fc3\u7406\u5b66\u5bb6\u7ea6\u7ff0\u00b7\u9c8d\u6bd4\u53d1\u8868\u4e86\u4e00\u7bc7\u72ec\u521b\u7684\u201c\u5b69\u5b50\u4e0e\u6bcd\u4eb2\u4e0e\u751f\u4ff1\u6765\u7684\u8054\u7cfb\u201d\uff08theNatureoftheChild'sTietohisMother\uff09\u3002", "\u8fd9\u662f\u4f9d\u9644\u7406\u8bba\u6982\u5ff5\u53d1\u5c55\u7684\u5148\u9a71\u3002", "\u5b83\u56ca\u62ec\u4e86\u60c5\u611f\u94fe\u7ed3\uff08\u6709\u65f6\u4e5f\u79f0\u4e3a\u611f\u60c5\u94fe\u7ed3\uff09\u3002", "\u60c5\u611f\u94fe\u7ed3\u57fa\u4e8e\u4eba\u7c7b\u6709\u76f8\u540c\u7684\u8981\u4f9d\u9644\u4e8e\u4ed6\u4eba\u7684\u8d8b\u52bf\u3002", "\u4f8b\u5982\uff0c\u53bb\u5728\u4ed6\u4eba\u8eab\u4e0a\u5bfb\u627e\u4eb2\u8fd1\u611f\uff0c\u5e76\u5728\u8fd9\u4e2a\u4eba\u5728\u573a\u65f6\u611f\u5230\u5b89\u5168\u3002", "\u4f9d\u9644\u7406\u8bba\u662f\u5728\u5bf9\u52a8\u7269\u7684\u5b9e\u9a8c\u548c\u89c2\u5bdf\u4e0a\u5f62\u6210\u7684\uff0c\u4e0d\u8fc7\u4e5f\u6709\u5bf9\u4e8e\u5a74\u513f\u7f3a\u5c11\u6210\u4eba\u5173\u6000\u7684\u89c2\u5bdf\u7684\u501f\u9274\u3002", "\u5f88\u591a\u5173\u4e8e\u4f9d\u9644\u7684\u65e9\u671f\u7814\u7a76\u90fd\u662f\u7531\u7ea6\u7ff0\u00b7\u9c8d\u6bd4\u548c\u4ed6\u7684\u540c\u4f34\u5b8c\u6210\u7684\u3002", "\u9c8d\u6bd4\u63d0\u51fa\uff0c\u5a74\u513f\u4e0e\u751f\u4ff1\u6765\u5730\u6709\u4e00\u79cd\u5efa\u7acb\u611f\u60c5\u4f9d\u9644\uff08\u4f8b\u5982\uff0c\u94fe\u7ed3\uff09\u7684\u9700\u8981\u3002", "\u56e0\u4e3a\u8fd9\u4f1a\u786e\u4fdd\u4ed6\u4eec\u5f97\u5230\u9700\u8981\u7684\u7167\u987e\u4ee5\u589e\u52a0\u4ed6\u4eec\u751f\u5b58\u4e0b\u6765\u7684\u673a\u4f1a\u3002", "\u9c8d\u6bd4\u6ca1\u6709\u63cf\u8ff0\u4f9d\u9644\u7684\u4e92\u76f8\u5173\u7cfb\u3002"], "sent_labels": [[4], [6], [4], [4], [4], [4], [6], 
[4], [4], [1, 4, 6]]}
5
+ {"doc_id": "\u5584\u592a\u606f_\u5065\u5eb7", "word_labels": [[1, 11, 12, 14, 22, 23, 24, 25, 27, 28, 29, 30, 32, 33, 36, 38], [0, 1, 2, 3, 5, 6, 7], [9, 11, 13, 16], [0, 2], [0, 1, 2, 3, 5, 8, 9, 10, 12, 14, 15, 16, 19, 21], [0, 2, 4], [0, 2, 3], [0, 2], [5, 7, 9, 11, 17]], "doc_labels": [4], "sents": ["\u300a\u9748\u6a1e\u3001\u53e3\u554f\u7bc7\u300b\u9ec3\u5e1d\u66f0\uff1a\u300c\u4eba\u4e4b\u592a\u606f\u8005\uff0c\u4f55\u6c23\u4f7f\u7136\uff1f\u300d\u5c90\u4f2f\u66f0\uff1a\u300c\u6182\u601d\u5247\u5fc3\u7cfb\u6025\uff0c\u5fc3\u7cfb\u6025\u5247\u6c23\u9053\u7d04\uff0c\u7d04\u5247\u4e0d\u5229\uff0c\u6545\u592a\u606f\u4ee5\u4f38\u51fa\u4e4b\u3002", "\u88dc\u624b\u5c11\u9670\u5fc3\u4e3b\uff0c\u8db3\u5c11\u967d\u7559\u4e4b\u4e5f\u3002", "\u300d\u300a\u985e\u7d93\u300b\u5f35\u4ecb\u8cd3\u6ce8\uff1a\u592a\u606f\u8005\uff0c\u606f\u9577\u800c\u5927\uff0c\u5373\u5606\u606f\u4e5f\u3002", "\u7d04\uff0c\u7336\u675f\u7e1b\u4e5f\u3002", "\u6182\u6101\u601d\u616e\u5247\u6c23\u9b31\u4e0d\u4f38\uff0c\u800c\u5fc3\u7cfb\u6025\u3001\u6c23\u9053\u7d04\uff0c\u7d04\u5247\u6eff\u60b6\u65bc\u4e2d\uff0c\u6b64\u5606\u606f\u4e4b\u4e0d\u5bb9\u5df2\u4e5f\u3002", "\u624b\u5c11\u9670\uff0c\u5fc3\u7d93\u4e5f\u3002", "\u5fc3\u4e3b\uff0c\u624b\u53a5\u9670\u7d93\u4e5f\u3002", "\u8db3\u5c11\u967d\uff0c\u81bd\u7d93\u4e5f\u3002", "\u52a9\u6728\u706b\u4e4b\u81df\uff0c\u5247\u967d\u6c23\u53ef\u8212\u3001\u6291\u9b31\u53ef\u89e3\uff0c\u6545\u7686\u5b9c\u7559\u91dd\u88dc\u4e4b\u3002"], "sent_labels": [[4], [4], [4], [4], [4], [4], [4], [4], [4]]}
6
+ {"doc_id": "\u623f\u5730\u4ea7_\u5546\u4e1a", "word_labels": [[5, 6, 7, 13, 14, 16, 17], [1, 2, 3, 4, 10, 11, 12], [4, 5, 6], [0, 1, 4, 5, 6], [0, 11], [4, 5, 6, 7, 9, 10, 11, 12], [0], [2, 4, 7, 8, 12, 14, 16, 17], [5, 6, 8, 9, 10, 12, 13, 14, 17, 18, 19, 20], [5, 8, 9], [2, 3]], "doc_labels": [2], "sents": ["\u8fdb\u5165\u672c\u4e16\u7eaa\uff0c\u4e2d\u56fd\u5927\u9646\u623f\u5730\u4ea7\u5e02\u573a\u6539\u9769\u5728\u65b0\u7684\u4e00\u6ce2\u623f\u5730\u4ea7\u6295\u8d44\u70ed\u6f6e\u7684\u63a8\u52a8\u4e0b\u8fc5\u901f\u5347\u6e29\u3002", "2001\u5e74\u623f\u5730\u4ea7\u6295\u8d446245\u4ebf\u5143\uff0c\u5360\u5168\u793e\u4f1a\u603b\u6295\u8d4436898\u4ebf\u5143\u768416.", "9%\uff0c\u52302004\u5e74\u623f\u5730\u4ea7\u6295\u8d44\u5347\u9ad8\u523014480.", "75\u4ebf\u5143\uff0c\u5360\u793e\u4f1a\u603b\u6295\u8d4458620.", "28\u4ebf\u5143\u768424.", "7%\uff082005\u5e74\u4e0a\u534a\u5e74\u623f\u5730\u4ea7\u4e1a\u6295\u8d446193\u4ebf\u5143\uff0c\u603b\u6295\u8d4432895\u4ebf\u5143\uff0c\u5360\u6bd418\u3002", "8%\uff09\u3002", "\u4e0e\u6b64\u540c\u65f6\uff0c\u653f\u5e9c\u4e5f\u51fa\u53f0\u591a\u9879\u4f18\u60e0\u653f\u7b56\uff0c\u4ee5\u671f\u671b\u623f\u5730\u4ea7\u4e1a\u6210\u4e3a\u65b0\u5174\u7684\u652f\u67f1\u4ea7\u4e1a\u3002", "\u4f7f\u7528\u7684\u65b9\u6cd5\u5305\u62ec\uff0c\u9000\u8fd8\u4e2a\u4eba\u6240\u5f97\u7a0e\uff0c\u964d\u4f4e\u4ea4\u6613\u5951\u7a0e\uff0c\u653e\u5bbd\u94f6\u884c\u8d37\u6b3e\u6761\u4ef6\uff0c\u52a0\u5927\u623f\u5730\u4ea7\u4e1a\u6276\u6301\u529b\u5ea6\u7b49\u7b49\u3002", "\u5728\u8fd9\u79cd\u80cc\u666f\u4e0b\uff0c\u623f\u4ef7\u5f00\u59cb\u8fc5\u901f\u7a9c\u5347\u3002", "\u8fd9\u6ce2\u623f\u4ef7\u6ce2\u52a8\u4e2d\u6700\u4e3a\u660e\u663e\u7684\u4f8b\u5b50\u5c31\u662f\u4e2d\u56fd\u6700\u5927\u7684\u57ce\u5e02\u4e0a\u6d77\u3002"], "sent_labels": [[2], [2], [2], [2], [2], [2], [2, 6], [0, 2], [0, 2], [2], [2]]}
7
+ {"doc_id": "\u6b50\u6d32\u4eba\u6b0a\u516c\u7d04_\u6cd5\u5f8b", "word_labels": [[0, 1, 2, 11, 12, 15], [8, 9, 11, 22, 24], [0, 22, 23, 27, 28], [0, 7, 17], [0, 4, 5, 6, 8, 9, 11], [4, 7, 8, 9, 11], [2, 7, 15, 19, 24, 27, 28, 33, 37, 40, 41, 42, 44], [3, 4, 5, 7, 9, 10, 14, 18, 24, 30, 34]], "doc_labels": [3], "sents": ["\u6b50\u6d32\u4eba\u6b0a\u516c\u7d04\u662f\u5728\u7b2c\u4e8c\u6b21\u4e16\u754c\u5927\u6230\u5f8c\u7531\u6b50\u6d32\u59d4\u54e1\u6703\u958b\u59cb\u9032\u884c\u8d77\u8349\u3002", "SirDavidMaxwell-Fyfe\u65bc1949\u5e74\u52301952\u5e74\u64d4\u4efb\u8a72\u59d4\u54e1\u6703\u7684\u6cd5\u5f8b\u53ca\u884c\u653f\u90e8\u9580\u7684\u9996\u9577\u671f\u9593\uff0c\u76e3\u7763\u8457\u6574\u500b\u516c\u7d04\u7684\u8d77\u8349\u904e\u7a0b\u3002", "\u516c\u7d04\u7684\u8a2d\u8a08\u672c\u8eab\u662f\u63c9\u5408\u4e86\u82f1\u570b\u3001\u6cd5\u570b\u53ca\u5176\u4ed6\u6b50\u6d32\u6210\u54e1\u570b\u4e2d\u80fd\u7a69\u56fa\u4fdd\u969c\u300c\u6709\u6548\u7684\u653f\u6cbb\u6c11\u4e3b\u300d\u4e4b\u50b3\u7d71\u516c\u6c11\u81ea\u7531\u3002", "\u516c\u7d04\u65bc1950\u5e7411\u67084\u65e5\u5728\u7f85\u99ac\u958b\u653e\u7c3d\u7f72\uff0c\u4e26\u65bc1953\u5e749\u67083\u65e5\u88ab\u6279\u51c6\u4e26\u958b\u59cb\u65bd\u884c\u3002", "\u516c\u7d04\u672c\u8eab\u85c9\u7531\u6b50\u6d32\u4eba\u6b0a\u6cd5\u9662\u4ee5\u53ca\u6b50\u6d32\u59d4\u54e1\u6703\u52a0\u4ee5\u76e3\u7763\u3002", "\u76f4\u5230\u6700\u8fd1\uff0c\u672c\u516c\u7d04\u4ea6\u53d7\u5230\u6b50\u6d32\u4eba\u6b0a\u59d4\u54e1\u6703\u7684\u76e3\u7763\u3002", "\u800c\u5176\u904b\u4f5c\u7684\u7a0b\u5e8f\u70ba\uff0c\u5728\u539f\u544a\u7aed\u76e1\u5176\u6240\u5728\u6210\u54e1\u570b\u5167\u4efb\u4f55\u7684\u6551\u6fdf\u7ba1\u9053\u5f8c\uff0c\u82e5\u539f\u544a\u4ecd\u7136\u8a8d\u70ba\u5176\u53d7\u5230\u81ea\u7136\u6cd5\u4fdd\u969c\u7684\u4eba\u6b0a\u672a\u80fd\u7372\u5f97\u5145\u5206\u7684\u4fdd\u969c\u6642\uff0c\u5247\u539f\u544a\u53ef\u4ee5\u5411\u6b50\u6d32\u4eba\u6b0a\u6cd5\u9662\u63d0\u8d77\u8a34\u8a1f\u3002", 
"\u5728\u904e\u53bb\uff0c\u6b50\u6d32\u4eba\u6b0a\u59d4\u54e1\u6703\u53ef\u4ee5\u5be9\u67e5\u662f\u5426\u99c1\u56de\u8a72\u8a34\u8a1f\uff0c\u82e5\u8a8d\u70ba\u8a72\u6848\u6709\u8cc7\u683c\u9032\u5165\u6cd5\u9662\u6642\uff0c\u4e26\u80fd\u5c31\u8a72\u6848\u63d0\u4f9b\u610f\u898b\uff0c\u4e0d\u904e\u9019\u500b\u7a0b\u5e8f\u73fe\u5728\u5df2\u7d93\u88ab\u5ee2\u6b62\u4e86\u3002"], "sent_labels": [[3], [3], [3], [3], [0, 3], [3], [3], [3]]}
8
+ {"doc_id": "\u4fc4\u7f57\u65af_\u653f\u5e9c", "word_labels": [[0, 2, 3, 4, 6, 7, 15, 20, 21, 22, 24, 26], [1, 2, 11, 12, 22, 23, 27, 32, 33], [0, 1, 5, 7, 8, 9, 11, 13, 16], [0, 1, 12], [2, 3, 5, 8, 10, 11], [0, 3, 4, 6, 7, 9], [0, 2, 4, 5, 6, 12, 13, 14, 16, 18, 20, 26], [28, 30, 32], [3, 4, 5, 8, 9], [3, 4, 5, 8], [4, 7, 8, 17], [0, 1, 5, 6, 9, 10, 13, 14, 15, 17, 21, 25, 28]], "doc_labels": [0], "sents": ["\u83ab\u65af\u79d1\uff0c\u4fc4\u7f57\u65af\u8054\u90a6\u9996\u90fd\uff0c\u5168\u570b\u653f\u6cbb\u3001\u7ecf\u6d4e\u3001\u79d1\u5b66\u6587\u5316\u53ca\u4ea4\u901a\u4e2d\u5fc3\uff0c\u540c\u65f6\u4e5f\u662f\u4fc4\u8054\u90a6\u4e3b\u4f53\u4e2d\u83ab\u65af\u79d1\u5dde\u7684\u9996\u5e9c\u3002", "\u6574\u4e2a\u83ab\u65af\u79d1\u4eba\u53e3\u8fbe\u5230\u4e8614,612,602\uff08\u5e02\u533a\u4eba\u53e3\uff1a10,472,629\uff09\uff0c\u662f\u6b27\u6d32\u4eba\u53e3\u6700\u591a\u7684\u57ce\u5e02\uff0c\u5360\u636e\u4e86\u6574\u4e2a\u4fc4\u7f57\u65af\u4eba\u53e3\u76841/10\u3002", "\u83ab\u65af\u79d1\u9762\u79ef1,081\u5e73\u65b9\u516c\u91cc\uff0c\u5e02\u533a\u4e1c\u897f\u957f30\u516c\u91cc\uff0c\u5357\u5317\u957f40\u516c\u91cc\u3002", "\u83ab\u65af\u79d1\u5efa\u57ce\u4e8e1147\u5e74\uff0c\u8fc4\u4eca\u5df2\u6709800\u4f59\u5e74\u7684\u5386\u53f2\u3002", "\u6b64\u5916\u5728\u82cf\u8054\u65f6\u671f\uff0c\u83ab\u65af\u79d1\u662f1980\u5e74\u5965\u8fd0\u4f1a\u7684\u4e3b\u529e\u57ce\u5e02\u3002", "\u8056\u5f7c\u5f97\u5821\uff0c\u4f4d\u4e8e\u4fc4\u7f57\u65af\u897f\u5317\u90e8\uff0c\u82ac\u862d\u7063\u6cbf\u5cb8\uff0c\u59cb\u5efa\u65bc1703\u5e74\u3002", "\u8056\u5f7c\u5f97\u5821\u8207\u83ab\u65af\u79d1\u540c\u70ba\u4fc4\u7f57\u65af\u8054\u90a6\u76f4\u8f96\u5e02\uff0c\u540c\u65f6\u5b83\u4e5f\u662f\u4fc4\u8054\u90a6\u4e3b\u4f53\u4e2d\u5217\u5b81\u683c\u52d2\u5dde\u7684\u9996\u5e9c\u3001\u4fc4\u7f85\u65af\u6700\u5927\u6e2f\u5e02\u548c\u4ec5\u6b21\u4e8e\u83ab\u65af\u79d1\u7684\u7b2c\u4e8c\u5927\u57ce\u5e02\u3002", 
"\u4f0f\u5c14\u52a0\u683c\u52d2\uff0c\u4f4d\u65bc\u4f0f\u723e\u52a0\u6cb3\u6cbf\u5cb8\uff0c\u59cb\u5efa\u4e8e1589\u5e74\uff0c1925\u5e744\u670810\u65e5\u6539\u79f0\u65af\u5927\u6797\u683c\u52d2\u6216\u53f2\u8fbe\u6797\u683c\u52d2\uff08/Stalingrad\uff09\uff0c1961\u5e74\u6539\u79f0\u4f0f\u5c14\u52a0\u683c\u52d2\uff0c\u662f\u4fc4\u7f57\u65af\u5357\u90e8\u4f0f\u5c14\u52a0\u683c\u52d2\u5dde\u7684\u9996\u5e9c\u3002", "\u8449\u5361\u6377\u7433\u5821\uff0c\u4f4d\u65bc\u70cf\u62c9\u723e\u5c71\u6771\u9e93\uff0c1723\u5e74\u5efa\u57ce\u3002", "\u73fe\u5728\u662f\u70cf\u62c9\u723e\u806f\u90a6\u5340\u4e2d\u5fc3\u57ce\u5e02\u548c\u65af\u7dad\u723e\u5fb7\u6d1b\u592b\u65af\u514b\u5dde\u9996\u5e9c\u3002", "\u8a72\u57ce\u6b77\u4f86\u90fd\u662f\u4fc4\u7f85\u65af\u91cd\u8981\u7684\u5de5\u696d\u4e2d\u5fc3\uff0c\u4e5f\u662f\u70cf\u62c9\u723e\u5730\u5340\u6700\u5927\u57ce\u5e02\u3002", "\u65b0\u897f\u4f2f\u5229\u4e9e\uff0c\u4f4d\u65bc\u9102\u7562\u6cb3\u7554\uff0c1893\u5e74\u5efa\u57ce\uff0c\u662f\u65b0\u897f\u4f2f\u5229\u4e9e\u5dde\u9996\u5e9c\u548c\u4fc4\u7f85\u65af\u4eba\u53e3\u7b2c\u4e09\u5927\u57ce\u5e02\uff0c\u4e5f\u662f\u4fc4\u7f85\u65af\u8457\u540d\u7684\u79d1\u5b78\u57ce\u3002"], "sent_labels": [[0], [6], [6], [6], [6], [6], [0], [0], [6], [0], [0, 6], [0, 6]]}
9
+ {"doc_id": "\u641c\u5c0b\u5f15\u64ce\u884c\u92b7_\u5546\u4e1a", "word_labels": [[1, 11, 15, 22], [0, 3, 4, 9, 10, 11, 16, 24, 25], [6], [0], [4, 10], [2], [2, 6, 7, 8, 9], [0, 8, 11, 12, 15], [6, 12, 13, 19]], "doc_labels": [2], "sents": ["\u96a8\u8457\u7db2\u8def\u7ad9\u9ede\u65bc90\u5e74\u4ee3\u4e2d\u5f8c\u671f\u96e8\u5f8c\u6625\u7b4d\u822c\u589e\u52a0\uff0c\u641c\u5c0b\u5f15\u64ce\u958b\u59cb\u986f\u8457\u7684\u5e6b\u52a9\u4eba\u5011\u5feb\u901f\u7684\u5c0b\u627e\u6240\u8981\u8cc7\u8a0a\u3002", "\u641c\u5c0b\u5f15\u64ce\u958b\u767c\u5176\u5546\u696d\u6a21\u578b\u4ee5\u70ba\u5b83\u5011\u7684\u670d\u52d9\u7c4c\u63aa\u8cc7\u91d1\uff0c\u5982OpenText\u516c\u53f8\u65bc1996\u5e74\u63d0\u4f9b\u7684\u6bcf\u9ede\u64ca\u4ed8\u8cbb\u65b9\u6848\u4ee5\u53ca\u96a8\u5f8cGoto.", "com\u516c\u53f8\u65bc1998\u5e74\u7684\u985e\u4f3c\u65b9\u6848\u3002", "Goto.", "com\u96a8\u5f8c\u65bc2001\u5e74\u66f4\u540d\u70ba\u5546\u5e8f\u66f2(Overture)\u516c\u53f8\uff0c\u4e26\u4e14\u88abYahoo!", "\u65bc2003\u5e74\u6536\u8cfc\u3002", "\u73fe\u4eca\u900f\u904e\u96c5\u864e\u641c\u5c0b\u884c\u92b7\u63d0\u4f9b\u5ee3\u544a\u5546\u4ed8\u8cbb\u641c\u5c0b\u6a5f\u6703\u3002", "Google\u4e5f\u958b\u59cb\u65bc2000\u5e74\u900f\u904eGoogleAdWords\u65b9\u6848\uff0c\u65bc\u641c\u5c0b\u7d50\u679c\u9801\u63d0\u4f9b\u5ee3\u544a\u3002", "\u622a\u81f32007\u5e74\uff0c\u6bcf\u9ede\u64ca\u4ed8\u8cbb\u8b49\u660e\u4e86\u8a72\u65b9\u6848\u5c0d\u641c\u5c0b\u5f15\u64ce\u800c\u8a00\u662f\u9996\u8981\u7684\u300c\u5370\u9214\u6a5f\u300d\u3002"], "sent_labels": [[1], [2], [2], [6], [2], [2], [2], [2], [2]]}
10
+ {"doc_id": "\u6d88\u9664\u5bf9\u5987\u5973\u7684\u66b4\u529b\u884c\u4e3a\u5ba3\u8a00_\u6cd5\u5f8b", "word_labels": [[10, 15], [7, 9, 11, 13, 14, 16, 22], [1, 2, 8, 10, 19, 20, 21, 23], [1, 8, 10, 14, 16, 17, 21, 22, 24], [9, 10, 16, 22, 24, 26, 27, 29, 32, 34], [1, 2, 4, 8, 13], [3, 4, 6, 8, 10], [6, 8, 12, 14], [0]], "doc_labels": [3], "sents": ["\u76f4\u5230\u6700\u8fd1\uff0c\u56fd\u9645\u793e\u4f1a\u624d\u8ba4\u8bc6\u5230\u5987\u5973\u6709\u6743\u5229\u8fc7\u4e00\u79cd\u8fdc\u79bb\u66b4\u529b\u7684\u751f\u6d3b\u3002", "\u4ece\u5386\u53f2\u4e0a\u770b,\u5979\u4eec\u5bf9\u66b4\u529b\u53ca\u4fdd\u62a4\u4e86\u884c\u51f6\u8005\u7684\u514d\u7f5a\u7684\u6597\u4e89\u662f\u4e0e\u5979\u4eec\u4e3a\u4e86\u514b\u670d\u6b67\u89c6\u6240\u505a\u7684\u52aa\u529b\u5206\u4e0d\u5f00\u7684\u3002", "\u81ea\u8054\u5408\u56fd\u6210\u7acb\u4ee5\u6765\uff0c\u5b83\u4e00\u76f4\u5173\u5fc3\u5973\u6743\u7684\u8fdb\u6b65\uff0c\u4f46\u76f4\u52301993\u5e74\uff0c\u5b83\u624d\u5c06\u5973\u6027\u53cd\u6297\u66b4\u529b\u7684\u6597\u4e89\u5f53\u505a\u662f\u81ea\u5df1\u7684\u76ee\u6807\u3002", "\u8be5\u51b3\u8bae\u7684\u76ee\u7684\u4e4b\u4e00\u662f\u626d\u8f6c\u73b0\u4eca\u653f\u5e9c\u7684\u7acb\u573a,\u5373\u9488\u5bf9\u5987\u5973\u7684\u66b4\u529b\u884c\u4e3a\u662f\u4e00\u4e2a\u65e0\u9700\u56fd\u5bb6\u5e72\u9884\u7684\u79c1\u4eba\u95ee\u9898\u3002", "\u4e3a\u4e86\u7eaa\u5ff51993\u5e74\u76843\u67088\u65e5\u56fd\u9645\u5987\u5973\u8282,\u8054\u5408\u56fd\u79d8\u4e66\u957f\u5e03\u7279\u7f57\u65af\u00b7\u52a0\u5229\u9884\u5148\u4e3e\u884c\u4e86\u4e00\u4e2a\u53d1\u5e03\u4f1a\uff0c\u660e\u786e\u5730\u9610\u8ff0\u4e86\u8054\u5408\u56fd\u5728\u4fc3\u8fdb\u548c\u4fdd\u62a4\u5987\u5973\u7684\u6743\u5229\u65b9\u9762\u7684\u89d2\u8272\u548c\u8d23\u4efb\u3002", "\u201c\u4e3a\u4e86\u5987\u5973\u7684\u6743\u529b\u800c\u8fdb\u884c\u7684\u6597\u4e89\u8ddf\u521b\u5efa\u4e00\u4e2a\u5168\u65b0\u8054\u5408\u56fd\u7684\u4efb\u52a1\u662f\u76f8\u540c\u7684\u3002", 
"\u5b83\u4eec\u90fd\u80fd\u591f\u4fc3\u8fdb\u548c\u5e73\u4e0e\u57f9\u80b2\u548c\u7ef4\u6301\u4e86\u548c\u5e73\u7684\u4ef7\u503c\u89c2\u3002", "\u4eca\u5929\u6bd4\u4ee5\u5f80\u4efb\u4f55\u65f6\u5019\uff0c\u5973\u6027\u7684\u4e8b\u4e1a\u90fd\u66f4\u662f\u662f\u5168\u4eba\u7c7b\u7684\u4e8b\u4e1a\u3002", "\u201d"], "sent_labels": [[3], [3], [0, 3], [0, 3], [0, 3], [3], [3], [3], [6]]}
11
+ {"doc_id": "\u514b\u52de\u65af\u00b7\u99ae\u00b7\u65bd\u9676\u82ac\u8c9d\u683c_\u519b\u4e8b", "word_labels": [[1, 3, 5, 7, 12], [0, 6, 8, 15], [0, 12, 13, 14, 16, 18, 30, 31, 49], [1, 26, 32], [2, 8, 13], [4, 6, 8], [5, 6, 29, 31, 32, 33, 40, 46, 47, 48, 49, 52], [8, 9, 14], [2, 6, 8, 13, 26, 30, 32, 34, 35], [1, 8]], "doc_labels": [5], "sents": ["\u65bd\u9676\u82ac\u8d1d\u683c\u5728\u7b49\u5f85\u5e0c\u7279\u52d2\u3001\u5e0c\u59c6\u83b1\u3001\u6208\u6797\u4e00\u8d77\u51fa\u73b0\u65f6\u8fdb\u884c\u523a\u6740\uff0c\u4f46\u673a\u4f1a\u51e0\u6b21\u90fd\u6ca1\u6709\u51fa\u73b0\uff0c\u4e8e\u662f\u4ed6\u7ee7\u7eed\u4e8e1944\u5e747\u670820\u65e5\u5728\u72fc\u5821\uff08Wolfsschanze\uff09\u8fdb\u884c\u6d3b\u52a8\u3002", "\u65bd\u9676\u82ac\u4f2f\u683c\u5e26\u7740\u4e00\u4e2a\u88c5\u67092\u5305\u70b8\u836f\u8fdb\u5165\u7b80\u62a5\u5385\uff0c\u4f46\u662f\u4f1a\u8bae\u5730\u70b9\u4e0d\u5e78\u7531\u5821\u5792\u6df7\u51dd\u571f\u5efa\u7b51\u5185\u6539\u5230\u4e86\u65bd\u4f69\u5c14\u7684\u5c0f\u6728\u5c4b\uff0c\u56e0\u4e3a\u5929\u6c14\u592a\u70ed\u7684\u7f18\u6545\u3002", "\u65bd\u9676\u82ac\u8d1d\u683c\u85c9\u53e3\u9032\u5165\u51f1\u7279\u723e\u8fa6\u516c\u5ba4\u7684\u6d17\u624b\u9593\uff0c\u7528\u6e96\u5099\u597d\u7684\u9257\u5b50\u526a\u65b7\u539f\u5b50\u7b46\u578b\u7684\u96f7\u7ba1\uff08\u555f\u52d5\uff09\uff0c\u518d\u5c07\u5176\u63d2\u5165\u4e00\u584a1\u516c\u65a4\u7684\u70b8\u85e5\u584a\uff0c\u4f46\u7531\u4e8e\u4ed6\u5931\u53bb\u4e86\u53f3\u624b\uff0c\u5de6\u624b\u53c8\u53ea\u67093\u4e2a\u624b\u6307\uff0c\u4f7f\u7528\u94b3\u5b50\u9047\u5230\u56f0\u96be\u3002", "\u4e00\u4e2a\u8b66\u536b\u6572\u95e8\u50ac\u4fc3\u4ed6\u5feb\u70b9\uff0c\u56e0\u4e3a\u4f1a\u8bae\u9a6c\u4e0a\u5f00\u59cb\u4e86\uff0c\u5e76\u4e14\u4ed6\u6253\u5f00\u4e86\u95e8\uff0c\u56e0\u6b64\u4ed6\u6ca1\u80fd\u542f\u52a8\u7b2c\u4e8c\u5757\u70b8\u836f\uff0c\u628a\u5b83\u7559\u7ed9\u4e86\u526f\u5b98WernervonHaeften\u3002", 
"\u4ed6\u56de\u5230\u4f1a\u8bae\u5ba4\uff0c\u628a\u516c\u6587\u5305\u653e\u5728\u4f1a\u8bae\u684c\u4e0b\uff0c\u5c3d\u53ef\u80fd\u9760\u8fd1\u5e0c\u7279\u52d2\u3002", "\u51e0\u5206\u949f\u540e\uff0c\u501f\u53e3\u63a5\u7535\u8bdd\u79bb\u5f00\u4f1a\u8bae\u5ba4\u3002", "\u4ed6\u79bb\u5f00\u540e\u516c\u6587\u5305\u88ab\u6d77\u56e0\u8332\u00b7\u5e03\u862d\u5fb7\u4e0a\u6821\uff08ColonelHeinzBrandt\uff09\u632a\u52a8\u4e86\uff0c\u4ece\u539f\u4f4d\u79fb\u5230\u53e6\u4e00\u908a\u6703\u8b70\u684c\u89d2\u5f8c\uff0c\u5f9e\u800c\u8b93\u70b8\u5f48\u5728\u7206\u70b8\u6642\u5a01\u529b\u6e1b\u5f31\uff0c\u4ee4\u5e0c\u7279\u52d2\u4e0d\u88ab\u5176\u6bba\u5bb3\uff0c\u800c\u5e03\u862d\u5fb7\u5f8c\u4f86\u88ab\u70b8\u5f48\u70b8\u6b7b\uff0c\u4e26\u70b8\u65b7\u4e86\u4e00\u689d\u817f\u3002", "12\u9ede40\u81f350\u5206\u671f\u9593\uff0c\u70b8\u5f48\u5f15\u7206\uff0c\u5c07\u6703\u8b70\u5ba4\u6574\u500b\u6467\u6bc0\u3002", "\u4e09\u540d\u8ecd\u5b98\u548c\u4e00\u4f4d\u901f\u8a18\u54e1\u56e0\u6b64\u6b7b\u4ea1\uff0c\u7136\u800c\u4e3b\u8981\u76ee\u6a19\u5e0c\u7279\u52d2\u5c31\u548c\u5176\u4ed6\u4eba\u4e00\u6a23\u56e0\u70ba\u684c\u89d2\u4e4b\u539a\u5be6\u800c\u53ea\u53d7\u4e86\u8f15\u50b7\uff1a\u8932\u5b50\u88ab\u71d2\u7126\u4e14\u8033\u819c\u88ab\u9707\u7834\uff0c\u5982\u540c\u6703\u8b70\u5ba4\u5167\u53e6\u591624\u4eba\u3002", "\u4f46\u662f\u65bd\u9676\u82ac\u8d1d\u683c\u8ba4\u4e3a\u5c4b\u4e2d\u6ca1\u6709\u4eba\u80fd\u5e78\u5b58\u3002"], "sent_labels": [[5], [5], [5], [5], [5], [6], [5], [5], [5], [5]]}
12
+ {"doc_id": "\u8ecd\u6236_\u519b\u4e8b", "word_labels": [[2, 9, 11], [2, 3], [7, 15, 17, 20, 21], [4, 8], [24], [0], [0, 2, 4, 6, 8], [0, 2, 4, 6, 8], [0, 2, 4, 6, 8, 10], [3, 4, 6, 7, 9], [4, 9], [2, 3, 10]], "doc_labels": [5], "sents": ["\u6240\u8b02\u7684\u8ecd\u6236\u5c31\u662f\u5176\u6236\u7c4d\u7a2e\u985e\u5c6c\u65bc\u8ecd\u7c4d\u7684\u8ecd\u4eba\u3002", "\u660e\u4ee3\u7684\u8ecd\u6236\u5236\u5ea6\u539f\u5247\u4e0a\u662f\u627f\u8972\u5143\u4ee3\u6236\u7c4d\u7684\u5206\u985e\u800c\u4f86\u7684\u3002", "\u300a\u5143\u53f2\u300b\uff0c\u5377\u4e5d\u516b\uff0c\u3008\u5175\u5236\u4e00\u3009\u4e91\uff1a\u300c\u5929\u4e0b\u65e2\u5e73\uff0c\u5617\u70ba\u8ecd\u8005\u5b9a\u5165\u5c3a\u7c4d\u4f0d\u7b26\uff0c\u4e0d\u53ef\u66f4\u6613\u3002", "\u300d\u6240\u4ee5\u5143\u4ee3\u6709\u8ecd\u4eba\u5c08\u5c6c\u4e4b\u300c\u8ecd\u6236\u300d\u3002", "\u660e\u4ee3\u7684\u6236\u7c4d\u5206\u985e\uff0c\u4f9d\u300a\u660e\u53f2\u300b\u3008\u98df\u8ca8\u5fd7\u3009\u300c\u6236\u53e3\u300d\u4e91\uff1a\u300c\u51e1\u6236\u4e09\u7b49\uff0c\u66f0\u6c11\u3001\u66f0\u8ecd\u3001\u66f0\u5320\u3002", "\u6c11\u6709\u5112\uff0c\u6709\u91ab\uff0c\u6709\u9670\u967d\u3002", "\u8ecd\u6709\u6821\u5c09\uff0c\u6709\u529b\u58eb\uff0c\u5f13\u3001\u8216\u5175\u3002", "\u5320\u6709\u5eda\u5f79\u3001\u88c1\u7e2b\u3001\u99ac\u3001\u8239\u4e4b\u985e\u3002", "\u7015\u6d77\u6709\u9e7d\u3001\u7ac8\uff0c\u5bfa\u6709\u50e7\uff0c\u89c0\u6709\u9053\u58eb\u3002", "\u7562\u4ee5\u5176\u696d\u8457\u7c4d\uff0c\u4eba\u6236\u4ee5\u7c4d\u70ba\u65b7\u3002", "\u300d\u6240\u4ee5\u6709\u5c08\u70ba\u8ecd\u4eba\u800c\u8a2d\u4e4b\u300c\u8ecd\u6236\u300d\u3002", "\u9664\u975e\u6210\u4e3a\u5175\u90e8\u5c1a\u4e66\uff0c\u5426\u5219\u4e5f\u8981\u4e16\u4e16\u505a\u519b\u3002"], "sent_labels": [[5], [5], [5], [5], [5], [0, 6], [5], [6], [1, 6], [0], [5], [5]]}