shujaatalishariati committed · 847e3e1
1 Parent(s): 9367038
Initial commit for Gradio app with GECToR

Browse files
- app.py +37 -36
- gector/bert_token_embedder.py +269 -0
- gector/datareader.py +151 -0
- gector/gec_model.py +298 -0
- gector/seq2labels_model.py +194 -0
- gector/tokenization.py +181 -0
- gector/tokenizer_indexer.py +161 -0
- gector/trainer.py +845 -0
- output_vocabulary/d_tags.txt +4 -0
- output_vocabulary/labels.txt +5002 -0
- output_vocabulary/non_padded_namespaces.txt +2 -0
- requirements.txt +8 -4
- utils/filter_brackets.py +35 -0
- utils/helpers.py +233 -0
- utils/prepare_clc_fce_data.py +123 -0
- utils/preprocess_data.py +488 -0
app.py
CHANGED
@@ -7,6 +7,8 @@ import nltk
 from nltk.corpus import wordnet
 from textblob import TextBlob
 from pattern.en import conjugate, lemma, pluralize, singularize
+from gector.gec_model import GecBERTModel  # Import GECToR Model
+from utils.helpers import read_lines, normalize  # GECToR utilities
 
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -84,29 +86,41 @@ def correct_singular_plural_errors(text):
 
     return ' '.join(corrected_text)
 
-# Function to check and correct article errors
-def correct_article_errors(text):
-    doc = nlp(text)
-    corrected_text = []
-    for token in doc:
-        if token.text in ['a', 'an']:
-            next_token = token.nbor(1)
-            if token.text == "a" and next_token.text[0].lower() in "aeiou":
-                corrected_text.append("an")
-            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
-                corrected_text.append("a")
-            else:
-                corrected_text.append(token.text)
-        else:
-            corrected_text.append(token.text)
-    return ' '.join(corrected_text)
-
 # Function to correct overall grammar using TextBlob
-def correct_grammar(text):
+def correct_grammar_textblob(text):
     blob = TextBlob(text)
     corrected_text = str(blob.correct())  # TextBlob's built-in grammar correction
     return corrected_text
 
+# Initialize GECToR Model for Grammar Correction
+def load_gector_model():
+    model_path = ["gector/roberta_1_gector.th"]  # Ensure model file is placed correctly
+    vocab_path = "output_vocabulary"
+    model = GecBERTModel(vocab_path=vocab_path,
+                         model_paths=model_path,
+                         max_len=50,
+                         min_len=3,
+                         iterations=5,
+                         min_error_probability=0.0,
+                         lowercase_tokens=0,
+                         model_name="roberta",
+                         special_tokens_fix=1,
+                         log=False,
+                         confidence=0,
+                         del_confidence=0,
+                         is_ensemble=False,
+                         weigths=None)
+    return model
+
+# Load the GECToR model
+gector_model = load_gector_model()
+
+# Function to correct grammar using GECToR
+def correct_grammar_gector(text):
+    sentences = [text.split()]
+    corrected_sentences, _ = gector_model.handle_batch(sentences)
+    return " ".join(corrected_sentences[0])
+
 # Paraphrasing function using SpaCy and NLTK (Humanifier)
 def paraphrase_with_spacy_nltk(text):
     doc = nlp(text)
@@ -132,28 +146,17 @@ def paraphrase_with_spacy_nltk(text):
         else:
             paraphrased_words.append(token.text)
 
-
-    paraphrased_sentence = ' '.join(paraphrased_words)
-
-    return paraphrased_sentence
+    return ' '.join(paraphrased_words)
 
 # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
 def paraphrase_and_correct(text):
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
 
-    # Step 2: Apply grammatical corrections
-    corrected_text =
-    corrected_text = capitalize_sentences_and_nouns(corrected_text)
-    corrected_text = correct_singular_plural_errors(corrected_text)
-
-    corrected_text = correct_tense_errors(corrected_text)
-
-    # Step 4: Correct overall grammar using TextBlob
-    final_text = correct_grammar(corrected_text)
-
-    return final_text
+    # Step 2: Apply grammatical corrections using GECToR
+    corrected_text = correct_grammar_gector(paraphrased_text)
+
+    return corrected_text
 
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
@@ -163,15 +166,13 @@ with gr.Blocks() as demo:
         label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
         score1 = gr.Textbox(lines=1, label='Prob')
 
-        # Connect the prediction function to the button
         button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
 
     with gr.Tab("Humanifier"):
         text_input = gr.Textbox(lines=5, label="Input Text")
         paraphrase_button = gr.Button("Paraphrase & Correct")
-        output_text = gr.Textbox(label="Paraphrased Text")
+        output_text = gr.Textbox(label="Paraphrased and Corrected Text")
 
-        # Connect the paraphrasing function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 
     # Launch the app
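Note: a minimal sketch of exercising the new GECToR path outside the Gradio UI, assuming app.py is importable and the gector/roberta_1_gector.th checkpoint referenced by load_gector_model() is present in the Space; this snippet is illustrative and not part of the commit:

    # Hypothetical smoke test; the function names come from app.py above.
    from app import correct_grammar_gector, paraphrase_and_correct

    sample = "she go to school every days"
    print(correct_grammar_gector(sample))    # GECToR-only correction
    print(paraphrase_and_correct(sample))    # spaCy/NLTK paraphrase, then GECToR correction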
gector/bert_token_embedder.py
ADDED
|
@@ -0,0 +1,269 @@
| 1 |
+
"""Tweaked version of corresponding AllenNLP file"""
|
| 2 |
+
import logging
|
| 3 |
+
from copy import deepcopy
|
| 4 |
+
from typing import Dict
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from allennlp.modules.token_embedders.token_embedder import TokenEmbedder
|
| 9 |
+
from allennlp.nn import util
|
| 10 |
+
from transformers import AutoModel, PreTrainedModel
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class PretrainedBertModel:
|
| 16 |
+
"""
|
| 17 |
+
In some instances you may want to load the same BERT model twice
|
| 18 |
+
(e.g. to use as a token embedder and also as a pooling layer).
|
| 19 |
+
This factory provides a cache so that you don't actually have to load the model twice.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
_cache: Dict[str, PreTrainedModel] = {}
|
| 23 |
+
|
| 24 |
+
@classmethod
|
| 25 |
+
def load(cls, model_name: str, cache_model: bool = True) -> PreTrainedModel:
|
| 26 |
+
if model_name in cls._cache:
|
| 27 |
+
return PretrainedBertModel._cache[model_name]
|
| 28 |
+
|
| 29 |
+
model = AutoModel.from_pretrained(model_name)
|
| 30 |
+
if cache_model:
|
| 31 |
+
cls._cache[model_name] = model
|
| 32 |
+
|
| 33 |
+
return model
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class BertEmbedder(TokenEmbedder):
|
| 37 |
+
"""
|
| 38 |
+
A ``TokenEmbedder`` that produces BERT embeddings for your tokens.
|
| 39 |
+
Should be paired with a ``BertIndexer``, which produces wordpiece ids.
|
| 40 |
+
Most likely you probably want to use ``PretrainedBertEmbedder``
|
| 41 |
+
for one of the named pretrained models, not this base class.
|
| 42 |
+
Parameters
|
| 43 |
+
----------
|
| 44 |
+
bert_model: ``BertModel``
|
| 45 |
+
The BERT model being wrapped.
|
| 46 |
+
top_layer_only: ``bool``, optional (default = ``False``)
|
| 47 |
+
If ``True``, then only return the top layer instead of applying the scalar mix.
|
| 48 |
+
max_pieces : int, optional (default: 512)
|
| 49 |
+
The BERT embedder uses positional embeddings and so has a corresponding
|
| 50 |
+
maximum length for its input ids. Assuming the inputs are windowed
|
| 51 |
+
and padded appropriately by this length, the embedder will split them into a
|
| 52 |
+
large batch, feed them into BERT, and recombine the output as if it was a
|
| 53 |
+
longer sequence.
|
| 54 |
+
num_start_tokens : int, optional (default: 1)
|
| 55 |
+
The number of starting special tokens input to BERT (usually 1, i.e., [CLS])
|
| 56 |
+
num_end_tokens : int, optional (default: 1)
|
| 57 |
+
The number of ending tokens input to BERT (usually 1, i.e., [SEP])
|
| 58 |
+
scalar_mix_parameters: ``List[float]``, optional, (default = None)
|
| 59 |
+
If not ``None``, use these scalar mix parameters to weight the representations
|
| 60 |
+
produced by different layers. These mixing weights are not updated during
|
| 61 |
+
training.
|
| 62 |
+
"""
|
| 63 |
+
|
| 64 |
+
def __init__(
|
| 65 |
+
self,
|
| 66 |
+
bert_model: PreTrainedModel,
|
| 67 |
+
top_layer_only: bool = False,
|
| 68 |
+
max_pieces: int = 512,
|
| 69 |
+
num_start_tokens: int = 1,
|
| 70 |
+
num_end_tokens: int = 1
|
| 71 |
+
) -> None:
|
| 72 |
+
super().__init__()
|
| 73 |
+
self.bert_model = deepcopy(bert_model)
|
| 74 |
+
self.output_dim = bert_model.config.hidden_size
|
| 75 |
+
self.max_pieces = max_pieces
|
| 76 |
+
self.num_start_tokens = num_start_tokens
|
| 77 |
+
self.num_end_tokens = num_end_tokens
|
| 78 |
+
self._scalar_mix = None
|
| 79 |
+
|
| 80 |
+
def set_weights(self, freeze):
|
| 81 |
+
for param in self.bert_model.parameters():
|
| 82 |
+
param.requires_grad = not freeze
|
| 83 |
+
return
|
| 84 |
+
|
| 85 |
+
def get_output_dim(self) -> int:
|
| 86 |
+
return self.output_dim
|
| 87 |
+
|
| 88 |
+
def forward(
|
| 89 |
+
self,
|
| 90 |
+
input_ids: torch.LongTensor,
|
| 91 |
+
offsets: torch.LongTensor = None
|
| 92 |
+
) -> torch.Tensor:
|
| 93 |
+
"""
|
| 94 |
+
Parameters
|
| 95 |
+
----------
|
| 96 |
+
input_ids : ``torch.LongTensor``
|
| 97 |
+
The (batch_size, ..., max_sequence_length) tensor of wordpiece ids.
|
| 98 |
+
offsets : ``torch.LongTensor``, optional
|
| 99 |
+
The BERT embeddings are one per wordpiece. However it's possible/likely
|
| 100 |
+
you might want one per original token. In that case, ``offsets``
|
| 101 |
+
represents the indices of the desired wordpiece for each original token.
|
| 102 |
+
Depending on how your token indexer is configured, this could be the
|
| 103 |
+
position of the last wordpiece for each token, or it could be the position
|
| 104 |
+
of the first wordpiece for each token.
|
| 105 |
+
For example, if you had the sentence "Definitely not", and if the corresponding
|
| 106 |
+
wordpieces were ["Def", "##in", "##ite", "##ly", "not"], then the input_ids
|
| 107 |
+
would be 5 wordpiece ids, and the "last wordpiece" offsets would be [3, 4].
|
| 108 |
+
If offsets are provided, the returned tensor will contain only the wordpiece
|
| 109 |
+
embeddings at those positions, and (in particular) will contain one embedding
|
| 110 |
+
per token. If offsets are not provided, the entire tensor of wordpiece embeddings
|
| 111 |
+
will be returned.
|
| 112 |
+
"""
|
| 113 |
+
|
| 114 |
+
batch_size, full_seq_len = input_ids.size(0), input_ids.size(-1)
|
| 115 |
+
initial_dims = list(input_ids.shape[:-1])
|
| 116 |
+
|
| 117 |
+
# The embedder may receive an input tensor that has a sequence length longer than can
|
| 118 |
+
# be fit. In that case, we should expect the wordpiece indexer to create padded windows
|
| 119 |
+
# of length `self.max_pieces` for us, and have them concatenated into one long sequence.
|
| 120 |
+
# E.g., "[CLS] I went to the [SEP] [CLS] to the store to [SEP] ..."
|
| 121 |
+
# We can then split the sequence into sub-sequences of that length, and concatenate them
|
| 122 |
+
# along the batch dimension so we effectively have one huge batch of partial sentences.
|
| 123 |
+
# This can then be fed into BERT without any sentence length issues. Keep in mind
|
| 124 |
+
# that the memory consumption can dramatically increase for large batches with extremely
|
| 125 |
+
# long sentences.
|
| 126 |
+
needs_split = full_seq_len > self.max_pieces
|
| 127 |
+
last_window_size = 0
|
| 128 |
+
if needs_split:
|
| 129 |
+
# Split the flattened list by the window size, `max_pieces`
|
| 130 |
+
split_input_ids = list(input_ids.split(self.max_pieces, dim=-1))
|
| 131 |
+
|
| 132 |
+
# We want all sequences to be the same length, so pad the last sequence
|
| 133 |
+
last_window_size = split_input_ids[-1].size(-1)
|
| 134 |
+
padding_amount = self.max_pieces - last_window_size
|
| 135 |
+
split_input_ids[-1] = F.pad(split_input_ids[-1], pad=[0, padding_amount], value=0)
|
| 136 |
+
|
| 137 |
+
# Now combine the sequences along the batch dimension
|
| 138 |
+
input_ids = torch.cat(split_input_ids, dim=0)
|
| 139 |
+
|
| 140 |
+
input_mask = (input_ids != 0).long()
|
| 141 |
+
# input_ids may have extra dimensions, so we reshape down to 2-d
|
| 142 |
+
# before calling the BERT model and then reshape back at the end.
|
| 143 |
+
all_encoder_layers = self.bert_model(
|
| 144 |
+
input_ids=util.combine_initial_dims(input_ids),
|
| 145 |
+
attention_mask=util.combine_initial_dims(input_mask),
|
| 146 |
+
)[0]
|
| 147 |
+
if len(all_encoder_layers[0].shape) == 3:
|
| 148 |
+
all_encoder_layers = torch.stack(all_encoder_layers)
|
| 149 |
+
elif len(all_encoder_layers[0].shape) == 2:
|
| 150 |
+
all_encoder_layers = torch.unsqueeze(all_encoder_layers, dim=0)
|
| 151 |
+
|
| 152 |
+
if needs_split:
|
| 153 |
+
# First, unpack the output embeddings into one long sequence again
|
| 154 |
+
unpacked_embeddings = torch.split(all_encoder_layers, batch_size, dim=1)
|
| 155 |
+
unpacked_embeddings = torch.cat(unpacked_embeddings, dim=2)
|
| 156 |
+
|
| 157 |
+
# Next, select indices of the sequence such that it will result in embeddings representing the original
|
| 158 |
+
# sentence. To capture maximal context, the indices will be the middle part of each embedded window
|
| 159 |
+
# sub-sequence (plus any leftover start and final edge windows), e.g.,
|
| 160 |
+
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
| 161 |
+
# "[CLS] I went to the very fine [SEP] [CLS] the very fine store to eat [SEP]"
|
| 162 |
+
# with max_pieces = 8 should produce max context indices [2, 3, 4, 10, 11, 12] with additional start
|
| 163 |
+
# and final windows with indices [0, 1] and [14, 15] respectively.
|
| 164 |
+
|
| 165 |
+
# Find the stride as half the max pieces, ignoring the special start and end tokens
|
| 166 |
+
# Calculate an offset to extract the centermost embeddings of each window
|
| 167 |
+
stride = (self.max_pieces - self.num_start_tokens - self.num_end_tokens) // 2
|
| 168 |
+
stride_offset = stride // 2 + self.num_start_tokens
|
| 169 |
+
|
| 170 |
+
first_window = list(range(stride_offset))
|
| 171 |
+
|
| 172 |
+
max_context_windows = [
|
| 173 |
+
i
|
| 174 |
+
for i in range(full_seq_len)
|
| 175 |
+
if stride_offset - 1 < i % self.max_pieces < stride_offset + stride
|
| 176 |
+
]
|
| 177 |
+
|
| 178 |
+
# Look back at what's left, unless it's the whole self.max_pieces window
|
| 179 |
+
if full_seq_len % self.max_pieces == 0:
|
| 180 |
+
lookback = self.max_pieces
|
| 181 |
+
else:
|
| 182 |
+
lookback = full_seq_len % self.max_pieces
|
| 183 |
+
|
| 184 |
+
final_window_start = full_seq_len - lookback + stride_offset + stride
|
| 185 |
+
final_window = list(range(final_window_start, full_seq_len))
|
| 186 |
+
|
| 187 |
+
select_indices = first_window + max_context_windows + final_window
|
| 188 |
+
|
| 189 |
+
initial_dims.append(len(select_indices))
|
| 190 |
+
|
| 191 |
+
recombined_embeddings = unpacked_embeddings[:, :, select_indices]
|
| 192 |
+
else:
|
| 193 |
+
recombined_embeddings = all_encoder_layers
|
| 194 |
+
|
| 195 |
+
# Recombine the outputs of all layers
|
| 196 |
+
# (layers, batch_size * d1 * ... * dn, sequence_length, embedding_dim)
|
| 197 |
+
# recombined = torch.cat(combined, dim=2)
|
| 198 |
+
input_mask = (recombined_embeddings != 0).long()
|
| 199 |
+
|
| 200 |
+
if self._scalar_mix is not None:
|
| 201 |
+
mix = self._scalar_mix(recombined_embeddings, input_mask)
|
| 202 |
+
else:
|
| 203 |
+
mix = recombined_embeddings[-1]
|
| 204 |
+
|
| 205 |
+
# At this point, mix is (batch_size * d1 * ... * dn, sequence_length, embedding_dim)
|
| 206 |
+
|
| 207 |
+
if offsets is None:
|
| 208 |
+
# Resize to (batch_size, d1, ..., dn, sequence_length, embedding_dim)
|
| 209 |
+
dims = initial_dims if needs_split else input_ids.size()
|
| 210 |
+
return util.uncombine_initial_dims(mix, dims)
|
| 211 |
+
else:
|
| 212 |
+
# offsets is (batch_size, d1, ..., dn, orig_sequence_length)
|
| 213 |
+
offsets2d = util.combine_initial_dims(offsets)
|
| 214 |
+
# now offsets is (batch_size * d1 * ... * dn, orig_sequence_length)
|
| 215 |
+
range_vector = util.get_range_vector(
|
| 216 |
+
offsets2d.size(0), device=util.get_device_of(mix)
|
| 217 |
+
).unsqueeze(1)
|
| 218 |
+
# selected embeddings is also (batch_size * d1 * ... * dn, orig_sequence_length)
|
| 219 |
+
selected_embeddings = mix[range_vector, offsets2d]
|
| 220 |
+
|
| 221 |
+
return util.uncombine_initial_dims(selected_embeddings, offsets.size())
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# @TokenEmbedder.register("bert-pretrained")
|
| 225 |
+
class PretrainedBertEmbedder(BertEmbedder):
|
| 226 |
+
|
| 227 |
+
"""
|
| 228 |
+
Parameters
|
| 229 |
+
----------
|
| 230 |
+
pretrained_model: ``str``
|
| 231 |
+
Either the name of the pretrained model to use (e.g. 'bert-base-uncased'),
|
| 232 |
+
or the path to the .tar.gz file with the model weights.
|
| 233 |
+
If the name is a key in the list of pretrained models at
|
| 234 |
+
https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/modeling.py#L41
|
| 235 |
+
the corresponding path will be used; otherwise it will be interpreted as a path or URL.
|
| 236 |
+
requires_grad : ``bool``, optional (default = False)
|
| 237 |
+
If True, compute gradient of BERT parameters for fine tuning.
|
| 238 |
+
top_layer_only: ``bool``, optional (default = ``False``)
|
| 239 |
+
If ``True``, then only return the top layer instead of applying the scalar mix.
|
| 240 |
+
scalar_mix_parameters: ``List[float]``, optional, (default = None)
|
| 241 |
+
If not ``None``, use these scalar mix parameters to weight the representations
|
| 242 |
+
produced by different layers. These mixing weights are not updated during
|
| 243 |
+
training.
|
| 244 |
+
"""
|
| 245 |
+
|
| 246 |
+
def __init__(
|
| 247 |
+
self,
|
| 248 |
+
pretrained_model: str,
|
| 249 |
+
requires_grad: bool = False,
|
| 250 |
+
top_layer_only: bool = False,
|
| 251 |
+
special_tokens_fix: int = 0,
|
| 252 |
+
) -> None:
|
| 253 |
+
model = PretrainedBertModel.load(pretrained_model)
|
| 254 |
+
|
| 255 |
+
for param in model.parameters():
|
| 256 |
+
param.requires_grad = requires_grad
|
| 257 |
+
|
| 258 |
+
super().__init__(
|
| 259 |
+
bert_model=model,
|
| 260 |
+
top_layer_only=top_layer_only
|
| 261 |
+
)
|
| 262 |
+
|
| 263 |
+
if special_tokens_fix:
|
| 264 |
+
try:
|
| 265 |
+
vocab_size = self.bert_model.embeddings.word_embeddings.num_embeddings
|
| 266 |
+
except AttributeError:
|
| 267 |
+
# reserve more space
|
| 268 |
+
vocab_size = self.bert_model.word_embedding.num_embeddings + 5
|
| 269 |
+
self.bert_model.resize_token_embeddings(vocab_size + 1)
|
gector/datareader.py
ADDED
|
@@ -0,0 +1,151 @@
| 1 |
+
"""Tweaked AllenNLP dataset reader."""
|
| 2 |
+
import logging
|
| 3 |
+
import re
|
| 4 |
+
from random import random
|
| 5 |
+
from typing import Dict, List
|
| 6 |
+
|
| 7 |
+
from allennlp.common.file_utils import cached_path
|
| 8 |
+
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
|
| 9 |
+
from allennlp.data.fields import TextField, SequenceLabelField, MetadataField, Field
|
| 10 |
+
from allennlp.data.instance import Instance
|
| 11 |
+
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
|
| 12 |
+
from allennlp.data.tokenizers import Token
|
| 13 |
+
from overrides import overrides
|
| 14 |
+
|
| 15 |
+
from utils.helpers import SEQ_DELIMETERS, START_TOKEN
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@DatasetReader.register("seq2labels_datareader")
|
| 21 |
+
class Seq2LabelsDatasetReader(DatasetReader):
|
| 22 |
+
"""
|
| 23 |
+
Reads instances from a pretokenised file where each line is in the following format:
|
| 24 |
+
|
| 25 |
+
WORD###TAG [TAB] WORD###TAG [TAB] ..... \n
|
| 26 |
+
|
| 27 |
+
and converts it into a ``Dataset`` suitable for sequence tagging. You can also specify
|
| 28 |
+
alternative delimiters in the constructor.
|
| 29 |
+
|
| 30 |
+
Parameters
|
| 31 |
+
----------
|
| 32 |
+
delimiters: ``dict``
|
| 33 |
+
The dictionary with all delimiters.
|
| 34 |
+
token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``)
|
| 35 |
+
We use this to define the input representation for the text. See :class:`TokenIndexer`.
|
| 36 |
+
Note that the `output` tags will always correspond to single token IDs based on how they
|
| 37 |
+
are pre-tokenised in the data file.
|
| 38 |
+
max_len: if set, long sentences will be truncated
|
| 39 |
+
"""
|
| 40 |
+
# fix broken sentences mostly in Lang8
|
| 41 |
+
BROKEN_SENTENCES_REGEXP = re.compile(r'\.[a-zA-RT-Z]')
|
| 42 |
+
|
| 43 |
+
def __init__(self,
|
| 44 |
+
token_indexers: Dict[str, TokenIndexer] = None,
|
| 45 |
+
delimeters: dict = SEQ_DELIMETERS,
|
| 46 |
+
skip_correct: bool = False,
|
| 47 |
+
skip_complex: int = 0,
|
| 48 |
+
lazy: bool = False,
|
| 49 |
+
max_len: int = None,
|
| 50 |
+
test_mode: bool = False,
|
| 51 |
+
tag_strategy: str = "keep_one",
|
| 52 |
+
tn_prob: float = 0,
|
| 53 |
+
tp_prob: float = 0,
|
| 54 |
+
broken_dot_strategy: str = "keep") -> None:
|
| 55 |
+
super().__init__(lazy)
|
| 56 |
+
self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
|
| 57 |
+
self._delimeters = delimeters
|
| 58 |
+
self._max_len = max_len
|
| 59 |
+
self._skip_correct = skip_correct
|
| 60 |
+
self._skip_complex = skip_complex
|
| 61 |
+
self._tag_strategy = tag_strategy
|
| 62 |
+
self._broken_dot_strategy = broken_dot_strategy
|
| 63 |
+
self._test_mode = test_mode
|
| 64 |
+
self._tn_prob = tn_prob
|
| 65 |
+
self._tp_prob = tp_prob
|
| 66 |
+
|
| 67 |
+
@overrides
|
| 68 |
+
def _read(self, file_path):
|
| 69 |
+
# if `file_path` is a URL, redirect to the cache
|
| 70 |
+
file_path = cached_path(file_path)
|
| 71 |
+
with open(file_path, "r") as data_file:
|
| 72 |
+
logger.info("Reading instances from lines in file at: %s", file_path)
|
| 73 |
+
for line in data_file:
|
| 74 |
+
line = line.strip("\n")
|
| 75 |
+
# skip blank and broken lines
|
| 76 |
+
if not line or (not self._test_mode and self._broken_dot_strategy == 'skip'
|
| 77 |
+
and self.BROKEN_SENTENCES_REGEXP.search(line) is not None):
|
| 78 |
+
continue
|
| 79 |
+
|
| 80 |
+
tokens_and_tags = [pair.rsplit(self._delimeters['labels'], 1)
|
| 81 |
+
for pair in line.split(self._delimeters['tokens'])]
|
| 82 |
+
try:
|
| 83 |
+
tokens = [Token(token) for token, tag in tokens_and_tags]
|
| 84 |
+
tags = [tag for token, tag in tokens_and_tags]
|
| 85 |
+
except ValueError:
|
| 86 |
+
tokens = [Token(token[0]) for token in tokens_and_tags]
|
| 87 |
+
tags = None
|
| 88 |
+
|
| 89 |
+
if tokens and tokens[0] != Token(START_TOKEN):
|
| 90 |
+
tokens = [Token(START_TOKEN)] + tokens
|
| 91 |
+
|
| 92 |
+
words = [x.text for x in tokens]
|
| 93 |
+
if self._max_len is not None:
|
| 94 |
+
tokens = tokens[:self._max_len]
|
| 95 |
+
tags = None if tags is None else tags[:self._max_len]
|
| 96 |
+
instance = self.text_to_instance(tokens, tags, words)
|
| 97 |
+
if instance:
|
| 98 |
+
yield instance
|
| 99 |
+
|
| 100 |
+
def extract_tags(self, tags: List[str]):
|
| 101 |
+
op_del = self._delimeters['operations']
|
| 102 |
+
|
| 103 |
+
labels = [x.split(op_del) for x in tags]
|
| 104 |
+
|
| 105 |
+
comlex_flag_dict = {}
|
| 106 |
+
# get flags
|
| 107 |
+
for i in range(5):
|
| 108 |
+
idx = i + 1
|
| 109 |
+
comlex_flag_dict[idx] = sum([len(x) > idx for x in labels])
|
| 110 |
+
|
| 111 |
+
if self._tag_strategy == "keep_one":
|
| 112 |
+
# get only first candidates for r_tags in right and the last for left
|
| 113 |
+
labels = [x[0] for x in labels]
|
| 114 |
+
elif self._tag_strategy == "merge_all":
|
| 115 |
+
# consider phrases as words
|
| 116 |
+
pass
|
| 117 |
+
else:
|
| 118 |
+
raise Exception("Incorrect tag strategy")
|
| 119 |
+
|
| 120 |
+
detect_tags = ["CORRECT" if label == "$KEEP" else "INCORRECT" for label in labels]
|
| 121 |
+
return labels, detect_tags, comlex_flag_dict
|
| 122 |
+
|
| 123 |
+
def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
|
| 124 |
+
words: List[str] = None) -> Instance: # type: ignore
|
| 125 |
+
"""
|
| 126 |
+
We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
|
| 127 |
+
"""
|
| 128 |
+
# pylint: disable=arguments-differ
|
| 129 |
+
fields: Dict[str, Field] = {}
|
| 130 |
+
sequence = TextField(tokens, self._token_indexers)
|
| 131 |
+
fields["tokens"] = sequence
|
| 132 |
+
fields["metadata"] = MetadataField({"words": words})
|
| 133 |
+
if tags is not None:
|
| 134 |
+
labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
|
| 135 |
+
if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
|
| 136 |
+
return None
|
| 137 |
+
rnd = random()
|
| 138 |
+
# skip TN
|
| 139 |
+
if self._skip_correct and all(x == "CORRECT" for x in detect_tags):
|
| 140 |
+
if rnd > self._tn_prob:
|
| 141 |
+
return None
|
| 142 |
+
# skip TP
|
| 143 |
+
else:
|
| 144 |
+
if rnd > self._tp_prob:
|
| 145 |
+
return None
|
| 146 |
+
|
| 147 |
+
fields["labels"] = SequenceLabelField(labels, sequence,
|
| 148 |
+
label_namespace="labels")
|
| 149 |
+
fields["d_tags"] = SequenceLabelField(detect_tags, sequence,
|
| 150 |
+
label_namespace="d_tags")
|
| 151 |
+
return Instance(fields)
|
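Note: an illustrative input line for this reader, following the WORD###TAG layout sketched in the class docstring; the concrete delimiters come from utils.helpers.SEQ_DELIMETERS and the tag inventory from output_vocabulary/labels.txt, both added in this commit but not shown here, so the exact strings below are assumptions:

    # One pre-tokenized, pre-tagged sentence; $KEEP and $DELETE are GECToR edit tags.
    pairs = ["She###$KEEP", "go###$KEEP", "to###$KEEP", "to###$DELETE", "school###$KEEP"]
    line = "\t".join(pairs)  # _read() prepends the $START token and splits each pair on the label delimiter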
gector/gec_model.py
ADDED
|
@@ -0,0 +1,298 @@
| 1 |
+
"""Wrapper of AllenNLP model. Fixes errors based on model predictions"""
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
from time import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from allennlp.data.dataset import Batch
|
| 9 |
+
from allennlp.data.fields import TextField
|
| 10 |
+
from allennlp.data.instance import Instance
|
| 11 |
+
from allennlp.data.tokenizers import Token
|
| 12 |
+
from allennlp.data.vocabulary import Vocabulary
|
| 13 |
+
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
|
| 14 |
+
from allennlp.nn import util
|
| 15 |
+
|
| 16 |
+
from gector.bert_token_embedder import PretrainedBertEmbedder
|
| 17 |
+
from gector.seq2labels_model import Seq2Labels
|
| 18 |
+
from gector.tokenizer_indexer import PretrainedBertIndexer
|
| 19 |
+
from utils.helpers import PAD, UNK, get_target_sent_by_edits, START_TOKEN
|
| 20 |
+
from utils.helpers import get_weights_name
|
| 21 |
+
|
| 22 |
+
logging.getLogger("werkzeug").setLevel(logging.ERROR)
|
| 23 |
+
logger = logging.getLogger(__file__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class GecBERTModel(object):
|
| 27 |
+
def __init__(self, vocab_path=None, model_paths=None,
|
| 28 |
+
weigths=None,
|
| 29 |
+
max_len=50,
|
| 30 |
+
min_len=3,
|
| 31 |
+
lowercase_tokens=False,
|
| 32 |
+
log=False,
|
| 33 |
+
iterations=3,
|
| 34 |
+
model_name='roberta',
|
| 35 |
+
special_tokens_fix=1,
|
| 36 |
+
is_ensemble=True,
|
| 37 |
+
min_error_probability=0.0,
|
| 38 |
+
confidence=0,
|
| 39 |
+
del_confidence=0,
|
| 40 |
+
resolve_cycles=False,
|
| 41 |
+
):
|
| 42 |
+
self.model_weights = list(map(float, weigths)) if weigths else [1] * len(model_paths)
|
| 43 |
+
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 44 |
+
self.max_len = max_len
|
| 45 |
+
self.min_len = min_len
|
| 46 |
+
self.lowercase_tokens = lowercase_tokens
|
| 47 |
+
self.min_error_probability = min_error_probability
|
| 48 |
+
self.vocab = Vocabulary.from_files(vocab_path)
|
| 49 |
+
self.log = log
|
| 50 |
+
self.iterations = iterations
|
| 51 |
+
self.confidence = confidence
|
| 52 |
+
self.del_conf = del_confidence
|
| 53 |
+
self.resolve_cycles = resolve_cycles
|
| 54 |
+
# set training parameters and operations
|
| 55 |
+
|
| 56 |
+
self.indexers = []
|
| 57 |
+
self.models = []
|
| 58 |
+
for model_path in model_paths:
|
| 59 |
+
if is_ensemble:
|
| 60 |
+
model_name, special_tokens_fix = self._get_model_data(model_path)
|
| 61 |
+
weights_name = get_weights_name(model_name, lowercase_tokens)
|
| 62 |
+
self.indexers.append(self._get_indexer(weights_name, special_tokens_fix))
|
| 63 |
+
model = Seq2Labels(vocab=self.vocab,
|
| 64 |
+
text_field_embedder=self._get_embbeder(weights_name, special_tokens_fix),
|
| 65 |
+
confidence=self.confidence,
|
| 66 |
+
del_confidence=self.del_conf,
|
| 67 |
+
).to(self.device)
|
| 68 |
+
if torch.cuda.is_available():
|
| 69 |
+
model.load_state_dict(torch.load(model_path), strict=False)
|
| 70 |
+
else:
|
| 71 |
+
model.load_state_dict(torch.load(model_path,
|
| 72 |
+
map_location=torch.device('cpu')),
|
| 73 |
+
strict=False)
|
| 74 |
+
model.eval()
|
| 75 |
+
self.models.append(model)
|
| 76 |
+
|
| 77 |
+
@staticmethod
|
| 78 |
+
def _get_model_data(model_path):
|
| 79 |
+
model_name = model_path.split('/')[-1]
|
| 80 |
+
tr_model, stf = model_name.split('_')[:2]
|
| 81 |
+
return tr_model, int(stf)
|
| 82 |
+
|
| 83 |
+
def _restore_model(self, input_path):
|
| 84 |
+
if os.path.isdir(input_path):
|
| 85 |
+
print("Model could not be restored from directory", file=sys.stderr)
|
| 86 |
+
filenames = []
|
| 87 |
+
else:
|
| 88 |
+
filenames = [input_path]
|
| 89 |
+
for model_path in filenames:
|
| 90 |
+
try:
|
| 91 |
+
if torch.cuda.is_available():
|
| 92 |
+
loaded_model = torch.load(model_path)
|
| 93 |
+
else:
|
| 94 |
+
loaded_model = torch.load(model_path,
|
| 95 |
+
map_location=lambda storage,
|
| 96 |
+
loc: storage)
|
| 97 |
+
except:
|
| 98 |
+
print(f"{model_path} is not valid model", file=sys.stderr)
|
| 99 |
+
own_state = self.model.state_dict()
|
| 100 |
+
for name, weights in loaded_model.items():
|
| 101 |
+
if name not in own_state:
|
| 102 |
+
continue
|
| 103 |
+
try:
|
| 104 |
+
if len(filenames) == 1:
|
| 105 |
+
own_state[name].copy_(weights)
|
| 106 |
+
else:
|
| 107 |
+
own_state[name] += weights
|
| 108 |
+
except RuntimeError:
|
| 109 |
+
continue
|
| 110 |
+
print("Model is restored", file=sys.stderr)
|
| 111 |
+
|
| 112 |
+
def predict(self, batches):
|
| 113 |
+
t11 = time()
|
| 114 |
+
predictions = []
|
| 115 |
+
for batch, model in zip(batches, self.models):
|
| 116 |
+
batch = util.move_to_device(batch.as_tensor_dict(), 0 if torch.cuda.is_available() else -1)
|
| 117 |
+
with torch.no_grad():
|
| 118 |
+
prediction = model.forward(**batch)
|
| 119 |
+
predictions.append(prediction)
|
| 120 |
+
|
| 121 |
+
preds, idx, error_probs = self._convert(predictions)
|
| 122 |
+
t55 = time()
|
| 123 |
+
if self.log:
|
| 124 |
+
print(f"Inference time {t55 - t11}")
|
| 125 |
+
return preds, idx, error_probs
|
| 126 |
+
|
| 127 |
+
def get_token_action(self, token, index, prob, sugg_token):
|
| 128 |
+
"""Get lost of suggested actions for token."""
|
| 129 |
+
# cases when we don't need to do anything
|
| 130 |
+
if prob < self.min_error_probability or sugg_token in [UNK, PAD, '$KEEP']:
|
| 131 |
+
return None
|
| 132 |
+
|
| 133 |
+
if sugg_token.startswith('$REPLACE_') or sugg_token.startswith('$TRANSFORM_') or sugg_token == '$DELETE':
|
| 134 |
+
start_pos = index
|
| 135 |
+
end_pos = index + 1
|
| 136 |
+
elif sugg_token.startswith("$APPEND_") or sugg_token.startswith("$MERGE_"):
|
| 137 |
+
start_pos = index + 1
|
| 138 |
+
end_pos = index + 1
|
| 139 |
+
|
| 140 |
+
if sugg_token == "$DELETE":
|
| 141 |
+
sugg_token_clear = ""
|
| 142 |
+
elif sugg_token.startswith('$TRANSFORM_') or sugg_token.startswith("$MERGE_"):
|
| 143 |
+
sugg_token_clear = sugg_token[:]
|
| 144 |
+
else:
|
| 145 |
+
sugg_token_clear = sugg_token[sugg_token.index('_') + 1:]
|
| 146 |
+
|
| 147 |
+
return start_pos - 1, end_pos - 1, sugg_token_clear, prob
|
| 148 |
+
|
| 149 |
+
def _get_embbeder(self, weigths_name, special_tokens_fix):
|
| 150 |
+
embedders = {'bert': PretrainedBertEmbedder(
|
| 151 |
+
pretrained_model=weigths_name,
|
| 152 |
+
requires_grad=False,
|
| 153 |
+
top_layer_only=True,
|
| 154 |
+
special_tokens_fix=special_tokens_fix)
|
| 155 |
+
}
|
| 156 |
+
text_field_embedder = BasicTextFieldEmbedder(
|
| 157 |
+
token_embedders=embedders,
|
| 158 |
+
embedder_to_indexer_map={"bert": ["bert", "bert-offsets"]},
|
| 159 |
+
allow_unmatched_keys=True)
|
| 160 |
+
return text_field_embedder
|
| 161 |
+
|
| 162 |
+
def _get_indexer(self, weights_name, special_tokens_fix):
|
| 163 |
+
bert_token_indexer = PretrainedBertIndexer(
|
| 164 |
+
pretrained_model=weights_name,
|
| 165 |
+
do_lowercase=self.lowercase_tokens,
|
| 166 |
+
max_pieces_per_token=5,
|
| 167 |
+
special_tokens_fix=special_tokens_fix
|
| 168 |
+
)
|
| 169 |
+
return {'bert': bert_token_indexer}
|
| 170 |
+
|
| 171 |
+
def preprocess(self, token_batch):
|
| 172 |
+
seq_lens = [len(sequence) for sequence in token_batch if sequence]
|
| 173 |
+
if not seq_lens:
|
| 174 |
+
return []
|
| 175 |
+
max_len = min(max(seq_lens), self.max_len)
|
| 176 |
+
batches = []
|
| 177 |
+
for indexer in self.indexers:
|
| 178 |
+
batch = []
|
| 179 |
+
for sequence in token_batch:
|
| 180 |
+
tokens = sequence[:max_len]
|
| 181 |
+
tokens = [Token(token) for token in ['$START'] + tokens]
|
| 182 |
+
batch.append(Instance({'tokens': TextField(tokens, indexer)}))
|
| 183 |
+
batch = Batch(batch)
|
| 184 |
+
batch.index_instances(self.vocab)
|
| 185 |
+
batches.append(batch)
|
| 186 |
+
|
| 187 |
+
return batches
|
| 188 |
+
|
| 189 |
+
def _convert(self, data):
|
| 190 |
+
all_class_probs = torch.zeros_like(data[0]['class_probabilities_labels'])
|
| 191 |
+
error_probs = torch.zeros_like(data[0]['max_error_probability'])
|
| 192 |
+
for output, weight in zip(data, self.model_weights):
|
| 193 |
+
all_class_probs += weight * output['class_probabilities_labels'] / sum(self.model_weights)
|
| 194 |
+
error_probs += weight * output['max_error_probability'] / sum(self.model_weights)
|
| 195 |
+
|
| 196 |
+
max_vals = torch.max(all_class_probs, dim=-1)
|
| 197 |
+
probs = max_vals[0].tolist()
|
| 198 |
+
idx = max_vals[1].tolist()
|
| 199 |
+
return probs, idx, error_probs.tolist()
|
| 200 |
+
|
| 201 |
+
def update_final_batch(self, final_batch, pred_ids, pred_batch,
|
| 202 |
+
prev_preds_dict):
|
| 203 |
+
new_pred_ids = []
|
| 204 |
+
total_updated = 0
|
| 205 |
+
for i, orig_id in enumerate(pred_ids):
|
| 206 |
+
orig = final_batch[orig_id]
|
| 207 |
+
pred = pred_batch[i]
|
| 208 |
+
prev_preds = prev_preds_dict[orig_id]
|
| 209 |
+
if orig != pred and pred not in prev_preds:
|
| 210 |
+
final_batch[orig_id] = pred
|
| 211 |
+
new_pred_ids.append(orig_id)
|
| 212 |
+
prev_preds_dict[orig_id].append(pred)
|
| 213 |
+
total_updated += 1
|
| 214 |
+
elif orig != pred and pred in prev_preds:
|
| 215 |
+
# update final batch, but stop iterations
|
| 216 |
+
final_batch[orig_id] = pred
|
| 217 |
+
total_updated += 1
|
| 218 |
+
else:
|
| 219 |
+
continue
|
| 220 |
+
return final_batch, new_pred_ids, total_updated
|
| 221 |
+
|
| 222 |
+
def postprocess_batch(self, batch, all_probabilities, all_idxs,
|
| 223 |
+
error_probs):
|
| 224 |
+
all_results = []
|
| 225 |
+
noop_index = self.vocab.get_token_index("$KEEP", "labels")
|
| 226 |
+
for tokens, probabilities, idxs, error_prob in zip(batch,
|
| 227 |
+
all_probabilities,
|
| 228 |
+
all_idxs,
|
| 229 |
+
error_probs):
|
| 230 |
+
length = min(len(tokens), self.max_len)
|
| 231 |
+
edits = []
|
| 232 |
+
|
| 233 |
+
# skip whole sentences if there are no errors
|
| 234 |
+
if max(idxs) == 0:
|
| 235 |
+
all_results.append(tokens)
|
| 236 |
+
continue
|
| 237 |
+
|
| 238 |
+
# skip whole sentence if probability of correctness is not high
|
| 239 |
+
if error_prob < self.min_error_probability:
|
| 240 |
+
all_results.append(tokens)
|
| 241 |
+
continue
|
| 242 |
+
|
| 243 |
+
for i in range(length + 1):
|
| 244 |
+
# because of START token
|
| 245 |
+
if i == 0:
|
| 246 |
+
token = START_TOKEN
|
| 247 |
+
else:
|
| 248 |
+
token = tokens[i - 1]
|
| 249 |
+
# skip if there is no error
|
| 250 |
+
if idxs[i] == noop_index:
|
| 251 |
+
continue
|
| 252 |
+
|
| 253 |
+
sugg_token = self.vocab.get_token_from_index(idxs[i],
|
| 254 |
+
namespace='labels')
|
| 255 |
+
action = self.get_token_action(token, i, probabilities[i],
|
| 256 |
+
sugg_token)
|
| 257 |
+
if not action:
|
| 258 |
+
continue
|
| 259 |
+
|
| 260 |
+
edits.append(action)
|
| 261 |
+
all_results.append(get_target_sent_by_edits(tokens, edits))
|
| 262 |
+
return all_results
|
| 263 |
+
|
| 264 |
+
def handle_batch(self, full_batch):
|
| 265 |
+
"""
|
| 266 |
+
Handle batch of requests.
|
| 267 |
+
"""
|
| 268 |
+
final_batch = full_batch[:]
|
| 269 |
+
batch_size = len(full_batch)
|
| 270 |
+
prev_preds_dict = {i: [final_batch[i]] for i in range(len(final_batch))}
|
| 271 |
+
short_ids = [i for i in range(len(full_batch))
|
| 272 |
+
if len(full_batch[i]) < self.min_len]
|
| 273 |
+
pred_ids = [i for i in range(len(full_batch)) if i not in short_ids]
|
| 274 |
+
total_updates = 0
|
| 275 |
+
|
| 276 |
+
for n_iter in range(self.iterations):
|
| 277 |
+
orig_batch = [final_batch[i] for i in pred_ids]
|
| 278 |
+
|
| 279 |
+
sequences = self.preprocess(orig_batch)
|
| 280 |
+
|
| 281 |
+
if not sequences:
|
| 282 |
+
break
|
| 283 |
+
probabilities, idxs, error_probs = self.predict(sequences)
|
| 284 |
+
|
| 285 |
+
pred_batch = self.postprocess_batch(orig_batch, probabilities,
|
| 286 |
+
idxs, error_probs)
|
| 287 |
+
if self.log:
|
| 288 |
+
print(f"Iteration {n_iter + 1}. Predicted {round(100*len(pred_ids)/batch_size, 1)}% of sentences.")
|
| 289 |
+
|
| 290 |
+
final_batch, pred_ids, cnt = \
|
| 291 |
+
self.update_final_batch(final_batch, pred_ids, pred_batch,
|
| 292 |
+
prev_preds_dict)
|
| 293 |
+
total_updates += cnt
|
| 294 |
+
|
| 295 |
+
if not pred_ids:
|
| 296 |
+
break
|
| 297 |
+
|
| 298 |
+
return final_batch, total_updates
|
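Note: a minimal sketch of driving GecBERTModel directly, mirroring the arguments that load_gector_model() in app.py passes above; the checkpoint path and vocabulary directory are the ones app.py expects to find in this Space:

    from gector.gec_model import GecBERTModel

    model = GecBERTModel(vocab_path="output_vocabulary",
                         model_paths=["gector/roberta_1_gector.th"],
                         model_name="roberta",
                         special_tokens_fix=1,
                         is_ensemble=False,
                         iterations=5)

    batch = [["she", "go", "to", "school", "yesterday"]]  # one pre-tokenized sentence
    corrected, n_updates = model.handle_batch(batch)
    print(" ".join(corrected[0]))  # n_updates counts sentence updates across the refinement iterations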
gector/seq2labels_model.py
ADDED
|
@@ -0,0 +1,194 @@
| 1 |
+
"""Basic model. Predicts tags for every token"""
|
| 2 |
+
from typing import Dict, Optional, List, Any
|
| 3 |
+
|
| 4 |
+
import numpy
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
from allennlp.data import Vocabulary
|
| 8 |
+
from allennlp.models.model import Model
|
| 9 |
+
from allennlp.modules import TimeDistributed, TextFieldEmbedder
|
| 10 |
+
from allennlp.nn import InitializerApplicator, RegularizerApplicator
|
| 11 |
+
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
|
| 12 |
+
from allennlp.training.metrics import CategoricalAccuracy
|
| 13 |
+
from overrides import overrides
|
| 14 |
+
from torch.nn.modules.linear import Linear
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@Model.register("seq2labels")
|
| 18 |
+
class Seq2Labels(Model):
|
| 19 |
+
"""
|
| 20 |
+
This ``Seq2Labels`` simply encodes a sequence of text with a stacked ``Seq2SeqEncoder``, then
|
| 21 |
+
predicts a tag (or a couple of tags) for each token in the sequence.
|
| 22 |
+
|
| 23 |
+
Parameters
|
| 24 |
+
----------
|
| 25 |
+
vocab : ``Vocabulary``, required
|
| 26 |
+
A Vocabulary, required in order to compute sizes for input/output projections.
|
| 27 |
+
text_field_embedder : ``TextFieldEmbedder``, required
|
| 28 |
+
Used to embed the ``tokens`` ``TextField`` we get as input to the model.
|
| 29 |
+
encoder : ``Seq2SeqEncoder``
|
| 30 |
+
The encoder (with its own internal stacking) that we will use in between embedding tokens
|
| 31 |
+
and predicting output tags.
|
| 32 |
+
calculate_span_f1 : ``bool``, optional (default=``None``)
|
| 33 |
+
Calculate span-level F1 metrics during training. If this is ``True``, then
|
| 34 |
+
``label_encoding`` is required. If ``None`` and
|
| 35 |
+
label_encoding is specified, this is set to ``True``.
|
| 36 |
+
If ``None`` and label_encoding is not specified, it defaults
|
| 37 |
+
to ``False``.
|
| 38 |
+
label_encoding : ``str``, optional (default=``None``)
|
| 39 |
+
Label encoding to use when calculating span f1.
|
| 40 |
+
Valid options are "BIO", "BIOUL", "IOB1", "BMES".
|
| 41 |
+
Required if ``calculate_span_f1`` is true.
|
| 42 |
+
labels_namespace : ``str``, optional (default=``labels``)
|
| 43 |
+
This is needed to compute the SpanBasedF1Measure metric, if desired.
|
| 44 |
+
Unless you did something unusual, the default value should be what you want.
|
| 45 |
+
verbose_metrics : ``bool``, optional (default = False)
|
| 46 |
+
If true, metrics will be returned per label class in addition
|
| 47 |
+
to the overall statistics.
|
| 48 |
+
initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
|
| 49 |
+
Used to initialize the model parameters.
|
| 50 |
+
regularizer : ``RegularizerApplicator``, optional (default=``None``)
|
| 51 |
+
If provided, will be used to calculate the regularization penalty during training.
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
def __init__(self, vocab: Vocabulary,
|
| 55 |
+
text_field_embedder: TextFieldEmbedder,
|
| 56 |
+
predictor_dropout=0.0,
|
| 57 |
+
labels_namespace: str = "labels",
|
| 58 |
+
detect_namespace: str = "d_tags",
|
| 59 |
+
verbose_metrics: bool = False,
|
| 60 |
+
label_smoothing: float = 0.0,
|
| 61 |
+
confidence: float = 0.0,
|
| 62 |
+
del_confidence: float = 0.0,
|
| 63 |
+
initializer: InitializerApplicator = InitializerApplicator(),
|
| 64 |
+
regularizer: Optional[RegularizerApplicator] = None) -> None:
|
| 65 |
+
super(Seq2Labels, self).__init__(vocab, regularizer)
|
| 66 |
+
|
| 67 |
+
self.label_namespaces = [labels_namespace,
|
| 68 |
+
detect_namespace]
|
| 69 |
+
self.text_field_embedder = text_field_embedder
|
| 70 |
+
self.num_labels_classes = self.vocab.get_vocab_size(labels_namespace)
|
| 71 |
+
self.num_detect_classes = self.vocab.get_vocab_size(detect_namespace)
|
| 72 |
+
self.label_smoothing = label_smoothing
|
| 73 |
+
self.confidence = confidence
|
| 74 |
+
self.del_conf = del_confidence
|
| 75 |
+
self.incorr_index = self.vocab.get_token_index("INCORRECT",
|
| 76 |
+
namespace=detect_namespace)
|
| 77 |
+
|
| 78 |
+
self._verbose_metrics = verbose_metrics
|
| 79 |
+
self.predictor_dropout = TimeDistributed(torch.nn.Dropout(predictor_dropout))
|
| 80 |
+
|
| 81 |
+
self.tag_labels_projection_layer = TimeDistributed(
|
| 82 |
+
Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_labels_classes))
|
| 83 |
+
|
| 84 |
+
self.tag_detect_projection_layer = TimeDistributed(
|
| 85 |
+
Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_detect_classes))
|
| 86 |
+
|
| 87 |
+
self.metrics = {"accuracy": CategoricalAccuracy()}
|
| 88 |
+
|
| 89 |
+
initializer(self)
|
| 90 |
+
|
| 91 |
+
@overrides
|
| 92 |
+
def forward(self, # type: ignore
|
| 93 |
+
tokens: Dict[str, torch.LongTensor],
|
| 94 |
+
labels: torch.LongTensor = None,
|
| 95 |
+
d_tags: torch.LongTensor = None,
|
| 96 |
+
metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
|
| 97 |
+
# pylint: disable=arguments-differ
|
| 98 |
+
"""
|
| 99 |
+
Parameters
|
| 100 |
+
----------
|
| 101 |
+
tokens : Dict[str, torch.LongTensor], required
|
| 102 |
+
The output of ``TextField.as_array()``, which should typically be passed directly to a
|
| 103 |
+
``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
|
| 104 |
+
tensors. At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
|
| 105 |
+
Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
|
| 106 |
+
for the ``TokenIndexers`` when you created the ``TextField`` representing your
|
| 107 |
+
sequence. The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
|
| 108 |
+
which knows how to combine different word representations into a single vector per
|
| 109 |
+
token in your input.
|
| 110 |
+
labels : torch.LongTensor, optional (default = None)
|
| 111 |
+
A torch tensor representing the sequence of integer gold class labels of shape
|
| 112 |
+
``(batch_size, num_tokens)``.
|
| 113 |
+
d_tags : torch.LongTensor, optional (default = None)
|
| 114 |
+
A torch tensor representing the sequence of integer gold class labels of shape
|
| 115 |
+
``(batch_size, num_tokens)``.
|
| 116 |
+
metadata : ``List[Dict[str, Any]]``, optional, (default = None)
|
| 117 |
+
metadata containing the original words in the sentence to be tagged under a 'words' key.
|
| 118 |
+
|
| 119 |
+
Returns
|
| 120 |
+
-------
|
| 121 |
+
An output dictionary consisting of:
|
| 122 |
+
logits : torch.FloatTensor
|
| 123 |
+
A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
|
| 124 |
+
unnormalised log probabilities of the tag classes.
|
| 125 |
+
class_probabilities : torch.FloatTensor
|
| 126 |
+
A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
|
| 127 |
+
a distribution of the tag classes per word.
|
| 128 |
+
loss : torch.FloatTensor, optional
|
| 129 |
+
A scalar loss to be optimised.
|
| 130 |
+
|
| 131 |
+
"""
|
| 132 |
+
encoded_text = self.text_field_embedder(tokens)
|
| 133 |
+
batch_size, sequence_length, _ = encoded_text.size()
|
| 134 |
+
mask = get_text_field_mask(tokens)
|
| 135 |
+
logits_labels = self.tag_labels_projection_layer(self.predictor_dropout(encoded_text))
|
| 136 |
+
logits_d = self.tag_detect_projection_layer(encoded_text)
|
| 137 |
+
|
| 138 |
+
class_probabilities_labels = F.softmax(logits_labels, dim=-1).view(
|
| 139 |
+
[batch_size, sequence_length, self.num_labels_classes])
|
| 140 |
+
class_probabilities_d = F.softmax(logits_d, dim=-1).view(
|
| 141 |
+
[batch_size, sequence_length, self.num_detect_classes])
|
| 142 |
+
error_probs = class_probabilities_d[:, :, self.incorr_index] * mask
|
| 143 |
+
incorr_prob = torch.max(error_probs, dim=-1)[0]
|
| 144 |
+
|
| 145 |
+
probability_change = [self.confidence, self.del_conf] + [0] * (self.num_labels_classes - 2)
|
| 146 |
+
class_probabilities_labels += torch.FloatTensor(probability_change).repeat(
|
| 147 |
+
(batch_size, sequence_length, 1)).to(class_probabilities_labels.device)
|
| 148 |
+
|
| 149 |
+
output_dict = {"logits_labels": logits_labels,
|
| 150 |
+
"logits_d_tags": logits_d,
|
| 151 |
+
"class_probabilities_labels": class_probabilities_labels,
|
| 152 |
+
"class_probabilities_d_tags": class_probabilities_d,
|
| 153 |
+
"max_error_probability": incorr_prob}
|
| 154 |
+
if labels is not None and d_tags is not None:
|
| 155 |
+
loss_labels = sequence_cross_entropy_with_logits(logits_labels, labels, mask,
|
| 156 |
+
label_smoothing=self.label_smoothing)
|
| 157 |
+
loss_d = sequence_cross_entropy_with_logits(logits_d, d_tags, mask)
|
| 158 |
+
for metric in self.metrics.values():
|
| 159 |
+
metric(logits_labels, labels, mask.float())
|
| 160 |
+
metric(logits_d, d_tags, mask.float())
|
| 161 |
+
output_dict["loss"] = loss_labels + loss_d
|
| 162 |
+
|
| 163 |
+
if metadata is not None:
|
| 164 |
+
output_dict["words"] = [x["words"] for x in metadata]
|
| 165 |
+
return output_dict
|
| 166 |
+
|
| 167 |
+
@overrides
|
| 168 |
+
def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
| 169 |
+
"""
|
| 170 |
+
Does a simple position-wise argmax over each token, converts indices to string labels, and
|
| 171 |
+
adds a ``"tags"`` key to the dictionary with the result.
|
| 172 |
+
"""
|
| 173 |
+
for label_namespace in self.label_namespaces:
|
| 174 |
+
all_predictions = output_dict[f'class_probabilities_{label_namespace}']
|
| 175 |
+
all_predictions = all_predictions.cpu().data.numpy()
|
| 176 |
+
if all_predictions.ndim == 3:
|
| 177 |
+
predictions_list = [all_predictions[i] for i in range(all_predictions.shape[0])]
|
| 178 |
+
else:
|
| 179 |
+
predictions_list = [all_predictions]
|
| 180 |
+
all_tags = []
|
| 181 |
+
|
| 182 |
+
for predictions in predictions_list:
|
| 183 |
+
argmax_indices = numpy.argmax(predictions, axis=-1)
|
| 184 |
+
tags = [self.vocab.get_token_from_index(x, namespace=label_namespace)
|
| 185 |
+
for x in argmax_indices]
|
| 186 |
+
all_tags.append(tags)
|
| 187 |
+
output_dict[f'{label_namespace}'] = all_tags
|
| 188 |
+
return output_dict
|
| 189 |
+
|
| 190 |
+
@overrides
|
| 191 |
+
def get_metrics(self, reset: bool = False) -> Dict[str, float]:
|
| 192 |
+
metrics_to_return = {metric_name: metric.get_metric(reset) for
|
| 193 |
+
metric_name, metric in self.metrics.items()}
|
| 194 |
+
return metrics_to_return
|
gector/tokenization.py
ADDED
|
@@ -0,0 +1,181 @@
| 1 |
+
import os
|
| 2 |
+
from time import time
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_bpe_groups(token_offsets, bpe_offsets, input_ids, max_bpe_pieces=5):
|
| 9 |
+
bpe_groups = []
|
| 10 |
+
last_used_bpe = 0
|
| 11 |
+
# find the size of offsets
|
| 12 |
+
if (0, 0) in bpe_offsets:
|
| 13 |
+
bpe_size = bpe_offsets.index((0, 0))
|
| 14 |
+
else:
|
| 15 |
+
bpe_size = len(bpe_offsets)
|
| 16 |
+
|
| 17 |
+
saved_ids = [i for i in range(len(input_ids))]
|
| 18 |
+
redundant_ids = []
|
| 19 |
+
for token_offset in token_offsets:
|
| 20 |
+
start_token, end_token = token_offset
|
| 21 |
+
bpe_group = []
|
| 22 |
+
mapping_is_found = False
|
| 23 |
+
for i in range(last_used_bpe, bpe_size):
|
| 24 |
+
start_bpe, end_bpe = bpe_offsets[i]
|
| 25 |
+
if start_bpe >= start_token and end_bpe <= end_token:
|
| 26 |
+
# check if bpe_group is satisfy max_bpe_pieces constraint
|
| 27 |
+
if len(bpe_group) < max_bpe_pieces:
|
| 28 |
+
bpe_group.append(i)
|
| 29 |
+
else:
|
| 30 |
+
redundant_ids.append(i)
|
| 31 |
+
last_used_bpe = i + 1
|
| 32 |
+
mapping_is_found = True
|
| 33 |
+
elif mapping_is_found:
|
| 34 |
+
# stop doing useless iterations
|
| 35 |
+
break
|
| 36 |
+
else:
|
| 37 |
+
continue
|
| 38 |
+
bpe_groups.append(bpe_group)
|
| 39 |
+
saved_ids = [i for i in saved_ids if i not in redundant_ids]
|
| 40 |
+
return bpe_groups, saved_ids
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def reduce_input_ids(input_ids, bpe_groups, saved_ids,
|
| 44 |
+
max_bpe_length=80, max_bpe_pieces=5):
|
| 45 |
+
# check if sequence is satisfy max_bpe_length constraint
|
| 46 |
+
while len(saved_ids) > max_bpe_length:
|
| 47 |
+
max_bpe_pieces -= 1
|
| 48 |
+
for token_id in range(len(bpe_groups)):
|
| 49 |
+
if len(bpe_groups[token_id]) > max_bpe_pieces:
|
| 50 |
+
redundant_ids = bpe_groups[token_id][max_bpe_pieces:]
|
| 51 |
+
bpe_groups[token_id] = bpe_groups[token_id][:max_bpe_pieces]
|
| 52 |
+
saved_ids = [i for i in saved_ids if i not in redundant_ids]
|
| 53 |
+
|
| 54 |
+
# get offsets
|
| 55 |
+
reduced_ids = [input_ids[i] for i in saved_ids]
|
| 56 |
+
correct_offsets = []
|
| 57 |
+
idx = 0
|
| 58 |
+
for i, bpe_group in enumerate(bpe_groups):
|
| 59 |
+
norm_idx = min(idx, len(reduced_ids) - 1)
|
| 60 |
+
correct_offsets.append(norm_idx)
|
| 61 |
+
idx += len(bpe_group)
|
| 62 |
+
|
| 63 |
+
return reduced_ids, correct_offsets
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_offsets_and_reduce_input_ids(tokenizer_output, token_offset_list,
|
| 67 |
+
index_name="bert", max_bpe_length=80,
|
| 68 |
+
max_bpe_pieces=5):
|
| 69 |
+
timings = {"bpe": 0, "reduce": 0, "mask": 0}
|
| 70 |
+
output_ids, output_offsets, output_masks = [], [], []
|
| 71 |
+
for i, token_offsets in enumerate(token_offset_list):
|
| 72 |
+
input_ids = tokenizer_output['input_ids'][i]
|
| 73 |
+
|
| 74 |
+
t0 = time()
|
| 75 |
+
# get bpe level offsets
|
| 76 |
+
bpe_offsets = tokenizer_output['offset_mapping'][i]
|
| 77 |
+
bpe_groups, saved_ids = get_bpe_groups(token_offsets, bpe_offsets,
|
| 78 |
+
input_ids,
|
| 79 |
+
max_bpe_pieces=max_bpe_pieces)
|
| 80 |
+
t1 = time()
|
| 81 |
+
timings["bpe"] += t1 - t0
|
| 82 |
+
|
| 83 |
+
# reduce sequence length
|
| 84 |
+
reduced_ids, correct_offsets = reduce_input_ids(input_ids, bpe_groups,
|
| 85 |
+
saved_ids,
|
| 86 |
+
max_bpe_length=max_bpe_length,
|
| 87 |
+
max_bpe_pieces=max_bpe_pieces)
|
| 88 |
+
|
| 89 |
+
t2 = time()
|
| 90 |
+
timings["reduce"] += t2 - t1
|
| 91 |
+
|
| 92 |
+
# get mask
|
| 93 |
+
bpe_mask = [1 for _ in correct_offsets]
|
| 94 |
+
output_ids.append(reduced_ids)
|
| 95 |
+
output_offsets.append(correct_offsets)
|
| 96 |
+
output_masks.append(bpe_mask)
|
| 97 |
+
|
| 98 |
+
t3 = time()
|
| 99 |
+
timings["mask"] += t3 - t2
|
| 100 |
+
|
| 101 |
+
# tt = sum(timings.values())
|
| 102 |
+
# timings = {k: f"{round(v * 100 / tt, 2)}%" for k, v in timings.items()}
|
| 103 |
+
# print(timings)
|
| 104 |
+
|
| 105 |
+
output = {index_name: output_ids,
|
| 106 |
+
f"{index_name}-offsets": output_offsets,
|
| 107 |
+
"mask": output_masks}
|
| 108 |
+
return output
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def get_offset_for_tokens(tokens):
|
| 112 |
+
sentence = " ".join(tokens)
|
| 113 |
+
token_offsets = []
|
| 114 |
+
end_idx = 0
|
| 115 |
+
for token in tokens:
|
| 116 |
+
idx = sentence[end_idx:].index(token) + end_idx
|
| 117 |
+
end_idx = idx + len(token)
|
| 118 |
+
offset = (idx, end_idx)
|
| 119 |
+
token_offsets.append(offset)
|
| 120 |
+
return token_offsets
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def get_token_offsets(batch):
|
| 124 |
+
token_offset_list = []
|
| 125 |
+
for tokens in batch:
|
| 126 |
+
token_offsets = get_offset_for_tokens(tokens)
|
| 127 |
+
token_offset_list.append(token_offsets)
|
| 128 |
+
return token_offset_list
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def pad_output(output, pad_idx=0):
|
| 132 |
+
padded_output = {}
|
| 133 |
+
for input_key in output.keys():
|
| 134 |
+
indexes = output[input_key]
|
| 135 |
+
max_len = max([len(x) for x in indexes])
|
| 136 |
+
padded_indexes = []
|
| 137 |
+
for index_list in indexes:
|
| 138 |
+
cur_len = len(index_list)
|
| 139 |
+
pad_len = max_len - cur_len
|
| 140 |
+
padded_indexes.append(index_list + [pad_idx] * pad_len)
|
| 141 |
+
padded_output[input_key] = padded_indexes
|
| 142 |
+
return padded_output
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def tokenize_batch(tokenizer, batch_tokens, index_name="bert",
|
| 146 |
+
max_bpe_length=80, max_bpe_pieces=5):
|
| 147 |
+
timings = {}
|
| 148 |
+
t0 = time()
|
| 149 |
+
# get batch with sentences
|
| 150 |
+
batch_sentences = [" ".join(x) for x in batch_tokens]
|
| 151 |
+
# get token level offsets
|
| 152 |
+
token_offset_list = get_token_offsets(batch_tokens)
|
| 153 |
+
# token_offset_list = get_token_offsets_multi(batch_tokens)
|
| 154 |
+
t1 = time()
|
| 155 |
+
timings["offset_time"] = t1 - t0
|
| 156 |
+
# tokenize batch
|
| 157 |
+
tokenizer_output = tokenizer.batch_encode_plus(batch_sentences,
|
| 158 |
+
pad_to_max_length=False,
|
| 159 |
+
return_offsets_mapping=True,
|
| 160 |
+
add_special_tokens=False)
|
| 161 |
+
|
| 162 |
+
t2 = time()
|
| 163 |
+
timings["tokenize_time"] = t2 - t1
|
| 164 |
+
# postprocess batch
|
| 165 |
+
output = get_offsets_and_reduce_input_ids(tokenizer_output,
|
| 166 |
+
token_offset_list,
|
| 167 |
+
index_name=index_name,
|
| 168 |
+
max_bpe_length=max_bpe_length,
|
| 169 |
+
max_bpe_pieces=max_bpe_pieces)
|
| 170 |
+
|
| 171 |
+
t3 = time()
|
| 172 |
+
timings["reduce_time"] = t3 - t2
|
| 173 |
+
# pad output
|
| 174 |
+
output = pad_output(output)
|
| 175 |
+
t4 = time()
|
| 176 |
+
timings["pading_time"] = t4 - t3
|
| 177 |
+
# tt = sum(timings.values())
|
| 178 |
+
# timings = {k:f"{round(v*100/tt, 2)}%" for k,v in timings.items()}
|
| 179 |
+
# print(timings)
|
| 180 |
+
|
| 181 |
+
return output
|
gector/tokenizer_indexer.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tweaked version of corresponding AllenNLP file"""
|
| 2 |
+
import logging
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
from typing import Dict, List, Callable
|
| 5 |
+
|
| 6 |
+
from allennlp.common.util import pad_sequence_to_length
|
| 7 |
+
from allennlp.data.token_indexers.token_indexer import TokenIndexer
|
| 8 |
+
from allennlp.data.tokenizers.token import Token
|
| 9 |
+
from allennlp.data.vocabulary import Vocabulary
|
| 10 |
+
from overrides import overrides
|
| 11 |
+
from transformers import AutoTokenizer
|
| 12 |
+
|
| 13 |
+
from utils.helpers import START_TOKEN
|
| 14 |
+
|
| 15 |
+
from gector.tokenization import tokenize_batch
|
| 16 |
+
import copy
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# TODO(joelgrus): Figure out how to generate token_type_ids out of this token indexer.
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class TokenizerIndexer(TokenIndexer[int]):
|
| 25 |
+
"""
|
| 26 |
+
A token indexer that does the wordpiece-tokenization (e.g. for BERT embeddings).
|
| 27 |
+
If you are using one of the pretrained BERT models, you'll want to use the ``PretrainedBertIndexer``
|
| 28 |
+
subclass rather than this base class.
|
| 29 |
+
|
| 30 |
+
Parameters
|
| 31 |
+
----------
|
| 32 |
+
tokenizer : ``Callable[[str], List[str]]``
|
| 33 |
+
A function that does the actual tokenization.
|
| 34 |
+
max_pieces : int, optional (default: 512)
|
| 35 |
+
The BERT embedder uses positional embeddings and so has a corresponding
|
| 36 |
+
maximum length for its input ids. Any inputs longer than this will
|
| 37 |
+
either be truncated (default), or be split apart and batched using a
|
| 38 |
+
sliding window.
|
| 39 |
+
token_min_padding_length : ``int``, optional (default=``0``)
|
| 40 |
+
See :class:`TokenIndexer`.
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
def __init__(self,
|
| 44 |
+
tokenizer: Callable[[str], List[str]],
|
| 45 |
+
max_pieces: int = 512,
|
| 46 |
+
max_pieces_per_token: int = 3,
|
| 47 |
+
token_min_padding_length: int = 0) -> None:
|
| 48 |
+
super().__init__(token_min_padding_length)
|
| 49 |
+
|
| 50 |
+
# The BERT code itself does a two-step tokenization:
|
| 51 |
+
# sentence -> [words], and then word -> [wordpieces]
|
| 52 |
+
# In AllenNLP, the first step is implemented as the ``BertBasicWordSplitter``,
|
| 53 |
+
# and this token indexer handles the second.
|
| 54 |
+
|
| 55 |
+
self.tokenizer = tokenizer
|
| 56 |
+
self.max_pieces_per_token = max_pieces_per_token
|
| 57 |
+
self.max_pieces = max_pieces
|
| 58 |
+
self.max_pieces_per_sentence = 80
|
| 59 |
+
|
| 60 |
+
@overrides
|
| 61 |
+
def tokens_to_indices(self, tokens: List[Token],
|
| 62 |
+
vocabulary: Vocabulary,
|
| 63 |
+
index_name: str) -> Dict[str, List[int]]:
|
| 64 |
+
text = [token.text for token in tokens]
|
| 65 |
+
batch_tokens = [text]
|
| 66 |
+
|
| 67 |
+
output_fast = tokenize_batch(self.tokenizer,
|
| 68 |
+
batch_tokens,
|
| 69 |
+
max_bpe_length=self.max_pieces,
|
| 70 |
+
max_bpe_pieces=self.max_pieces_per_token)
|
| 71 |
+
output_fast = {k: v[0] for k, v in output_fast.items()}
|
| 72 |
+
return output_fast
|
| 73 |
+
|
| 74 |
+
@overrides
|
| 75 |
+
def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str, int]]):
|
| 76 |
+
# If we only use pretrained models, we don't need to do anything here.
|
| 77 |
+
pass
|
| 78 |
+
|
| 79 |
+
@overrides
|
| 80 |
+
def get_padding_token(self) -> int:
|
| 81 |
+
return 0
|
| 82 |
+
|
| 83 |
+
@overrides
|
| 84 |
+
def get_padding_lengths(self, token: int) -> Dict[str, int]: # pylint: disable=unused-argument
|
| 85 |
+
return {}
|
| 86 |
+
|
| 87 |
+
@overrides
|
| 88 |
+
def pad_token_sequence(self,
|
| 89 |
+
tokens: Dict[str, List[int]],
|
| 90 |
+
desired_num_tokens: Dict[str, int],
|
| 91 |
+
padding_lengths: Dict[str, int]) -> Dict[str, List[int]]: # pylint: disable=unused-argument
|
| 92 |
+
return {key: pad_sequence_to_length(val, desired_num_tokens[key])
|
| 93 |
+
for key, val in tokens.items()}
|
| 94 |
+
|
| 95 |
+
@overrides
|
| 96 |
+
def get_keys(self, index_name: str) -> List[str]:
|
| 97 |
+
"""
|
| 98 |
+
We need to override this because the indexer generates multiple keys.
|
| 99 |
+
"""
|
| 100 |
+
# pylint: disable=no-self-use
|
| 101 |
+
return [index_name, f"{index_name}-offsets", f"{index_name}-type-ids", "mask"]
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class PretrainedBertIndexer(TokenizerIndexer):
|
| 105 |
+
# pylint: disable=line-too-long
|
| 106 |
+
"""
|
| 107 |
+
A ``TokenIndexer`` corresponding to a pretrained BERT model.
|
| 108 |
+
|
| 109 |
+
Parameters
|
| 110 |
+
----------
|
| 111 |
+
pretrained_model: ``str``
|
| 112 |
+
Either the name of the pretrained model to use (e.g. 'bert-base-uncased'),
|
| 113 |
+
or the path to the .txt file with its vocabulary.
|
| 114 |
+
If the name is a key in the list of pretrained models at
|
| 115 |
+
https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/tokenization.py#L33
|
| 116 |
+
the corresponding path will be used; otherwise it will be interpreted as a path or URL.
|
| 117 |
+
do_lowercase: ``bool``, optional (default = True)
|
| 118 |
+
Whether to lowercase the tokens before converting to wordpiece ids.
|
| 119 |
+
max_pieces: int, optional (default: 512)
|
| 120 |
+
The BERT embedder uses positional embeddings and so has a corresponding
|
| 121 |
+
maximum length for its input ids. Any inputs longer than this will
|
| 122 |
+
either be truncated (default), or be split apart and batched using a
|
| 123 |
+
sliding window.
|
| 124 |
+
"""
|
| 125 |
+
|
| 126 |
+
def __init__(self,
|
| 127 |
+
pretrained_model: str,
|
| 128 |
+
do_lowercase: bool = True,
|
| 129 |
+
max_pieces: int = 512,
|
| 130 |
+
max_pieces_per_token: int = 5,
|
| 131 |
+
special_tokens_fix: int = 0) -> None:
|
| 132 |
+
|
| 133 |
+
if pretrained_model.endswith("-cased") and do_lowercase:
|
| 134 |
+
logger.warning("Your BERT model appears to be cased, "
|
| 135 |
+
"but your indexer is lowercasing tokens.")
|
| 136 |
+
elif pretrained_model.endswith("-uncased") and not do_lowercase:
|
| 137 |
+
logger.warning("Your BERT model appears to be uncased, "
|
| 138 |
+
"but your indexer is not lowercasing tokens.")
|
| 139 |
+
|
| 140 |
+
model_name = copy.deepcopy(pretrained_model)
|
| 141 |
+
|
| 142 |
+
model_tokenizer = AutoTokenizer.from_pretrained(
|
| 143 |
+
model_name, do_lower_case=do_lowercase, do_basic_tokenize=False, use_fast=True)
|
| 144 |
+
|
| 145 |
+
# to adjust all tokenizers
|
| 146 |
+
if hasattr(model_tokenizer, 'encoder'):
|
| 147 |
+
model_tokenizer.vocab = model_tokenizer.encoder
|
| 148 |
+
if hasattr(model_tokenizer, 'sp_model'):
|
| 149 |
+
model_tokenizer.vocab = defaultdict(lambda: 1)
|
| 150 |
+
for i in range(model_tokenizer.sp_model.get_piece_size()):
|
| 151 |
+
model_tokenizer.vocab[model_tokenizer.sp_model.id_to_piece(i)] = i
|
| 152 |
+
|
| 153 |
+
if special_tokens_fix:
|
| 154 |
+
model_tokenizer.add_tokens([START_TOKEN])
|
| 155 |
+
model_tokenizer.vocab[START_TOKEN] = len(model_tokenizer) - 1
|
| 156 |
+
|
| 157 |
+
super().__init__(tokenizer=model_tokenizer,
|
| 158 |
+
max_pieces=max_pieces,
|
| 159 |
+
max_pieces_per_token=max_pieces_per_token
|
| 160 |
+
)
|
| 161 |
+
|
gector/trainer.py
ADDED
|
@@ -0,0 +1,845 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tweaked version of corresponding AllenNLP file"""
|
| 2 |
+
import datetime
|
| 3 |
+
import logging
|
| 4 |
+
import math
|
| 5 |
+
import os
|
| 6 |
+
import time
|
| 7 |
+
import traceback
|
| 8 |
+
from typing import Dict, Optional, List, Tuple, Union, Iterable, Any
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import torch.optim.lr_scheduler
|
| 12 |
+
from allennlp.common import Params
|
| 13 |
+
from allennlp.common.checks import ConfigurationError, parse_cuda_device
|
| 14 |
+
from allennlp.common.tqdm import Tqdm
|
| 15 |
+
from allennlp.common.util import dump_metrics, gpu_memory_mb, peak_memory_mb, lazy_groups_of
|
| 16 |
+
from allennlp.data.instance import Instance
|
| 17 |
+
from allennlp.data.iterators.data_iterator import DataIterator, TensorDict
|
| 18 |
+
from allennlp.models.model import Model
|
| 19 |
+
from allennlp.nn import util as nn_util
|
| 20 |
+
from allennlp.training import util as training_util
|
| 21 |
+
from allennlp.training.checkpointer import Checkpointer
|
| 22 |
+
from allennlp.training.learning_rate_schedulers import LearningRateScheduler
|
| 23 |
+
from allennlp.training.metric_tracker import MetricTracker
|
| 24 |
+
from allennlp.training.momentum_schedulers import MomentumScheduler
|
| 25 |
+
from allennlp.training.moving_average import MovingAverage
|
| 26 |
+
from allennlp.training.optimizers import Optimizer
|
| 27 |
+
from allennlp.training.tensorboard_writer import TensorboardWriter
|
| 28 |
+
from allennlp.training.trainer_base import TrainerBase
|
| 29 |
+
|
| 30 |
+
logger = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class Trainer(TrainerBase):
|
| 34 |
+
def __init__(
|
| 35 |
+
self,
|
| 36 |
+
model: Model,
|
| 37 |
+
optimizer: torch.optim.Optimizer,
|
| 38 |
+
scheduler: torch.optim.lr_scheduler,
|
| 39 |
+
iterator: DataIterator,
|
| 40 |
+
train_dataset: Iterable[Instance],
|
| 41 |
+
validation_dataset: Optional[Iterable[Instance]] = None,
|
| 42 |
+
patience: Optional[int] = None,
|
| 43 |
+
validation_metric: str = "-loss",
|
| 44 |
+
validation_iterator: DataIterator = None,
|
| 45 |
+
shuffle: bool = True,
|
| 46 |
+
num_epochs: int = 20,
|
| 47 |
+
accumulated_batch_count: int = 1,
|
| 48 |
+
serialization_dir: Optional[str] = None,
|
| 49 |
+
num_serialized_models_to_keep: int = 20,
|
| 50 |
+
keep_serialized_model_every_num_seconds: int = None,
|
| 51 |
+
checkpointer: Checkpointer = None,
|
| 52 |
+
model_save_interval: float = None,
|
| 53 |
+
cuda_device: Union[int, List] = -1,
|
| 54 |
+
grad_norm: Optional[float] = None,
|
| 55 |
+
grad_clipping: Optional[float] = None,
|
| 56 |
+
learning_rate_scheduler: Optional[LearningRateScheduler] = None,
|
| 57 |
+
momentum_scheduler: Optional[MomentumScheduler] = None,
|
| 58 |
+
summary_interval: int = 100,
|
| 59 |
+
histogram_interval: int = None,
|
| 60 |
+
should_log_parameter_statistics: bool = True,
|
| 61 |
+
should_log_learning_rate: bool = False,
|
| 62 |
+
log_batch_size_period: Optional[int] = None,
|
| 63 |
+
moving_average: Optional[MovingAverage] = None,
|
| 64 |
+
cold_step_count: int = 0,
|
| 65 |
+
cold_lr: float = 1e-3,
|
| 66 |
+
cuda_verbose_step=None,
|
| 67 |
+
) -> None:
|
| 68 |
+
"""
|
| 69 |
+
A trainer for doing supervised learning. It just takes a labeled dataset
|
| 70 |
+
and a ``DataIterator``, and uses the supplied ``Optimizer`` to learn the weights
|
| 71 |
+
for your model over some fixed number of epochs. You can also pass in a validation
|
| 72 |
+
dataset and enable early stopping. There are many other bells and whistles as well.
|
| 73 |
+
|
| 74 |
+
Parameters
|
| 75 |
+
----------
|
| 76 |
+
model : ``Model``, required.
|
| 77 |
+
An AllenNLP model to be optimized. Pytorch Modules can also be optimized if
|
| 78 |
+
their ``forward`` method returns a dictionary with a "loss" key, containing a
|
| 79 |
+
scalar tensor representing the loss function to be optimized.
|
| 80 |
+
|
| 81 |
+
If you are training your model using GPUs, your model should already be
|
| 82 |
+
on the correct device. (If you use `Trainer.from_params` this will be
|
| 83 |
+
handled for you.)
|
| 84 |
+
optimizer : ``torch.nn.Optimizer``, required.
|
| 85 |
+
An instance of a Pytorch Optimizer, instantiated with the parameters of the
|
| 86 |
+
model to be optimized.
|
| 87 |
+
iterator : ``DataIterator``, required.
|
| 88 |
+
A method for iterating over a ``Dataset``, yielding padded indexed batches.
|
| 89 |
+
train_dataset : ``Dataset``, required.
|
| 90 |
+
A ``Dataset`` to train on. The dataset should have already been indexed.
|
| 91 |
+
validation_dataset : ``Dataset``, optional, (default = None).
|
| 92 |
+
A ``Dataset`` to evaluate on. The dataset should have already been indexed.
|
| 93 |
+
patience : Optional[int] > 0, optional (default=None)
|
| 94 |
+
Number of epochs to be patient before early stopping: the training is stopped
|
| 95 |
+
after ``patience`` epochs with no improvement. If given, it must be ``> 0``.
|
| 96 |
+
If None, early stopping is disabled.
|
| 97 |
+
validation_metric : str, optional (default="loss")
|
| 98 |
+
Validation metric to measure for whether to stop training using patience
|
| 99 |
+
and whether to serialize an ``is_best`` model each epoch. The metric name
|
| 100 |
+
must be prepended with either "+" or "-", which specifies whether the metric
|
| 101 |
+
is an increasing or decreasing function.
|
| 102 |
+
validation_iterator : ``DataIterator``, optional (default=None)
|
| 103 |
+
An iterator to use for the validation set. If ``None``, then
|
| 104 |
+
use the training `iterator`.
|
| 105 |
+
shuffle: ``bool``, optional (default=True)
|
| 106 |
+
Whether to shuffle the instances in the iterator or not.
|
| 107 |
+
num_epochs : int, optional (default = 20)
|
| 108 |
+
Number of training epochs.
|
| 109 |
+
serialization_dir : str, optional (default=None)
|
| 110 |
+
Path to directory for saving and loading model files. Models will not be saved if
|
| 111 |
+
this parameter is not passed.
|
| 112 |
+
num_serialized_models_to_keep : ``int``, optional (default=20)
|
| 113 |
+
Number of previous model checkpoints to retain. Default is to keep 20 checkpoints.
|
| 114 |
+
A value of None or -1 means all checkpoints will be kept.
|
| 115 |
+
keep_serialized_model_every_num_seconds : ``int``, optional (default=None)
|
| 116 |
+
If num_serialized_models_to_keep is not None, then occasionally it's useful to
|
| 117 |
+
save models at a given interval in addition to the last num_serialized_models_to_keep.
|
| 118 |
+
To do so, specify keep_serialized_model_every_num_seconds as the number of seconds
|
| 119 |
+
between permanently saved checkpoints. Note that this option is only used if
|
| 120 |
+
num_serialized_models_to_keep is not None, otherwise all checkpoints are kept.
|
| 121 |
+
checkpointer : ``Checkpointer``, optional (default=None)
|
| 122 |
+
An instance of class Checkpointer to use instead of the default. If a checkpointer is specified,
|
| 123 |
+
the arguments num_serialized_models_to_keep and keep_serialized_model_every_num_seconds should
|
| 124 |
+
not be specified. The caller is responsible for initializing the checkpointer so that it is
|
| 125 |
+
consistent with serialization_dir.
|
| 126 |
+
model_save_interval : ``float``, optional (default=None)
|
| 127 |
+
If provided, then serialize models every ``model_save_interval``
|
| 128 |
+
seconds within single epochs. In all cases, models are also saved
|
| 129 |
+
at the end of every epoch if ``serialization_dir`` is provided.
|
| 130 |
+
cuda_device : ``Union[int, List[int]]``, optional (default = -1)
|
| 131 |
+
An integer or list of integers specifying the CUDA device(s) to use. If -1, the CPU is used.
|
| 132 |
+
grad_norm : ``float``, optional, (default = None).
|
| 133 |
+
If provided, gradient norms will be rescaled to have a maximum of this value.
|
| 134 |
+
grad_clipping : ``float``, optional (default = ``None``).
|
| 135 |
+
If provided, gradients will be clipped `during the backward pass` to have an (absolute)
|
| 136 |
+
maximum of this value. If you are getting ``NaNs`` in your gradients during training
|
| 137 |
+
that are not solved by using ``grad_norm``, you may need this.
|
| 138 |
+
learning_rate_scheduler : ``LearningRateScheduler``, optional (default = None)
|
| 139 |
+
If specified, the learning rate will be decayed with respect to
|
| 140 |
+
this schedule at the end of each epoch (or batch, if the scheduler implements
|
| 141 |
+
the ``step_batch`` method). If you use :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`,
|
| 142 |
+
this will use the ``validation_metric`` provided to determine if learning has plateaued.
|
| 143 |
+
To support updating the learning rate on every batch, this can optionally implement
|
| 144 |
+
``step_batch(batch_num_total)`` which updates the learning rate given the batch number.
|
| 145 |
+
momentum_scheduler : ``MomentumScheduler``, optional (default = None)
|
| 146 |
+
If specified, the momentum will be updated at the end of each batch or epoch
|
| 147 |
+
according to the schedule.
|
| 148 |
+
summary_interval: ``int``, optional, (default = 100)
|
| 149 |
+
Number of batches between logging scalars to tensorboard
|
| 150 |
+
histogram_interval : ``int``, optional, (default = ``None``)
|
| 151 |
+
If not None, then log histograms to tensorboard every ``histogram_interval`` batches.
|
| 152 |
+
When this parameter is specified, the following additional logging is enabled:
|
| 153 |
+
* Histograms of model parameters
|
| 154 |
+
* The ratio of parameter update norm to parameter norm
|
| 155 |
+
* Histogram of layer activations
|
| 156 |
+
We log histograms of the parameters returned by
|
| 157 |
+
``model.get_parameters_for_histogram_tensorboard_logging``.
|
| 158 |
+
The layer activations are logged for any modules in the ``Model`` that have
|
| 159 |
+
the attribute ``should_log_activations`` set to ``True``. Logging
|
| 160 |
+
histograms requires a number of GPU-CPU copies during training and is typically
|
| 161 |
+
slow, so we recommend logging histograms relatively infrequently.
|
| 162 |
+
Note: only Modules that return tensors, tuples of tensors or dicts
|
| 163 |
+
with tensors as values currently support activation logging.
|
| 164 |
+
should_log_parameter_statistics : ``bool``, optional, (default = True)
|
| 165 |
+
Whether to send parameter statistics (mean and standard deviation
|
| 166 |
+
of parameters and gradients) to tensorboard.
|
| 167 |
+
should_log_learning_rate : ``bool``, optional, (default = False)
|
| 168 |
+
Whether to send parameter specific learning rate to tensorboard.
|
| 169 |
+
log_batch_size_period : ``int``, optional, (default = ``None``)
|
| 170 |
+
If defined, how often to log the average batch size.
|
| 171 |
+
moving_average: ``MovingAverage``, optional, (default = None)
|
| 172 |
+
If provided, we will maintain moving averages for all parameters. During training, we
|
| 173 |
+
employ a shadow variable for each parameter, which maintains the moving average. During
|
| 174 |
+
evaluation, we backup the original parameters and assign the moving averages to corresponding
|
| 175 |
+
parameters. Be careful that when saving the checkpoint, we will save the moving averages of
|
| 176 |
+
parameters. This is necessary because we want the saved model to perform as well as the validated
|
| 177 |
+
model if we load it later. But this may cause problems if you restart the training from checkpoint.
|
| 178 |
+
"""
|
| 179 |
+
super().__init__(serialization_dir, cuda_device)
|
| 180 |
+
|
| 181 |
+
# I am not calling move_to_gpu here, because if the model is
|
| 182 |
+
# not already on the GPU then the optimizer is going to be wrong.
|
| 183 |
+
self.model = model
|
| 184 |
+
|
| 185 |
+
self.iterator = iterator
|
| 186 |
+
self._validation_iterator = validation_iterator
|
| 187 |
+
self.shuffle = shuffle
|
| 188 |
+
self.optimizer = optimizer
|
| 189 |
+
self.scheduler = scheduler
|
| 190 |
+
self.train_data = train_dataset
|
| 191 |
+
self._validation_data = validation_dataset
|
| 192 |
+
self.accumulated_batch_count = accumulated_batch_count
|
| 193 |
+
self.cold_step_count = cold_step_count
|
| 194 |
+
self.cold_lr = cold_lr
|
| 195 |
+
self.cuda_verbose_step = cuda_verbose_step
|
| 196 |
+
|
| 197 |
+
if patience is None: # no early stopping
|
| 198 |
+
if validation_dataset:
|
| 199 |
+
logger.warning(
|
| 200 |
+
"You provided a validation dataset but patience was set to None, "
|
| 201 |
+
"meaning that early stopping is disabled"
|
| 202 |
+
)
|
| 203 |
+
elif (not isinstance(patience, int)) or patience <= 0:
|
| 204 |
+
raise ConfigurationError(
|
| 205 |
+
'{} is an invalid value for "patience": it must be a positive integer '
|
| 206 |
+
"or None (if you want to disable early stopping)".format(patience)
|
| 207 |
+
)
|
| 208 |
+
|
| 209 |
+
# For tracking is_best_so_far and should_stop_early
|
| 210 |
+
self._metric_tracker = MetricTracker(patience, validation_metric)
|
| 211 |
+
# Get rid of + or -
|
| 212 |
+
self._validation_metric = validation_metric[1:]
|
| 213 |
+
|
| 214 |
+
self._num_epochs = num_epochs
|
| 215 |
+
|
| 216 |
+
if checkpointer is not None:
|
| 217 |
+
# We can't easily check if these parameters were passed in, so check against their default values.
|
| 218 |
+
# We don't check against serialization_dir since it is also used by the parent class.
|
| 219 |
+
if num_serialized_models_to_keep != 20 \
|
| 220 |
+
or keep_serialized_model_every_num_seconds is not None:
|
| 221 |
+
raise ConfigurationError(
|
| 222 |
+
"When passing a custom Checkpointer, you may not also pass in separate checkpointer "
|
| 223 |
+
"args 'num_serialized_models_to_keep' or 'keep_serialized_model_every_num_seconds'."
|
| 224 |
+
)
|
| 225 |
+
self._checkpointer = checkpointer
|
| 226 |
+
else:
|
| 227 |
+
self._checkpointer = Checkpointer(
|
| 228 |
+
serialization_dir,
|
| 229 |
+
keep_serialized_model_every_num_seconds,
|
| 230 |
+
num_serialized_models_to_keep,
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
self._model_save_interval = model_save_interval
|
| 234 |
+
|
| 235 |
+
self._grad_norm = grad_norm
|
| 236 |
+
self._grad_clipping = grad_clipping
|
| 237 |
+
|
| 238 |
+
self._learning_rate_scheduler = learning_rate_scheduler
|
| 239 |
+
self._momentum_scheduler = momentum_scheduler
|
| 240 |
+
self._moving_average = moving_average
|
| 241 |
+
|
| 242 |
+
# We keep the total batch number as an instance variable because it
|
| 243 |
+
# is used inside a closure for the hook which logs activations in
|
| 244 |
+
# ``_enable_activation_logging``.
|
| 245 |
+
self._batch_num_total = 0
|
| 246 |
+
|
| 247 |
+
self._tensorboard = TensorboardWriter(
|
| 248 |
+
get_batch_num_total=lambda: self._batch_num_total,
|
| 249 |
+
serialization_dir=serialization_dir,
|
| 250 |
+
summary_interval=summary_interval,
|
| 251 |
+
histogram_interval=histogram_interval,
|
| 252 |
+
should_log_parameter_statistics=should_log_parameter_statistics,
|
| 253 |
+
should_log_learning_rate=should_log_learning_rate,
|
| 254 |
+
)
|
| 255 |
+
|
| 256 |
+
self._log_batch_size_period = log_batch_size_period
|
| 257 |
+
|
| 258 |
+
self._last_log = 0.0 # time of last logging
|
| 259 |
+
|
| 260 |
+
# Enable activation logging.
|
| 261 |
+
if histogram_interval is not None:
|
| 262 |
+
self._tensorboard.enable_activation_logging(self.model)
|
| 263 |
+
|
| 264 |
+
def rescale_gradients(self) -> Optional[float]:
|
| 265 |
+
return training_util.rescale_gradients(self.model, self._grad_norm)
|
| 266 |
+
|
| 267 |
+
def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
|
| 268 |
+
"""
|
| 269 |
+
Does a forward pass on the given batches and returns the ``loss`` value in the result.
|
| 270 |
+
If ``for_training`` is `True` also applies regularization penalty.
|
| 271 |
+
"""
|
| 272 |
+
if self._multiple_gpu:
|
| 273 |
+
output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
|
| 274 |
+
else:
|
| 275 |
+
assert len(batch_group) == 1
|
| 276 |
+
batch = batch_group[0]
|
| 277 |
+
batch = nn_util.move_to_device(batch, self._cuda_devices[0])
|
| 278 |
+
output_dict = self.model(**batch)
|
| 279 |
+
|
| 280 |
+
try:
|
| 281 |
+
loss = output_dict["loss"]
|
| 282 |
+
if for_training:
|
| 283 |
+
loss += self.model.get_regularization_penalty()
|
| 284 |
+
except KeyError:
|
| 285 |
+
if for_training:
|
| 286 |
+
raise RuntimeError(
|
| 287 |
+
"The model you are trying to optimize does not contain a"
|
| 288 |
+
" 'loss' key in the output of model.forward(inputs)."
|
| 289 |
+
)
|
| 290 |
+
loss = None
|
| 291 |
+
|
| 292 |
+
return loss
|
| 293 |
+
|
| 294 |
+
def _train_epoch(self, epoch: int) -> Dict[str, float]:
|
| 295 |
+
"""
|
| 296 |
+
Trains one epoch and returns metrics.
|
| 297 |
+
"""
|
| 298 |
+
logger.info("Epoch %d/%d", epoch, self._num_epochs - 1)
|
| 299 |
+
peak_cpu_usage = peak_memory_mb()
|
| 300 |
+
logger.info(f"Peak CPU memory usage MB: {peak_cpu_usage}")
|
| 301 |
+
gpu_usage = []
|
| 302 |
+
for gpu, memory in gpu_memory_mb().items():
|
| 303 |
+
gpu_usage.append((gpu, memory))
|
| 304 |
+
logger.info(f"GPU {gpu} memory usage MB: {memory}")
|
| 305 |
+
|
| 306 |
+
train_loss = 0.0
|
| 307 |
+
# Set the model to "train" mode.
|
| 308 |
+
self.model.train()
|
| 309 |
+
|
| 310 |
+
num_gpus = len(self._cuda_devices)
|
| 311 |
+
|
| 312 |
+
# Get tqdm for the training batches
|
| 313 |
+
raw_train_generator = self.iterator(self.train_data, num_epochs=1, shuffle=self.shuffle)
|
| 314 |
+
train_generator = lazy_groups_of(raw_train_generator, num_gpus)
|
| 315 |
+
num_training_batches = math.ceil(self.iterator.get_num_batches(self.train_data) / num_gpus)
|
| 316 |
+
residue = num_training_batches % self.accumulated_batch_count
|
| 317 |
+
self._last_log = time.time()
|
| 318 |
+
last_save_time = time.time()
|
| 319 |
+
|
| 320 |
+
batches_this_epoch = 0
|
| 321 |
+
if self._batch_num_total is None:
|
| 322 |
+
self._batch_num_total = 0
|
| 323 |
+
|
| 324 |
+
histogram_parameters = set(self.model.get_parameters_for_histogram_tensorboard_logging())
|
| 325 |
+
|
| 326 |
+
logger.info("Training")
|
| 327 |
+
train_generator_tqdm = Tqdm.tqdm(train_generator, total=num_training_batches)
|
| 328 |
+
cumulative_batch_size = 0
|
| 329 |
+
self.optimizer.zero_grad()
|
| 330 |
+
for batch_group in train_generator_tqdm:
|
| 331 |
+
batches_this_epoch += 1
|
| 332 |
+
self._batch_num_total += 1
|
| 333 |
+
batch_num_total = self._batch_num_total
|
| 334 |
+
|
| 335 |
+
iter_len = self.accumulated_batch_count \
|
| 336 |
+
if batches_this_epoch <= (num_training_batches - residue) else residue
|
| 337 |
+
|
| 338 |
+
if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
|
| 339 |
+
print(f'Before forward pass - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
|
| 340 |
+
print(f'Before forward pass - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
|
| 341 |
+
try:
|
| 342 |
+
loss = self.batch_loss(batch_group, for_training=True) / iter_len
|
| 343 |
+
except RuntimeError as e:
|
| 344 |
+
print(e)
|
| 345 |
+
for x in batch_group:
|
| 346 |
+
all_words = [len(y['words']) for y in x['metadata']]
|
| 347 |
+
print(f"Total sents: {len(all_words)}. "
|
| 348 |
+
f"Min {min(all_words)}. Max {max(all_words)}")
|
| 349 |
+
for elem in ['labels', 'd_tags']:
|
| 350 |
+
tt = x[elem]
|
| 351 |
+
print(
|
| 352 |
+
f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
|
| 353 |
+
for elem in ["bert", "mask", "bert-offsets"]:
|
| 354 |
+
tt = x['tokens'][elem]
|
| 355 |
+
print(
|
| 356 |
+
f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
|
| 357 |
+
raise e
|
| 358 |
+
|
| 359 |
+
if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
|
| 360 |
+
print(f'After forward pass - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
|
| 361 |
+
print(f'After forward pass - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
|
| 362 |
+
|
| 363 |
+
if torch.isnan(loss):
|
| 364 |
+
raise ValueError("nan loss encountered")
|
| 365 |
+
|
| 366 |
+
loss.backward()
|
| 367 |
+
|
| 368 |
+
if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
|
| 369 |
+
print(f'After backprop - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
|
| 370 |
+
print(f'After backprop - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
|
| 371 |
+
|
| 372 |
+
train_loss += loss.item() * iter_len
|
| 373 |
+
|
| 374 |
+
del batch_group, loss
|
| 375 |
+
torch.cuda.empty_cache()
|
| 376 |
+
|
| 377 |
+
if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
|
| 378 |
+
print(f'After collecting garbage - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
|
| 379 |
+
print(f'After collecting garbage - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
|
| 380 |
+
|
| 381 |
+
batch_grad_norm = self.rescale_gradients()
|
| 382 |
+
|
| 383 |
+
# This does nothing if batch_num_total is None or you are using a
|
| 384 |
+
# scheduler which doesn't update per batch.
|
| 385 |
+
if self._learning_rate_scheduler:
|
| 386 |
+
self._learning_rate_scheduler.step_batch(batch_num_total)
|
| 387 |
+
if self._momentum_scheduler:
|
| 388 |
+
self._momentum_scheduler.step_batch(batch_num_total)
|
| 389 |
+
|
| 390 |
+
if self._tensorboard.should_log_histograms_this_batch():
|
| 391 |
+
# get the magnitude of parameter updates for logging
|
| 392 |
+
# We need a copy of current parameters to compute magnitude of updates,
|
| 393 |
+
# and copy them to CPU so large models won't go OOM on the GPU.
|
| 394 |
+
param_updates = {
|
| 395 |
+
name: param.detach().cpu().clone()
|
| 396 |
+
for name, param in self.model.named_parameters()
|
| 397 |
+
}
|
| 398 |
+
if batches_this_epoch % self.accumulated_batch_count == 0 or \
|
| 399 |
+
batches_this_epoch == num_training_batches:
|
| 400 |
+
self.optimizer.step()
|
| 401 |
+
self.optimizer.zero_grad()
|
| 402 |
+
for name, param in self.model.named_parameters():
|
| 403 |
+
param_updates[name].sub_(param.detach().cpu())
|
| 404 |
+
update_norm = torch.norm(param_updates[name].view(-1))
|
| 405 |
+
param_norm = torch.norm(param.view(-1)).cpu()
|
| 406 |
+
self._tensorboard.add_train_scalar(
|
| 407 |
+
"gradient_update/" + name, update_norm / (param_norm + 1e-7)
|
| 408 |
+
)
|
| 409 |
+
else:
|
| 410 |
+
if batches_this_epoch % self.accumulated_batch_count == 0 or \
|
| 411 |
+
batches_this_epoch == num_training_batches:
|
| 412 |
+
self.optimizer.step()
|
| 413 |
+
self.optimizer.zero_grad()
|
| 414 |
+
|
| 415 |
+
# Update moving averages
|
| 416 |
+
if self._moving_average is not None:
|
| 417 |
+
self._moving_average.apply(batch_num_total)
|
| 418 |
+
|
| 419 |
+
# Update the description with the latest metrics
|
| 420 |
+
metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch)
|
| 421 |
+
description = training_util.description_from_metrics(metrics)
|
| 422 |
+
|
| 423 |
+
train_generator_tqdm.set_description(description, refresh=False)
|
| 424 |
+
|
| 425 |
+
# Log parameter values to Tensorboard
|
| 426 |
+
if self._tensorboard.should_log_this_batch():
|
| 427 |
+
self._tensorboard.log_parameter_and_gradient_statistics(self.model, batch_grad_norm)
|
| 428 |
+
self._tensorboard.log_learning_rates(self.model, self.optimizer)
|
| 429 |
+
|
| 430 |
+
self._tensorboard.add_train_scalar("loss/loss_train", metrics["loss"])
|
| 431 |
+
self._tensorboard.log_metrics({"epoch_metrics/" + k: v for k, v in metrics.items()})
|
| 432 |
+
|
| 433 |
+
if self._tensorboard.should_log_histograms_this_batch():
|
| 434 |
+
self._tensorboard.log_histograms(self.model, histogram_parameters)
|
| 435 |
+
|
| 436 |
+
if self._log_batch_size_period:
|
| 437 |
+
cur_batch = sum([training_util.get_batch_size(batch) for batch in batch_group])
|
| 438 |
+
cumulative_batch_size += cur_batch
|
| 439 |
+
if (batches_this_epoch - 1) % self._log_batch_size_period == 0:
|
| 440 |
+
average = cumulative_batch_size / batches_this_epoch
|
| 441 |
+
logger.info(f"current batch size: {cur_batch} mean batch size: {average}")
|
| 442 |
+
self._tensorboard.add_train_scalar("current_batch_size", cur_batch)
|
| 443 |
+
self._tensorboard.add_train_scalar("mean_batch_size", average)
|
| 444 |
+
|
| 445 |
+
# Save model if needed.
|
| 446 |
+
if self._model_save_interval is not None and (
|
| 447 |
+
time.time() - last_save_time > self._model_save_interval
|
| 448 |
+
):
|
| 449 |
+
last_save_time = time.time()
|
| 450 |
+
self._save_checkpoint(
|
| 451 |
+
"{0}.{1}".format(epoch, training_util.time_to_str(int(last_save_time)))
|
| 452 |
+
)
|
| 453 |
+
|
| 454 |
+
metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch, reset=True)
|
| 455 |
+
metrics["cpu_memory_MB"] = peak_cpu_usage
|
| 456 |
+
for (gpu_num, memory) in gpu_usage:
|
| 457 |
+
metrics["gpu_" + str(gpu_num) + "_memory_MB"] = memory
|
| 458 |
+
return metrics
|
| 459 |
+
|
| 460 |
+
def _validation_loss(self) -> Tuple[float, int]:
|
| 461 |
+
"""
|
| 462 |
+
Computes the validation loss. Returns it and the number of batches.
|
| 463 |
+
"""
|
| 464 |
+
logger.info("Validating")
|
| 465 |
+
|
| 466 |
+
self.model.eval()
|
| 467 |
+
|
| 468 |
+
# Replace parameter values with the shadow values from the moving averages.
|
| 469 |
+
if self._moving_average is not None:
|
| 470 |
+
self._moving_average.assign_average_value()
|
| 471 |
+
|
| 472 |
+
if self._validation_iterator is not None:
|
| 473 |
+
val_iterator = self._validation_iterator
|
| 474 |
+
else:
|
| 475 |
+
val_iterator = self.iterator
|
| 476 |
+
|
| 477 |
+
num_gpus = len(self._cuda_devices)
|
| 478 |
+
|
| 479 |
+
raw_val_generator = val_iterator(self._validation_data, num_epochs=1, shuffle=False)
|
| 480 |
+
val_generator = lazy_groups_of(raw_val_generator, num_gpus)
|
| 481 |
+
num_validation_batches = math.ceil(
|
| 482 |
+
val_iterator.get_num_batches(self._validation_data) / num_gpus
|
| 483 |
+
)
|
| 484 |
+
val_generator_tqdm = Tqdm.tqdm(val_generator, total=num_validation_batches)
|
| 485 |
+
batches_this_epoch = 0
|
| 486 |
+
val_loss = 0
|
| 487 |
+
for batch_group in val_generator_tqdm:
|
| 488 |
+
|
| 489 |
+
loss = self.batch_loss(batch_group, for_training=False)
|
| 490 |
+
if loss is not None:
|
| 491 |
+
# You shouldn't necessarily have to compute a loss for validation, so we allow for
|
| 492 |
+
# `loss` to be None. We need to be careful, though - `batches_this_epoch` is
|
| 493 |
+
# currently only used as the divisor for the loss function, so we can safely only
|
| 494 |
+
# count those batches for which we actually have a loss. If this variable ever
|
| 495 |
+
# gets used for something else, we might need to change things around a bit.
|
| 496 |
+
batches_this_epoch += 1
|
| 497 |
+
val_loss += loss.detach().cpu().numpy()
|
| 498 |
+
|
| 499 |
+
# Update the description with the latest metrics
|
| 500 |
+
val_metrics = training_util.get_metrics(self.model, val_loss, batches_this_epoch)
|
| 501 |
+
description = training_util.description_from_metrics(val_metrics)
|
| 502 |
+
val_generator_tqdm.set_description(description, refresh=False)
|
| 503 |
+
|
| 504 |
+
# Now restore the original parameter values.
|
| 505 |
+
if self._moving_average is not None:
|
| 506 |
+
self._moving_average.restore()
|
| 507 |
+
|
| 508 |
+
return val_loss, batches_this_epoch
|
| 509 |
+
|
| 510 |
+
def train(self) -> Dict[str, Any]:
|
| 511 |
+
"""
|
| 512 |
+
Trains the supplied model with the supplied parameters.
|
| 513 |
+
"""
|
| 514 |
+
try:
|
| 515 |
+
epoch_counter = self._restore_checkpoint()
|
| 516 |
+
except RuntimeError:
|
| 517 |
+
traceback.print_exc()
|
| 518 |
+
raise ConfigurationError(
|
| 519 |
+
"Could not recover training from the checkpoint. Did you mean to output to "
|
| 520 |
+
"a different serialization directory or delete the existing serialization "
|
| 521 |
+
"directory?"
|
| 522 |
+
)
|
| 523 |
+
|
| 524 |
+
training_util.enable_gradient_clipping(self.model, self._grad_clipping)
|
| 525 |
+
|
| 526 |
+
logger.info("Beginning training.")
|
| 527 |
+
|
| 528 |
+
train_metrics: Dict[str, float] = {}
|
| 529 |
+
val_metrics: Dict[str, float] = {}
|
| 530 |
+
this_epoch_val_metric: float = None
|
| 531 |
+
metrics: Dict[str, Any] = {}
|
| 532 |
+
epochs_trained = 0
|
| 533 |
+
training_start_time = time.time()
|
| 534 |
+
|
| 535 |
+
if self.cold_step_count > 0:
|
| 536 |
+
base_lr = self.optimizer.param_groups[0]['lr']
|
| 537 |
+
for param_group in self.optimizer.param_groups:
|
| 538 |
+
param_group['lr'] = self.cold_lr
|
| 539 |
+
self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=True)
|
| 540 |
+
|
| 541 |
+
metrics["best_epoch"] = self._metric_tracker.best_epoch
|
| 542 |
+
for key, value in self._metric_tracker.best_epoch_metrics.items():
|
| 543 |
+
metrics["best_validation_" + key] = value
|
| 544 |
+
|
| 545 |
+
for epoch in range(epoch_counter, self._num_epochs):
|
| 546 |
+
if epoch == self.cold_step_count and epoch != 0:
|
| 547 |
+
for param_group in self.optimizer.param_groups:
|
| 548 |
+
param_group['lr'] = base_lr
|
| 549 |
+
self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=False)
|
| 550 |
+
|
| 551 |
+
epoch_start_time = time.time()
|
| 552 |
+
train_metrics = self._train_epoch(epoch)
|
| 553 |
+
|
| 554 |
+
# get peak of memory usage
|
| 555 |
+
if "cpu_memory_MB" in train_metrics:
|
| 556 |
+
metrics["peak_cpu_memory_MB"] = max(
|
| 557 |
+
metrics.get("peak_cpu_memory_MB", 0), train_metrics["cpu_memory_MB"]
|
| 558 |
+
)
|
| 559 |
+
for key, value in train_metrics.items():
|
| 560 |
+
if key.startswith("gpu_"):
|
| 561 |
+
metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value)
|
| 562 |
+
|
| 563 |
+
# clear cache before validation
|
| 564 |
+
torch.cuda.empty_cache()
|
| 565 |
+
if self._validation_data is not None:
|
| 566 |
+
with torch.no_grad():
|
| 567 |
+
# We have a validation set, so compute all the metrics on it.
|
| 568 |
+
val_loss, num_batches = self._validation_loss()
|
| 569 |
+
val_metrics = training_util.get_metrics(
|
| 570 |
+
self.model, val_loss, num_batches, reset=True
|
| 571 |
+
)
|
| 572 |
+
|
| 573 |
+
# Check validation metric for early stopping
|
| 574 |
+
this_epoch_val_metric = val_metrics[self._validation_metric]
|
| 575 |
+
self._metric_tracker.add_metric(this_epoch_val_metric)
|
| 576 |
+
|
| 577 |
+
if self._metric_tracker.should_stop_early():
|
| 578 |
+
logger.info("Ran out of patience. Stopping training.")
|
| 579 |
+
break
|
| 580 |
+
|
| 581 |
+
self._tensorboard.log_metrics(
|
| 582 |
+
train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1
|
| 583 |
+
) # +1 because tensorboard doesn't like 0
|
| 584 |
+
|
| 585 |
+
# Create overall metrics dict
|
| 586 |
+
training_elapsed_time = time.time() - training_start_time
|
| 587 |
+
metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time))
|
| 588 |
+
metrics["training_start_epoch"] = epoch_counter
|
| 589 |
+
metrics["training_epochs"] = epochs_trained
|
| 590 |
+
metrics["epoch"] = epoch
|
| 591 |
+
|
| 592 |
+
for key, value in train_metrics.items():
|
| 593 |
+
metrics["training_" + key] = value
|
| 594 |
+
for key, value in val_metrics.items():
|
| 595 |
+
metrics["validation_" + key] = value
|
| 596 |
+
|
| 597 |
+
# if self.cold_step_count <= epoch:
|
| 598 |
+
self.scheduler.step(metrics['validation_loss'])
|
| 599 |
+
|
| 600 |
+
if self._metric_tracker.is_best_so_far():
|
| 601 |
+
# Update all the best_ metrics.
|
| 602 |
+
# (Otherwise they just stay the same as they were.)
|
| 603 |
+
metrics["best_epoch"] = epoch
|
| 604 |
+
for key, value in val_metrics.items():
|
| 605 |
+
metrics["best_validation_" + key] = value
|
| 606 |
+
|
| 607 |
+
self._metric_tracker.best_epoch_metrics = val_metrics
|
| 608 |
+
|
| 609 |
+
if self._serialization_dir:
|
| 610 |
+
dump_metrics(
|
| 611 |
+
os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics
|
| 612 |
+
)
|
| 613 |
+
|
| 614 |
+
# The Scheduler API is agnostic to whether your schedule requires a validation metric -
|
| 615 |
+
# if it doesn't, the validation metric passed here is ignored.
|
| 616 |
+
if self._learning_rate_scheduler:
|
| 617 |
+
self._learning_rate_scheduler.step(this_epoch_val_metric, epoch)
|
| 618 |
+
if self._momentum_scheduler:
|
| 619 |
+
self._momentum_scheduler.step(this_epoch_val_metric, epoch)
|
| 620 |
+
|
                self._save_checkpoint(epoch)

            epoch_elapsed_time = time.time() - epoch_start_time
            logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time))

            if epoch < self._num_epochs - 1:
                training_elapsed_time = time.time() - training_start_time
                estimated_time_remaining = training_elapsed_time * (
                    (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1
                )
                formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining)))
                logger.info("Estimated training time remaining: %s", formatted_time)

            epochs_trained += 1

        # make sure pending events are flushed to disk and files are closed properly
        # self._tensorboard.close()

        # Load the best model state before returning
        best_model_state = self._checkpointer.best_model_state()
        if best_model_state:
            self.model.load_state_dict(best_model_state)

        return metrics

    def _save_checkpoint(self, epoch: Union[int, str]) -> None:
        """
        Saves a checkpoint of the model to self._serialization_dir.
        Is a no-op if self._serialization_dir is None.

        Parameters
        ----------
        epoch : Union[int, str], required.
            The epoch of training. If the checkpoint is saved in the middle
            of an epoch, the parameter is a string with the epoch and timestamp.
        """
        # If moving averages are used for parameters, we save
        # the moving average values into checkpoint, instead of the current values.
        if self._moving_average is not None:
            self._moving_average.assign_average_value()

        # These are the training states we need to persist.
        training_states = {
            "metric_tracker": self._metric_tracker.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "batch_num_total": self._batch_num_total,
        }

        # If we have a learning rate or momentum scheduler, we should persist them too.
        if self._learning_rate_scheduler is not None:
            training_states["learning_rate_scheduler"] = self._learning_rate_scheduler.state_dict()
        if self._momentum_scheduler is not None:
            training_states["momentum_scheduler"] = self._momentum_scheduler.state_dict()

        self._checkpointer.save_checkpoint(
            model_state=self.model.state_dict(),
            epoch=epoch,
            training_states=training_states,
            is_best_so_far=self._metric_tracker.is_best_so_far(),
        )

        # Restore the original values for parameters so that training will not be affected.
        if self._moving_average is not None:
            self._moving_average.restore()

    def _restore_checkpoint(self) -> int:
        """
        Restores the model and training state from the last saved checkpoint.
        This includes an epoch count and optimizer state, which is serialized separately
        from model parameters. This function should only be used to continue training -
        if you wish to load a model for inference/load parts of a model into a new
        computation graph, you should use the native Pytorch functions:
        ``model.load_state_dict(torch.load("/path/to/model/weights.th"))``

        If ``self._serialization_dir`` does not exist or does not contain any checkpointed weights,
        this function will do nothing and return 0.

        Returns
        -------
        epoch: int
            The epoch at which to resume training, which should be one after the epoch
            in the saved training state.
        """
        model_state, training_state = self._checkpointer.restore_checkpoint()

        if not training_state:
            # No checkpoint to restore, start at 0
            return 0

        self.model.load_state_dict(model_state)
        self.optimizer.load_state_dict(training_state["optimizer"])
        if self._learning_rate_scheduler is not None \
                and "learning_rate_scheduler" in training_state:
            self._learning_rate_scheduler.load_state_dict(training_state["learning_rate_scheduler"])
        if self._momentum_scheduler is not None and "momentum_scheduler" in training_state:
            self._momentum_scheduler.load_state_dict(training_state["momentum_scheduler"])
        training_util.move_optimizer_to_cuda(self.optimizer)

        # Currently the ``training_state`` contains a serialized ``MetricTracker``.
        if "metric_tracker" in training_state:
            self._metric_tracker.load_state_dict(training_state["metric_tracker"])
        # It used to be the case that we tracked ``val_metric_per_epoch``.
        elif "val_metric_per_epoch" in training_state:
            self._metric_tracker.clear()
            self._metric_tracker.add_metrics(training_state["val_metric_per_epoch"])
        # And before that we didn't track anything.
        else:
            self._metric_tracker.clear()

        if isinstance(training_state["epoch"], int):
            epoch_to_return = training_state["epoch"] + 1
        else:
            epoch_to_return = int(training_state["epoch"].split(".")[0]) + 1

        # For older checkpoints with batch_num_total missing, default to old behavior where
        # it is unchanged.
        batch_num_total = training_state.get("batch_num_total")
        if batch_num_total is not None:
            self._batch_num_total = batch_num_total

        return epoch_to_return

    # Requires custom from_params.
    @classmethod
    def from_params(  # type: ignore
        cls,
        model: Model,
        serialization_dir: str,
        iterator: DataIterator,
        train_data: Iterable[Instance],
        validation_data: Optional[Iterable[Instance]],
        params: Params,
        validation_iterator: DataIterator = None,
    ) -> "Trainer":

        patience = params.pop_int("patience", None)
        validation_metric = params.pop("validation_metric", "-loss")
        shuffle = params.pop_bool("shuffle", True)
        num_epochs = params.pop_int("num_epochs", 20)
        cuda_device = parse_cuda_device(params.pop("cuda_device", -1))
        grad_norm = params.pop_float("grad_norm", None)
        grad_clipping = params.pop_float("grad_clipping", None)
        lr_scheduler_params = params.pop("learning_rate_scheduler", None)
        momentum_scheduler_params = params.pop("momentum_scheduler", None)

        if isinstance(cuda_device, list):
            model_device = cuda_device[0]
        else:
            model_device = cuda_device
        if model_device >= 0:
            # Moving model to GPU here so that the optimizer state gets constructed on
            # the right device.
            model = model.cuda(model_device)

        parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))
        if "moving_average" in params:
            moving_average = MovingAverage.from_params(
                params.pop("moving_average"), parameters=parameters
            )
        else:
            moving_average = None

        if lr_scheduler_params:
            lr_scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
        else:
            lr_scheduler = None
        if momentum_scheduler_params:
            momentum_scheduler = MomentumScheduler.from_params(optimizer, momentum_scheduler_params)
        else:
            momentum_scheduler = None

        if "checkpointer" in params:
            if "keep_serialized_model_every_num_seconds" in params \
                    or "num_serialized_models_to_keep" in params:
                raise ConfigurationError(
                    "Checkpointer may be initialized either from the 'checkpointer' key or from the "
                    "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
                    " but the passed config uses both methods."
                )
            checkpointer = Checkpointer.from_params(params.pop("checkpointer"))
        else:
            num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
            keep_serialized_model_every_num_seconds = params.pop_int(
                "keep_serialized_model_every_num_seconds", None
            )
            checkpointer = Checkpointer(
                serialization_dir=serialization_dir,
                num_serialized_models_to_keep=num_serialized_models_to_keep,
                keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
            )
        model_save_interval = params.pop_float("model_save_interval", None)
        summary_interval = params.pop_int("summary_interval", 100)
        histogram_interval = params.pop_int("histogram_interval", None)
        should_log_parameter_statistics = params.pop_bool("should_log_parameter_statistics", True)
        should_log_learning_rate = params.pop_bool("should_log_learning_rate", False)
        log_batch_size_period = params.pop_int("log_batch_size_period", None)

        params.assert_empty(cls.__name__)
        return cls(
            model,
            optimizer,
            iterator,
            train_data,
            validation_data,
            patience=patience,
            validation_metric=validation_metric,
            validation_iterator=validation_iterator,
            shuffle=shuffle,
            num_epochs=num_epochs,
            serialization_dir=serialization_dir,
            cuda_device=cuda_device,
            grad_norm=grad_norm,
            grad_clipping=grad_clipping,
            learning_rate_scheduler=lr_scheduler,
            momentum_scheduler=momentum_scheduler,
            checkpointer=checkpointer,
            model_save_interval=model_save_interval,
            summary_interval=summary_interval,
            histogram_interval=histogram_interval,
            should_log_parameter_statistics=should_log_parameter_statistics,
            should_log_learning_rate=should_log_learning_rate,
            log_batch_size_period=log_batch_size_period,
            moving_average=moving_average,
        )
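Note on configuration (not part of the committed file): the custom from_params above pops every training option out of a Params blob and then asserts the blob is empty, so the config must contain only the keys shown in the pop_* calls. The snippet below is a minimal sketch of such a blob written as a Python dict; the key names come straight from the code, but the concrete values and the metric name are illustrative assumptions, not settings shipped with this Space.

    from allennlp.common.params import Params

    # Minimal, assumed example of the keys Trainer.from_params expects to pop.
    trainer_params = Params({
        "optimizer": {"type": "adam", "lr": 1e-5},    # popped unconditionally
        "num_epochs": 20,                             # default used if omitted
        "patience": 3,                                # optional early-stopping window
        "validation_metric": "+labels_accuracy",      # hypothetical metric name
        "cuda_device": 0,                             # -1 keeps the model on CPU
        "num_serialized_models_to_keep": 2,           # builds the default Checkpointer branch
    })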
output_vocabulary/d_tags.txt
ADDED
@@ -0,0 +1,4 @@
CORRECT
INCORRECT
@@UNKNOWN@@
@@PADDING@@
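A short orientation note (not part of the committed files): d_tags.txt above holds the error-detection tags, while labels.txt that follows holds the 5002 token-level edit tags GECToR can predict ($KEEP, $DELETE, $APPEND_*, $REPLACE_*, $TRANSFORM_*). The sketch below simply reads both files into lists; the helper name is ours, not something defined in this repository.

    def load_vocab(path):
        # One tag per line, in file order.
        with open(path, encoding="utf-8") as f:
            return [line.rstrip("\n") for line in f]

    d_tags = load_vocab("output_vocabulary/d_tags.txt")   # CORRECT, INCORRECT, @@UNKNOWN@@, @@PADDING@@
    labels = load_vocab("output_vocabulary/labels.txt")   # $KEEP, $DELETE, $APPEND_..., $REPLACE_..., $TRANSFORM_...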
output_vocabulary/labels.txt
ADDED
@@ -0,0 +1,5002 @@
$KEEP
$DELETE
$TRANSFORM_CASE_CAPITAL
$APPEND_the
$APPEND_,
$APPEND_a
$TRANSFORM_VERB_VB_VBZ
$TRANSFORM_AGREEMENT_PLURAL
$TRANSFORM_CASE_LOWER
$TRANSFORM_VERB_VB_VBN
$REPLACE_the
$REPLACE_a
$REPLACE_to
$TRANSFORM_VERB_VB_VBG
$REPLACE_.
$APPEND_to
$REPLACE_,
$REPLACE_in
$REPLACE_was
$TRANSFORM_VERB_VBZ_VB
$TRANSFORM_AGREEMENT_SINGULAR
$APPEND_I
$APPEND_.
$REPLACE_for
$REPLACE_I
$APPEND_(
$TRANSFORM_VERB_VBG_VB
$REPLACE_is
$REPLACE_have
$REPLACE_on
$REPLACE_are
$REPLACE_of
$REPLACE_it
$TRANSFORM_VERB_VBN_VB
$REPLACE_that
$APPEND_in
$REPLACE_and
$APPEND_it
$APPEND_that
$REPLACE_at
$APPEND_for
$APPEND_of
$APPEND_and
$REPLACE_an
$REPLACE_my
$REPLACE_had
$APPEND_have
$APPEND_an
$REPLACE_has
$APPEND_my
$APPEND_is
$APPEND_The
$APPEND_will
$REPLACE_with
$REPLACE_were
$REPLACE_be
$TRANSFORM_VERB_VBN_VBG
$APPEND_``
$REPLACE_do
$TRANSFORM_VERB_VBG_VBN
$REPLACE_this
$REPLACE_will
$TRANSFORM_VERB_VB_VBD
$APPEND_was
$REPLACE_n't
$APPEND_about
$REPLACE_from
$REPLACE_about
$REPLACE_It
$APPEND_on
$REPLACE_would
$MERGE_SPACE
$APPEND_at
$APPEND_'s
$REPLACE_as
$REPLACE_'s
$REPLACE_could
$APPEND_with
$REPLACE_did
$REPLACE_them
$REPLACE_The
$REPLACE_by
$REPLACE_so
$REPLACE_not
$REPLACE_can
$APPEND_am
$APPEND_be
$REPLACE_because
$APPEND_/
$REPLACE_they
$REPLACE_am
$APPEND_are
$TRANSFORM_VERB_VBZ_VBN
$REPLACE_'m
$REPLACE_their
$TRANSFORM_VERB_VBN_VBZ
$APPEND_had
$APPEND_would
$APPEND_-
$REPLACE_(
$TRANSFORM_VERB_VBN_VBD
$REPLACE_very
$REPLACE_people
$REPLACE_get
$REPLACE_there
$REPLACE_?
$APPEND_do
$REPLACE_;
$REPLACE_me
$REPLACE_one
$REPLACE_been
$APPEND_so
$APPEND_)
$APPEND_'m
$REPLACE_or
$REPLACE_some
$REPLACE_you
$TRANSFORM_VERB_VBD_VBN
$APPEND_as
$REPLACE_like
$TRANSFORM_VERB_VBD_VB
$REPLACE_which
$APPEND_has
$REPLACE_these
$REPLACE_This
$APPEND_from
$REPLACE_when
$APPEND_'ve
$REPLACE_``
$APPEND_there
$REPLACE_does
$APPEND_also
$APPEND_It
$APPEND_can
$REPLACE_:
$REPLACE_other
$APPEND_more
$REPLACE_want
$REPLACE_we
$REPLACE_'ve
$REPLACE_what
$REPLACE_more
$REPLACE_many
$REPLACE_into
$APPEND_been
$APPEND_by
$APPEND_this
$REPLACE_went
$REPLACE_time
$APPEND_only
$TRANSFORM_VERB_VBG_VBZ
$REPLACE_go
$REPLACE_while
$REPLACE_but
$APPEND_all
$APPEND_if
$REPLACE_should
$REPLACE_out
$APPEND_'
$REPLACE_during
$REPLACE_much
$APPEND_like
$REPLACE_!
$APPEND_but
$REPLACE_if
$REPLACE_since
$APPEND_people
$APPEND_because
$REPLACE_any
$APPEND_A
$REPLACE_another
$REPLACE_They
$APPEND_you
$REPLACE_ca
$REPLACE_our
$REPLACE_who
$APPEND_now
$REPLACE_really
$REPLACE_make
$APPEND_me
$APPEND_who
$REPLACE_In
$REPLACE_her
$REPLACE_English
$APPEND_some
$APPEND_when
$APPEND_still
$APPEND_them
$REPLACE_use
$APPEND_just
$REPLACE_things
$REPLACE_/
$REPLACE_got
$REPLACE_My
$APPEND_were
$REPLACE_he
$REPLACE_countries
$APPEND_their
$REPLACE_using
$TRANSFORM_VERB_VBZ_VBG
$APPEND_'ll
$REPLACE_being
$REPLACE_too
$APPEND_we
$APPEND_they
$REPLACE_lot
$REPLACE_-
$REPLACE_all
$REPLACE_good
$APPEND_[
$REPLACE_every
$REPLACE_)
$REPLACE_your
$APPEND_My
$APPEND_even
$APPEND_out
$REPLACE_his
$REPLACE_made
$APPEND_any
$REPLACE_where
$APPEND_which
$REPLACE_work
$REPLACE_used
$APPEND_one
$REPLACE_take
$APPEND_In
$REPLACE_There
$REPLACE_up
$REPLACE_how
$REPLACE_myself
$APPEND_what
$APPEND_very
$APPEND_?
$REPLACE_become
$REPLACE_think
$REPLACE_going
$REPLACE_Japanese
$REPLACE_well
$APPEND_being
$APPEND_or
$REPLACE_just
$REPLACE_write
$REPLACE_those
$REPLACE_feel
$REPLACE_until
$APPEND_However
$APPEND_our
$REPLACE_something
$APPEND_get
$REPLACE_diary
$REPLACE_no
$REPLACE_over
$APPEND_time
$APPEND_then
$REPLACE_see
$REPLACE_writing
$REPLACE_wo
$REPLACE_only
$REPLACE_'ll
$REPLACE_after
$REPLACE_know
$REPLACE_anything
$REPLACE_now
$REPLACE_That
$REPLACE_first
$REPLACE_than
$APPEND_up
$REPLACE_better
$REPLACE_hope
$REPLACE_through
$REPLACE_doing
$APPEND_go
$REPLACE_then
$APPEND_too
$REPLACE_studying
$REPLACE_its
$REPLACE_learn
$REPLACE_lives
$REPLACE_having
$REPLACE_told
$REPLACE_What
$REPLACE_she
$REPLACE_thought
$APPEND_not
$REPLACE_around
$REPLACE_him
$REPLACE_different
$APPEND_could
$APPEND_such
$REPLACE_able
$REPLACE_On
$REPLACE_before
$REPLACE_though
$REPLACE_also
$APPEND_entry
$REPLACE_learned
$TRANSFORM_CASE_UPPER
$APPEND_again
$REPLACE_friends
$APPEND_This
$REPLACE_might
$REPLACE_A
$REPLACE_However
$APPEND_really
$REPLACE_started
$REPLACE_improve
$APPEND_English
$REPLACE_years
$REPLACE_'
$REPLACE_most
$APPEND_how
$REPLACE_day
$APPEND_:
$APPEND_today
$REPLACE_find
$REPLACE_help
$APPEND_should
$REPLACE_We
$REPLACE_even
$REPLACE_may
$REPLACE_left
$REPLACE_called
$APPEND_did
$REPLACE_course
$REPLACE_These
$REPLACE_understand
$REPLACE_So
$REPLACE_said
$REPLACE_took
$REPLACE_person
$REPLACE_school
$REPLACE_such
$APPEND_called
$REPLACE_At
$APPEND_before
$REPLACE_way
$APPEND_he
$REPLACE_everyone
$REPLACE_here
$REPLACE_When
$REPLACE_everything
$REPLACE_need
$APPEND_her
$REPLACE_Because
$TRANSFORM_VERB_VBD_VBG
$REPLACE_say
$REPLACE_study
$APPEND_much
$REPLACE_still
$REPLACE_found
$APPEND_always
$REPLACE_last
$APPEND_other
$TRANSFORM_VERB_VBG_VBD
$REPLACE_learning
$REPLACE_correct
$REPLACE_two
$REPLACE_days
$REPLACE_difficult
$REPLACE_never
$APPEND__
$REPLACE_'d
$APPEND_your
$REPLACE_us
$REPLACE_foreign
$REPLACE_entry
$APPEND_!
$REPLACE_Japan
$APPEND_;
$REPLACE_tell
$REPLACE_give
$REPLACE_decided
$APPEND_during
$REPLACE_Also
$APPEND_his
$REPLACE_speak
$REPLACE_came
$REPLACE_little
$APPEND_while
$TRANSFORM_VERB_VBZ_VBD
$APPEND_things
$REPLACE_especially
$REPLACE_Recently
$REPLACE_come
$APPEND_especially
$REPLACE_needed
$APPEND_make
$REPLACE_whether
$REPLACE_felt
$REPLACE_Although
$REPLACE_someone
$REPLACE_As
$REPLACE_great
$REPLACE_today
$APPEND_since
$REPLACE_hard
$REPLACE_For
$REPLACE_became
$REPLACE_between
$REPLACE_beautiful
$REPLACE_life
$REPLACE_why
$APPEND_though
$APPEND_There
$APPEND_going
$REPLACE_long
$APPEND_where
$REPLACE_believe
$REPLACE_website
$REPLACE_heard
$REPLACE_job
$REPLACE_home
$REPLACE_'re
$REPLACE_But
$REPLACE_anyone
$REPLACE_again
$REPLACE_bad
$REPLACE_recently
$APPEND_here
$REPLACE_practice
$REPLACE_often
$APPEND_got
$APPEND_feel
$REPLACE_saw
$REPLACE_quickly
$REPLACE_language
$REPLACE_wanted
$APPEND_each
$REPLACE_put
$REPLACE_done
$REPLACE_minutes
$REPLACE_each
$APPEND_she
$REPLACE_grammar
$REPLACE_watch
$REPLACE_happy
$REPLACE_back
$REPLACE_friend
$REPLACE_off
$REPLACE_He
$REPLACE_Since
$APPEND_something
$APPEND_using
$APPEND_At
$REPLACE_university
$REPLACE_country
$REPLACE_watching
$REPLACE_received
$REPLACE_enough
$REPLACE_weather
$REPLACE_usually
$APPEND_back
$REPLACE_happened
$APPEND_having
$REPLACE_always
$APPEND_does
$REPLACE_After
$REPLACE_try
$REPLACE_start
$APPEND_already
$REPLACE_talk
$REPLACE_thing
$APPEND_But
$APPEND_For
$REPLACE_Then
$REPLACE_fun
$REPLACE_soon
$REPLACE_starting
$REPLACE_away
$APPEND_want
$REPLACE_asked
$APPEND_went
$REPLACE_trip
$REPLACE_new
$REPLACE_right
$APPEND_after
$REPLACE_keep
$REPLACE_interesting
$REPLACE_together
$REPLACE_Do
$APPEND_So
$REPLACE_beginning
$APPEND_myself
$REPLACE_getting
$APPEND_On
$REPLACE_restaurant
$REPLACE_looking
$REPLACE_children
$APPEND_last
$REPLACE_college
$APPEND_right
$REPLACE_stay
$REPLACE_year
$REPLACE_live
$REPLACE_travel
$REPLACE_favorite
$REPLACE_read
$APPEND_well
$REPLACE_written
$REPLACE_months
$APPEND_yet
$APPEND_first
$APPEND_most
$REPLACE_look
$REPLACE_tried
$REPLACE_clothes
$REPLACE_[
$REPLACE_kind
$APPEND_its
$REPLACE_&
$REPLACE_remember
$APPEND_him
$REPLACE_problem
$APPEND_*
$REPLACE_meet
$REPLACE_gave
$REPLACE_either
$REPLACE_makes
$REPLACE_elderly
$REPLACE_hobbies
$REPLACE_easily
$REPLACE_important
$APPEND_take
$APPEND_thing
$REPLACE_vocabulary
$REPLACE_listening
$REPLACE_must
$REPLACE_hours
$REPLACE_place
$REPLACE_While
$REPLACE_without
$REPLACE_end
$REPLACE_Korean
$REPLACE_Therefore
$REPLACE_working
$REPLACE_high
$REPLACE_house
$REPLACE_already
$APPEND_good
$REPLACE_opportunity
$APPEND_many
$REPLACE_family
$REPLACE_During
$REPLACE_First
$APPEND_both
$REPLACE_once
$REPLACE_experience
$REPLACE_tomorrow
$APPEND_these
$REPLACE_true
$APPEND_day
$REPLACE_leave
$APPEND_When
$REPLACE_watched
$APPEND_person
$REPLACE_best
$REPLACE_harder
$REPLACE_Today
$REPLACE_morning
$REPLACE_If
$REPLACE_woke
$APPEND_into
$APPEND_made
$REPLACE_foreigners
$REPLACE_part
$APPEND_ever
$APPEND_probably
$APPEND_way
$APPEND_over
$APPEND_n't
$REPLACE_towards
$REPLACE_three
$REPLACE_One
$REPLACE_studied
$REPLACE_nervous
$REPLACE_forward
$REPLACE_seen
$REPLACE_Chinese
$REPLACE_night
$APPEND_own
$REPLACE_taught
$APPEND_usually
$REPLACE_To
$REPLACE_communicate
$APPEND_Japanese
$REPLACE_entries
$REPLACE_traveling
$REPLACE_site
$REPLACE_difference
$APPEND_those
$TRANSFORM_VERB_VBD_VBZ
$REPLACE_rainy
$REPLACE_play
$REPLACE_comfortable
$REPLACE_recommend
$REPLACE_coming
$REPLACE_Is
$REPLACE_asleep
$REPLACE_realized
$APPEND_recently
$APPEND_around
$REPLACE_men
$REPLACE_Finally
$REPLACE_excited
$REPLACE_near
$APPEND_often
$REPLACE_t
$REPLACE_next
$REPLACE_ever
$APPEND_Today
$REPLACE_taking
$APPEND_started
$REPLACE_please
$APPEND_than
$REPLACE_sentences
$APPEND_What
$REPLACE_She
$APPEND_work
$REPLACE_visit
$REPLACE_surprised
$REPLACE_show
$REPLACE_You
$APPEND_used
$REPLACE_ago
$APPEND_Even
$APPEND_That
$REPLACE_similar
$APPEND_soon
$REPLACE_less
$REPLACE_enjoy
$REPLACE_diaries
$REPLACE_speaking
$REPLACE_past
$APPEND_through
$REPLACE_women
$REPLACE_planned
$REPLACE_later
$REPLACE_looked
$REPLACE_yet
$APPEND_us
$REPLACE_And
$APPEND_'d
$APPEND_As
$REPLACE_healthy
$APPEND_might
$REPLACE_class
$REPLACE_Now
$REPLACE_outside
$REPLACE_tired
$APPEND_else
$REPLACE_Please
$REPLACE_problems
$APPEND_They
$REPLACE_food
$REPLACE_reading
$APPEND_&
$APPEND_think
$REPLACE_finished
$REPLACE_popular
$REPLACE_Are
$APPEND_2
$APPEND_may
$APPEND_found
$APPEND_whether
$APPEND_We
$REPLACE_How
$REPLACE_continue
$REPLACE_everyday
$REPLACE_daily
$REPLACE_talked
$APPEND_new
$REPLACE_reason
$REPLACE_means
$REPLACE_opportunities
$APPEND_different
$REPLACE_business
$REPLACE_making
$APPEND_ago
$REPLACE_favourite
$REPLACE_bit
$REPLACE_delicious
$APPEND_every
$REPLACE_spend
$APPEND_finally
$APPEND_part
$REPLACE_yesterday
$REPLACE_down
$REPLACE_times
$REPLACE_holiday
$REPLACE_nice
$REPLACE_although
$REPLACE_earlier
$REPLACE_Can
$REPLACE_due
$APPEND_help
$REPLACE_caught
$REPLACE_quite
$APPEND_kind
$REPLACE_words
$REPLACE_movie
$REPLACE_else
$APPEND_together
$REPLACE_advertisement
$APPEND_Is
$APPEND_between
$APPEND_enough
$REPLACE_let
$REPLACE_instead
$REPLACE_disappointed
$REPLACE_Have
$APPEND_After
$APPEND_no
$APPEND_doing
$REPLACE_skills
$APPEND_instead
$REPLACE_Some
$REPLACE_Actually
$APPEND_3
$REPLACE_choose
$REPLACE_An
$APPEND_away
$REPLACE_Does
$REPLACE_played
$APPEND_Because
$REPLACE_both
$REPLACE_easier
$REPLACE_others
$REPLACE_eat
$REPLACE_onto
$REPLACE_sometimes
$REPLACE_began
$REPLACE_usual
$REPLACE_expensive
$APPEND_To
$APPEND_actually
$REPLACE_old
$APPEND_see
$APPEND_know
$REPLACE_few
$APPEND_why
$APPEND_sometimes
$REPLACE_Unfortunately
$APPEND_use
$REPLACE_older
$REPLACE_joined
$REPLACE_own
$REPLACE_raining
$REPLACE_themselves
$REPLACE_example
$APPEND_able
$REPLACE_arrived
$REPLACE_whom
$REPLACE_nothing
$REPLACE_fluently
$APPEND_getting
$REPLACE_convenient
$REPLACE_met
$REPLACE_becoming
$APPEND_better
$APPEND_become
$REPLACE_lots
$REPLACE_fast
$REPLACE_memories
$REPLACE_worse
$REPLACE_interested
$REPLACE_hear
$REPLACE_Secondly
$REPLACE_thoughts
$REPLACE_journal
$REPLACE_bought
$REPLACE_useful
$REPLACE_teach
$APPEND_learn
$REPLACE_throughout
$REPLACE_money
$REPLACE_change
$REPLACE_imagine
$REPLACE_late
$REPLACE_mine
$REPLACE_same
$REPLACE_future
$REPLACE_sure
$REPLACE_students
$REPLACE_along
$REPLACE_exercise
$REPLACE_opinion
$REPLACE_return
$REPLACE_cause
$REPLACE_month
$REPLACE_stop
$REPLACE_worried
$REPLACE_trying
$REPLACE_health
$REPLACE_American
$APPEND_writing
$REPLACE_enjoyed
$REPLACE_second
$APPEND_look
$APPEND_old
$REPLACE_finally
$REPLACE_wish
$REPLACE_famous
$REPLACE_talking
$REPLACE_abroad
$REPLACE_information
$APPEND_And
$REPLACE_stopped
$REPLACE_lose
$REPLACE_sentence
$REPLACE_pronunciation
$REPLACE_feeling
$REPLACE_younger
$REPLACE_passed
$REPLACE_among
$REPLACE_paid
$REPLACE_playing
$REPLACE_attend
$REPLACE_early
$REPLACE_All
$REPLACE_Maybe
$APPEND_high
$REPLACE_child
$APPEND_anything
$REPLACE_order
$REPLACE_saying
$REPLACE_families
$REPLACE_special
$REPLACE_spent
$REPLACE_appreciate
$REPLACE_successful
$APPEND_If
$REPLACE_turned
$REPLACE_cities
$REPLACE_definitely
$REPLACE_fell
$APPEND_try
$APPEND_skills
$REPLACE_world
$REPLACE_technology
$REPLACE_small
$REPLACE_wrote
$REPLACE_takes
$REPLACE_seems
$REPLACE_various
$REPLACE_actually
$REPLACE_exam
$REPLACE_free
$REPLACE_gone
$REPLACE_strong
$REPLACE_receive
$REPLACE_Yesterday
$REPLACE_probably
$REPLACE_Every
$REPLACE_normal
$REPLACE_ask
$REPLACE_company
$REPLACE_environment
| 858 |
+
$REPLACE_buy
|
| 859 |
+
$REPLACE_shows
|
| 860 |
+
$REPLACE_easy
|
| 861 |
+
$REPLACE_sincerely
|
| 862 |
+
$REPLACE_vacation
|
| 863 |
+
$REPLACE_far
|
| 864 |
+
$REPLACE_sauce
|
| 865 |
+
$REPLACE_teacher
|
| 866 |
+
$REPLACE_living
|
| 867 |
+
$REPLACE_parties
|
| 868 |
+
$REPLACE_goes
|
| 869 |
+
$REPLACE_Christmas
|
| 870 |
+
$REPLACE_Hello
|
| 871 |
+
$APPEND_start
|
| 872 |
+
$REPLACE_hurt
|
| 873 |
+
$REPLACE_wonder
|
| 874 |
+
$REPLACE_mind
|
| 875 |
+
$REPLACE_possible
|
| 876 |
+
$REPLACE_thinking
|
| 877 |
+
$APPEND_Now
|
| 878 |
+
$REPLACE_relationship
|
| 879 |
+
$REPLACE_plan
|
| 880 |
+
$REPLACE_man
|
| 881 |
+
$REPLACE_woman
|
| 882 |
+
$REPLACE_activities
|
| 883 |
+
$APPEND_down
|
| 884 |
+
$REPLACE_returned
|
| 885 |
+
$REPLACE_pay
|
| 886 |
+
$REPLACE_ability
|
| 887 |
+
$REPLACE_exciting
|
| 888 |
+
$REPLACE_safe
|
| 889 |
+
$APPEND_off
|
| 890 |
+
$APPEND_until
|
| 891 |
+
$REPLACE_goal
|
| 892 |
+
$APPEND_either
|
| 893 |
+
$REPLACE_mistakes
|
| 894 |
+
$REPLACE_within
|
| 895 |
+
$REPLACE_etc
|
| 896 |
+
$REPLACE_cost
|
| 897 |
+
$REPLACE_particular
|
| 898 |
+
$REPLACE_sense
|
| 899 |
+
$REPLACE_longer
|
| 900 |
+
$REPLACE_advice
|
| 901 |
+
$REPLACE_several
|
| 902 |
+
$APPEND_Do
|
| 903 |
+
$APPEND_currently
|
| 904 |
+
$REPLACE_answer
|
| 905 |
+
$REPLACE_Even
|
| 906 |
+
$REPLACE_held
|
| 907 |
+
$REPLACE_online
|
| 908 |
+
$APPEND_life
|
| 909 |
+
$REPLACE_Firstly
|
| 910 |
+
$REPLACE_close
|
| 911 |
+
$APPEND_study
|
| 912 |
+
$REPLACE_wear
|
| 913 |
+
$APPEND_language
|
| 914 |
+
$REPLACE_number
|
| 915 |
+
$REPLACE_young
|
| 916 |
+
$APPEND_whole
|
| 917 |
+
$APPEND_two
|
| 918 |
+
$APPEND_Then
|
| 919 |
+
$REPLACE_large
|
| 920 |
+
$REPLACE_eating
|
| 921 |
+
$REPLACE_given
|
| 922 |
+
$REPLACE_video
|
| 923 |
+
$REPLACE_spoke
|
| 924 |
+
$REPLACE_Another
|
| 925 |
+
$APPEND_rather
|
| 926 |
+
$APPEND_Of
|
| 927 |
+
$APPEND_once
|
| 928 |
+
$REPLACE_wonderful
|
| 929 |
+
$APPEND_must
|
| 930 |
+
$REPLACE_tonight
|
| 931 |
+
$REPLACE_Their
|
| 932 |
+
$REPLACE_languages
|
| 933 |
+
$REPLACE_big
|
| 934 |
+
$REPLACE_break
|
| 935 |
+
$REPLACE_government
|
| 936 |
+
$REPLACE_staff
|
| 937 |
+
$REPLACE_prepare
|
| 938 |
+
$REPLACE_quit
|
| 939 |
+
$REPLACE_completely
|
| 940 |
+
$REPLACE_ourselves
|
| 941 |
+
$APPEND_He
|
| 942 |
+
$REPLACE_nor
|
| 943 |
+
$APPEND_someone
|
| 944 |
+
$REPLACE_sad
|
| 945 |
+
$REPLACE_against
|
| 946 |
+
$REPLACE_anymore
|
| 947 |
+
$APPEND_give
|
| 948 |
+
$REPLACE_stayed
|
| 949 |
+
$REPLACE_achieve
|
| 950 |
+
$APPEND_An
|
| 951 |
+
$APPEND_Right
|
| 952 |
+
$REPLACE_maybe
|
| 953 |
+
$REPLACE_lost
|
| 954 |
+
$APPEND_came
|
| 955 |
+
$REPLACE_accommodation
|
| 956 |
+
$APPEND_One
|
| 957 |
+
$APPEND_"
|
| 958 |
+
$REPLACE_daughter
|
| 959 |
+
$APPEND_next
|
| 960 |
+
$REPLACE_love
|
| 961 |
+
$REPLACE_cheap
|
| 962 |
+
$REPLACE_poor
|
| 963 |
+
$REPLACE_By
|
| 964 |
+
$REPLACE_whole
|
| 965 |
+
$REPLACE_bring
|
| 966 |
+
$REPLACE_real
|
| 967 |
+
$REPLACE_worked
|
| 968 |
+
$REPLACE_almost
|
| 969 |
+
$REPLACE_prefer
|
| 970 |
+
$APPEND_find
|
| 971 |
+
$REPLACE_everybody
|
| 972 |
+
$APPEND_another
|
| 973 |
+
$REPLACE_create
|
| 974 |
+
$REPLACE_addition
|
| 975 |
+
$REPLACE_turn
|
| 976 |
+
$REPLACE_situation
|
| 977 |
+
$APPEND_due
|
| 978 |
+
$REPLACE_boyfriend
|
| 979 |
+
$APPEND_home
|
| 980 |
+
$REPLACE_finish
|
| 981 |
+
$REPLACE_rather
|
| 982 |
+
$APPEND_said
|
| 983 |
+
$APPEND_'re
|
| 984 |
+
$REPLACE_careful
|
| 985 |
+
$APPEND_long
|
| 986 |
+
$REPLACE_recommended
|
| 987 |
+
$REPLACE_customers
|
| 988 |
+
$REPLACE_level
|
| 989 |
+
$REPLACE_died
|
| 990 |
+
$REPLACE_comes
|
| 991 |
+
$APPEND_You
|
| 992 |
+
$REPLACE_glad
|
| 993 |
+
$APPEND_come
|
| 994 |
+
$REPLACE_short
|
| 995 |
+
$REPLACE_knowledge
|
| 996 |
+
$REPLACE_set
|
| 997 |
+
$REPLACE_Lang
|
| 998 |
+
$REPLACE_planning
|
| 999 |
+
$REPLACE_confidence
|
| 1000 |
+
$REPLACE_gain
|
| 1001 |
+
$REPLACE_check
|
| 1002 |
+
$REPLACE_immediately
|
| 1003 |
+
$REPLACE_scared
|
| 1004 |
+
$REPLACE_conversation
|
| 1005 |
+
$REPLACE_native
|
| 1006 |
+
$REPLACE_His
|
| 1007 |
+
$REPLACE_full
|
| 1008 |
+
$REPLACE_express
|
| 1009 |
+
$REPLACE_married
|
| 1010 |
+
$REPLACE_shopping
|
| 1011 |
+
$APPEND_working
|
| 1012 |
+
$APPEND_food
|
| 1013 |
+
$REPLACE_research
|
| 1014 |
+
$REPLACE_whenever
|
| 1015 |
+
$REPLACE_corrections
|
| 1016 |
+
$REPLACE_weird
|
| 1017 |
+
$APPEND_quite
|
| 1018 |
+
$REPLACE_necessary
|
| 1019 |
+
$REPLACE_Korea
|
| 1020 |
+
$REPLACE_whose
|
| 1021 |
+
$REPLACE_higher
|
| 1022 |
+
$APPEND_entries
|
| 1023 |
+
$REPLACE_Starting
|
| 1024 |
+
$REPLACE_attended
|
| 1025 |
+
$APPEND_5
|
| 1026 |
+
$APPEND_past
|
| 1027 |
+
$REPLACE_realize
|
| 1028 |
+
$REPLACE_cold
|
| 1029 |
+
$APPEND_playing
|
| 1030 |
+
$REPLACE_ended
|
| 1031 |
+
$REPLACE_inside
|
| 1032 |
+
$APPEND_please
|
| 1033 |
+
$REPLACE_coffee
|
| 1034 |
+
$REPLACE_enjoyable
|
| 1035 |
+
$APPEND_took
|
| 1036 |
+
$REPLACE_economic
|
| 1037 |
+
$APPEND_member
|
| 1038 |
+
$REPLACE_natural
|
| 1039 |
+
$REPLACE_registered
|
| 1040 |
+
$REPLACE_idea
|
| 1041 |
+
$APPEND_Recently
|
| 1042 |
+
$APPEND_<
|
| 1043 |
+
$REPLACE_caused
|
| 1044 |
+
$REPLACE_student
|
| 1045 |
+
$REPLACE_questions
|
| 1046 |
+
$REPLACE_music
|
| 1047 |
+
$REPLACE_story
|
| 1048 |
+
$REPLACE_happiness
|
| 1049 |
+
$REPLACE_gives
|
| 1050 |
+
$APPEND_She
|
| 1051 |
+
$REPLACE_Especially
|
| 1052 |
+
$REPLACE_energy
|
| 1053 |
+
$REPLACE_available
|
| 1054 |
+
$REPLACE_anywhere
|
| 1055 |
+
$REPLACE_taken
|
| 1056 |
+
$REPLACE_four
|
| 1057 |
+
$REPLACE_sleep
|
| 1058 |
+
$REPLACE_afraid
|
| 1059 |
+
$REPLACE_Everyone
|
| 1060 |
+
$APPEND_learning
|
| 1061 |
+
$REPLACE_ate
|
| 1062 |
+
$APPEND_few
|
| 1063 |
+
$APPEND_Since
|
| 1064 |
+
$REPLACE_helps
|
| 1065 |
+
$REPLACE_vegetables
|
| 1066 |
+
$REPLACE_kept
|
| 1067 |
+
$REPLACE_gets
|
| 1068 |
+
$REPLACE_explain
|
| 1069 |
+
$REPLACE_girlfriend
|
| 1070 |
+
$REPLACE_choice
|
| 1071 |
+
$REPLACE_waiting
|
| 1072 |
+
$APPEND_put
|
| 1073 |
+
$APPEND_yesterday
|
| 1074 |
+
$APPEND_During
|
| 1075 |
+
$REPLACE_From
|
| 1076 |
+
$APPEND_starting
|
| 1077 |
+
$REPLACE_scary
|
| 1078 |
+
$REPLACE_program
|
| 1079 |
+
$REPLACE_fish
|
| 1080 |
+
$REPLACE_hand
|
| 1081 |
+
$REPLACE_enter
|
| 1082 |
+
$APPEND_friends
|
| 1083 |
+
$REPLACE_decide
|
| 1084 |
+
$REPLACE_score
|
| 1085 |
+
$REPLACE_lonely
|
| 1086 |
+
$APPEND_easily
|
| 1087 |
+
$REPLACE_discovered
|
| 1088 |
+
$REPLACE_seeing
|
| 1089 |
+
$REPLACE_message
|
| 1090 |
+
$REPLACE_week
|
| 1091 |
+
$APPEND_studying
|
| 1092 |
+
$REPLACE_universities
|
| 1093 |
+
$REPLACE_introduce
|
| 1094 |
+
$REPLACE_common
|
| 1095 |
+
$REPLACE_heavily
|
| 1096 |
+
$REPLACE_People
|
| 1097 |
+
$REPLACE_care
|
| 1098 |
+
$APPEND_hard
|
| 1099 |
+
$REPLACE_hit
|
| 1100 |
+
$REPLACE_America
|
| 1101 |
+
$REPLACE_point
|
| 1102 |
+
$APPEND_need
|
| 1103 |
+
$REPLACE_funny
|
| 1104 |
+
$APPEND_almost
|
| 1105 |
+
$REPLACE_pass
|
| 1106 |
+
$REPLACE_temperature
|
| 1107 |
+
$REPLACE_performance
|
| 1108 |
+
$REPLACE_call
|
| 1109 |
+
$REPLACE_extremely
|
| 1110 |
+
$REPLACE_chance
|
| 1111 |
+
$REPLACE_main
|
| 1112 |
+
$REPLACE_season
|
| 1113 |
+
$REPLACE_series
|
| 1114 |
+
$REPLACE_nearby
|
| 1115 |
+
$REPLACE_license
|
| 1116 |
+
$REPLACE_expected
|
| 1117 |
+
$REPLACE_Last
|
| 1118 |
+
$REPLACE_picture
|
| 1119 |
+
$REPLACE_movies
|
| 1120 |
+
$APPEND_Also
|
| 1121 |
+
$REPLACE_seriously
|
| 1122 |
+
$REPLACE_via
|
| 1123 |
+
$REPLACE_running
|
| 1124 |
+
$REPLACE_run
|
| 1125 |
+
$REPLACE_regarding
|
| 1126 |
+
$REPLACE_chose
|
| 1127 |
+
$REPLACE_moment
|
| 1128 |
+
$APPEND_feeling
|
| 1129 |
+
$APPEND_bit
|
| 1130 |
+
$REPLACE_occurred
|
| 1131 |
+
$REPLACE_travelling
|
| 1132 |
+
$REPLACE_brought
|
| 1133 |
+
$APPEND_makes
|
| 1134 |
+
$REPLACE_amount
|
| 1135 |
+
$REPLACE_speakers
|
| 1136 |
+
$REPLACE_scenery
|
| 1137 |
+
$APPEND_year
|
| 1138 |
+
$APPEND_quickly
|
| 1139 |
+
$REPLACE_grateful
|
| 1140 |
+
$REPLACE_character
|
| 1141 |
+
$REPLACE_sleepy
|
| 1142 |
+
$REPLACE_bed
|
| 1143 |
+
$REPLACE_increase
|
| 1144 |
+
$APPEND_Good
|
| 1145 |
+
$REPLACE_area
|
| 1146 |
+
$REPLACE_certain
|
| 1147 |
+
$REPLACE_ways
|
| 1148 |
+
$REPLACE_looks
|
| 1149 |
+
$REPLACE_Nowadays
|
| 1150 |
+
$REPLACE_lucky
|
| 1151 |
+
$REPLACE_current
|
| 1152 |
+
$REPLACE_traditional
|
| 1153 |
+
$APPEND_write
|
| 1154 |
+
$APPEND_anymore
|
| 1155 |
+
$REPLACE_noticed
|
| 1156 |
+
$REPLACE_Did
|
| 1157 |
+
$REPLACE_matter
|
| 1158 |
+
$REPLACE_worry
|
| 1159 |
+
$REPLACE_angry
|
| 1160 |
+
$REPLACE_With
|
| 1161 |
+
$REPLACE_biggest
|
| 1162 |
+
$REPLACE_alcohol
|
| 1163 |
+
$APPEND_left
|
| 1164 |
+
$REPLACE_move
|
| 1165 |
+
$REPLACE_succeed
|
| 1166 |
+
$REPLACE_post
|
| 1167 |
+
$REPLACE_]
|
| 1168 |
+
$REPLACE_abilities
|
| 1169 |
+
$REPLACE_earthquake
|
| 1170 |
+
$REPLACE_visited
|
| 1171 |
+
$APPEND_]
|
| 1172 |
+
$REPLACE_speech
|
| 1173 |
+
$REPLACE_Thank
|
| 1174 |
+
$REPLACE_fewer
|
| 1175 |
+
$REPLACE_happen
|
| 1176 |
+
$APPEND_tomorrow
|
| 1177 |
+
$REPLACE_dinner
|
| 1178 |
+
$REPLACE_quiet
|
| 1179 |
+
$APPEND_type
|
| 1180 |
+
$REPLACE_previous
|
| 1181 |
+
$REPLACE_Furthermore
|
| 1182 |
+
$REPLACE_colleagues
|
| 1183 |
+
$REPLACE_present
|
| 1184 |
+
$REPLACE_No
|
| 1185 |
+
$REPLACE_chicken
|
| 1186 |
+
$REPLACE_city
|
| 1187 |
+
$REPLACE_weeks
|
| 1188 |
+
$REPLACE_develop
|
| 1189 |
+
$REPLACE_join
|
| 1190 |
+
$APPEND_Last
|
| 1191 |
+
$REPLACE_except
|
| 1192 |
+
$REPLACE_economy
|
| 1193 |
+
$REPLACE_sang
|
| 1194 |
+
$REPLACE_phrase
|
| 1195 |
+
$REPLACE_provide
|
| 1196 |
+
$REPLACE_lately
|
| 1197 |
+
$REPLACE_experienced
|
| 1198 |
+
$REPLACE_won
|
| 1199 |
+
$REPLACE_Though
|
| 1200 |
+
$APPEND_Therefore
|
| 1201 |
+
$APPEND_piece
|
| 1202 |
+
$REPLACE_including
|
| 1203 |
+
$REPLACE_husband
|
| 1204 |
+
$REPLACE_changed
|
| 1205 |
+
$REPLACE_view
|
| 1206 |
+
$REPLACE_becomes
|
| 1207 |
+
$REPLACE_share
|
| 1208 |
+
$APPEND_place
|
| 1209 |
+
$REPLACE_test
|
| 1210 |
+
$APPEND_4
|
| 1211 |
+
$APPEND_years
|
| 1212 |
+
$REPLACE_Our
|
| 1213 |
+
$REPLACE_wrong
|
| 1214 |
+
$REPLACE_seemed
|
| 1215 |
+
$REPLACE_wondering
|
| 1216 |
+
$REPLACE_computer
|
| 1217 |
+
$REPLACE_known
|
| 1218 |
+
$REPLACE_culture
|
| 1219 |
+
$REPLACE_Hong
|
| 1220 |
+
$REPLACE_clear
|
| 1221 |
+
$REPLACE_birthday
|
| 1222 |
+
$REPLACE_despite
|
| 1223 |
+
$REPLACE_front
|
| 1224 |
+
$REPLACE_sound
|
| 1225 |
+
$REPLACE_thankful
|
| 1226 |
+
$REPLACE_practise
|
| 1227 |
+
$REPLACE_Will
|
| 1228 |
+
$REPLACE_atmosphere
|
| 1229 |
+
$REPLACE_activity
|
| 1230 |
+
$APPEND_movie
|
| 1231 |
+
$REPLACE_China
|
| 1232 |
+
$REPLACE_reasons
|
| 1233 |
+
$REPLACE_name
|
| 1234 |
+
$REPLACE_serious
|
| 1235 |
+
$REPLACE_2
|
| 1236 |
+
$REPLACE_warm
|
| 1237 |
+
$REPLACE_depressed
|
| 1238 |
+
$REPLACE_simple
|
| 1239 |
+
$APPEND_trying
|
| 1240 |
+
$REPLACE_alone
|
| 1241 |
+
$APPEND_`
|
| 1242 |
+
$REPLACE_listen
|
| 1243 |
+
$REPLACE__
|
| 1244 |
+
$REPLACE_faithfully
|
| 1245 |
+
$REPLACE_Which
|
| 1246 |
+
$REPLACE_relieved
|
| 1247 |
+
$APPEND_1
|
| 1248 |
+
$REPLACE_price
|
| 1249 |
+
$REPLACE_store
|
| 1250 |
+
$REPLACE_lower
|
| 1251 |
+
$REPLACE_strange
|
| 1252 |
+
$REPLACE_game
|
| 1253 |
+
$REPLACE_sick
|
| 1254 |
+
$REPLACE_focus
|
| 1255 |
+
$REPLACE_suddenly
|
| 1256 |
+
$APPEND_Please
|
| 1257 |
+
$REPLACE_Would
|
| 1258 |
+
$REPLACE_traveled
|
| 1259 |
+
$REPLACE_event
|
| 1260 |
+
$REPLACE_ones
|
| 1261 |
+
$APPEND_Yesterday
|
| 1262 |
+
$APPEND_making
|
| 1263 |
+
$REPLACE_remembered
|
| 1264 |
+
$REPLACE_s
|
| 1265 |
+
$REPLACE_Lately
|
| 1266 |
+
$APPEND_S
|
| 1267 |
+
$REPLACE_member
|
| 1268 |
+
$APPEND_decided
|
| 1269 |
+
$REPLACE_across
|
| 1270 |
+
$REPLACE_entered
|
| 1271 |
+
$APPEND_maybe
|
| 1272 |
+
$REPLACE_University
|
| 1273 |
+
$REPLACE_difficulties
|
| 1274 |
+
$REPLACE_terrible
|
| 1275 |
+
$REPLACE_places
|
| 1276 |
+
$REPLACE_pretty
|
| 1277 |
+
$REPLACE_weekend
|
| 1278 |
+
$REPLACE_decision
|
| 1279 |
+
$APPEND_later
|
| 1280 |
+
$REPLACE_anybody
|
| 1281 |
+
$REPLACE_result
|
| 1282 |
+
$REPLACE_buses
|
| 1283 |
+
$REPLACE_Fortunately
|
| 1284 |
+
$APPEND_suddenly
|
| 1285 |
+
$REPLACE_slept
|
| 1286 |
+
$APPEND_school
|
| 1287 |
+
$REPLACE_group
|
| 1288 |
+
$REPLACE_electricity
|
| 1289 |
+
$REPLACE_fan
|
| 1290 |
+
$REPLACE_supposed
|
| 1291 |
+
$REPLACE_recent
|
| 1292 |
+
$REPLACE_wants
|
| 1293 |
+
$APPEND_10
|
| 1294 |
+
$REPLACE_low
|
| 1295 |
+
$APPEND_continue
|
| 1296 |
+
$APPEND_keep
|
| 1297 |
+
$APPEND_words
|
| 1298 |
+
$APPEND_Sometimes
|
| 1299 |
+
$REPLACE_type
|
| 1300 |
+
$REPLACE_Tomorrow
|
| 1301 |
+
$REPLACE_okay
|
| 1302 |
+
$APPEND_class
|
| 1303 |
+
$REPLACE_Her
|
| 1304 |
+
$APPEND_everything
|
| 1305 |
+
$APPEND_university
|
| 1306 |
+
$REPLACE_behind
|
| 1307 |
+
$REPLACE_clean
|
| 1308 |
+
$REPLACE_anxious
|
| 1309 |
+
$REPLACE_follow
|
| 1310 |
+
$APPEND_amount
|
| 1311 |
+
$REPLACE_parents
|
| 1312 |
+
$APPEND_While
|
| 1313 |
+
$REPLACE_email
|
| 1314 |
+
$REPLACE_mean
|
| 1315 |
+
$REPLACE_Most
|
| 1316 |
+
$APPEND_watching
|
| 1317 |
+
$REPLACE_taste
|
| 1318 |
+
$APPEND_taking
|
| 1319 |
+
$REPLACE_Sometimes
|
| 1320 |
+
$REPLACE_French
|
| 1321 |
+
$REPLACE_wearing
|
| 1322 |
+
$APPEND_weather
|
| 1323 |
+
$REPLACE_law
|
| 1324 |
+
$REPLACE_difficulty
|
| 1325 |
+
$APPEND_job
|
| 1326 |
+
$REPLACE_training
|
| 1327 |
+
$REPLACE_crowded
|
| 1328 |
+
$APPEND_All
|
| 1329 |
+
$REPLACE_gotten
|
| 1330 |
+
$REPLACE_catch
|
| 1331 |
+
$REPLACE_method
|
| 1332 |
+
$REPLACE_public
|
| 1333 |
+
$REPLACE_classes
|
| 1334 |
+
$REPLACE_seem
|
| 1335 |
+
$APPEND_show
|
| 1336 |
+
$REPLACE_question
|
| 1337 |
+
$REPLACE_development
|
| 1338 |
+
$REPLACE_says
|
| 1339 |
+
$REPLACE_faster
|
| 1340 |
+
$REPLACE_mother
|
| 1341 |
+
$REPLACE_guitar
|
| 1342 |
+
$REPLACE_teeth
|
| 1343 |
+
$REPLACE_song
|
| 1344 |
+
$REPLACE_lesson
|
| 1345 |
+
$REPLACE_knew
|
| 1346 |
+
$REPLACE_sent
|
| 1347 |
+
$REPLACE_unable
|
| 1348 |
+
$REPLACE_alot
|
| 1349 |
+
$REPLACE_Those
|
| 1350 |
+
$REPLACE_concert
|
| 1351 |
+
$APPEND_speak
|
| 1352 |
+
$REPLACE_software
|
| 1353 |
+
$REPLACE_German
|
| 1354 |
+
$REPLACE_Currently
|
| 1355 |
+
$REPLACE_yourself
|
| 1356 |
+
$REPLACE_fact
|
| 1357 |
+
$REPLACE_major
|
| 1358 |
+
$REPLACE_snowboarding
|
| 1359 |
+
$REPLACE_apartment
|
| 1360 |
+
$REPLACE_none
|
| 1361 |
+
$REPLACE_Here
|
| 1362 |
+
$REPLACE_reply
|
| 1363 |
+
$REPLACE_lived
|
| 1364 |
+
$APPEND_site
|
| 1365 |
+
$REPLACE_introduction
|
| 1366 |
+
$REPLACE_exchange
|
| 1367 |
+
$APPEND_level
|
| 1368 |
+
$REPLACE_iPhone
|
| 1369 |
+
$REPLACE_consider
|
| 1370 |
+
$REPLACE_leaves
|
| 1371 |
+
$APPEND_early
|
| 1372 |
+
$REPLACE_requires
|
| 1373 |
+
$REPLACE_Saturday
|
| 1374 |
+
$TRANSFORM_CASE_CAPITAL_1
|
| 1375 |
+
$REPLACE_further
|
| 1376 |
+
$REPLACE_absolutely
|
| 1377 |
+
$REPLACE_realised
|
| 1378 |
+
$APPEND_heard
|
| 1379 |
+
$REPLACE_following
|
| 1380 |
+
$REPLACE_doctor
|
| 1381 |
+
$REPLACE_beginner
|
| 1382 |
+
$APPEND_against
|
| 1383 |
+
$REPLACE_embarrassed
|
| 1384 |
+
$REPLACE_correctly
|
| 1385 |
+
$REPLACE_half
|
| 1386 |
+
$REPLACE_dangerous
|
| 1387 |
+
$REPLACE_moved
|
| 1388 |
+
$REPLACE_complete
|
| 1389 |
+
$REPLACE_perfect
|
| 1390 |
+
$REPLACE_Anyway
|
| 1391 |
+
$REPLACE_hold
|
| 1392 |
+
$REPLACE_differences
|
| 1393 |
+
$REPLACE_lunch
|
| 1394 |
+
$REPLACE_himself
|
| 1395 |
+
$REPLACE_based
|
| 1396 |
+
$APPEND_thought
|
| 1397 |
+
$REPLACE_reach
|
| 1398 |
+
$REPLACE_cheaper
|
| 1399 |
+
$REPLACE_loud
|
| 1400 |
+
$APPEND_By
|
| 1401 |
+
$APPEND_everyone
|
| 1402 |
+
$REPLACE_leaving
|
| 1403 |
+
$REPLACE_released
|
| 1404 |
+
$REPLACE_fine
|
| 1405 |
+
$REPLACE_Australia
|
| 1406 |
+
$REPLACE_style
|
| 1407 |
+
$REPLACE_deal
|
| 1408 |
+
$APPEND_along
|
| 1409 |
+
$REPLACE_satisfied
|
| 1410 |
+
$REPLACE_Of
|
| 1411 |
+
$REPLACE_variety
|
| 1412 |
+
$APPEND_improve
|
| 1413 |
+
$REPLACE_under
|
| 1414 |
+
$REPLACE_giving
|
| 1415 |
+
$REPLACE_party
|
| 1416 |
+
$APPEND_understand
|
| 1417 |
+
$REPLACE_everywhere
|
| 1418 |
+
$REPLACE_confident
|
| 1419 |
+
$APPEND_play
|
| 1420 |
+
$REPLACE_slow
|
| 1421 |
+
$REPLACE_centre
|
| 1422 |
+
$REPLACE_light
|
| 1423 |
+
$REPLACE_trouble
|
| 1424 |
+
$REPLACE_Its
|
| 1425 |
+
$APPEND_became
|
| 1426 |
+
$REPLACE_begin
|
| 1427 |
+
$REPLACE_grade
|
| 1428 |
+
$REPLACE_exams
|
| 1429 |
+
$REPLACE_busy
|
| 1430 |
+
$REPLACE_nbsp
|
| 1431 |
+
$REPLACE_3
|
| 1432 |
+
$REPLACE_control
|
| 1433 |
+
$REPLACE_characters
|
| 1434 |
+
$REPLACE_needs
|
| 1435 |
+
$REPLACE_pictures
|
| 1436 |
+
$APPEND_New
|
| 1437 |
+
$APPEND_test
|
| 1438 |
+
$REPLACE_currently
|
| 1439 |
+
$REPLACE_describe
|
| 1440 |
+
$REPLACE_uncomfortable
|
| 1441 |
+
$REPLACE_affected
|
| 1442 |
+
$REPLACE_songs
|
| 1443 |
+
$REPLACE_helped
|
| 1444 |
+
$REPLACE_head
|
| 1445 |
+
$APPEND_let
|
| 1446 |
+
$REPLACE_costs
|
| 1447 |
+
$REPLACE_five
|
| 1448 |
+
$REPLACE_slowly
|
| 1449 |
+
$REPLACE_1
|
| 1450 |
+
$REPLACE_causes
|
| 1451 |
+
$REPLACE_ashamed
|
| 1452 |
+
$APPEND_coming
|
| 1453 |
+
$APPEND_everyday
|
| 1454 |
+
$REPLACE_products
|
| 1455 |
+
$REPLACE_dishes
|
| 1456 |
+
$REPLACE_least
|
| 1457 |
+
$REPLACE_wore
|
| 1458 |
+
$REPLACE_internet
|
| 1459 |
+
$REPLACE_mentioned
|
| 1460 |
+
$APPEND_began
|
| 1461 |
+
$REPLACE_word
|
| 1462 |
+
$REPLACE_service
|
| 1463 |
+
$REPLACE_workers
|
| 1464 |
+
$REPLACE_continued
|
| 1465 |
+
$REPLACE_sounds
|
| 1466 |
+
$REPLACE_hour
|
| 1467 |
+
$REPLACE_jobs
|
| 1468 |
+
$REPLACE_career
|
| 1469 |
+
$REPLACE_personal
|
| 1470 |
+
$REPLACE_piece
|
| 1471 |
+
$REPLACE_per
|
| 1472 |
+
$REPLACE_Regarding
|
| 1473 |
+
$REPLACE_entrance
|
| 1474 |
+
$REPLACE_improving
|
| 1475 |
+
$APPEND_=
|
| 1476 |
+
$REPLACE_areas
|
| 1477 |
+
$REPLACE_1st
|
| 1478 |
+
$REPLACE_mostly
|
| 1479 |
+
$REPLACE_lessons
|
| 1480 |
+
$REPLACE_drink
|
| 1481 |
+
$REPLACE_hair
|
| 1482 |
+
$APPEND_exactly
|
| 1483 |
+
$REPLACE_e
|
| 1484 |
+
$REPLACE_luck
|
| 1485 |
+
$REPLACE_members
|
| 1486 |
+
$APPEND_means
|
| 1487 |
+
$REPLACE_mistake
|
| 1488 |
+
$REPLACE_somewhere
|
| 1489 |
+
$APPEND_pair
|
| 1490 |
+
$REPLACE_tomatoes
|
| 1491 |
+
$APPEND_definitely
|
| 1492 |
+
$REPLACE_swimming
|
| 1493 |
+
$REPLACE_perform
|
| 1494 |
+
$REPLACE_compared
|
| 1495 |
+
$REPLACE_unfortunately
|
| 1496 |
+
$REPLACE_however
|
| 1497 |
+
$REPLACE_twice
|
| 1498 |
+
$REPLACE_society
|
| 1499 |
+
$APPEND_20
|
| 1500 |
+
$REPLACE_preparing
|
| 1501 |
+
$REPLACE_Two
|
| 1502 |
+
$APPEND_Japan
|
| 1503 |
+
$REPLACE_nobody
|
| 1504 |
+
$REPLACE_environmental
|
| 1505 |
+
$REPLACE_till
|
| 1506 |
+
$REPLACE_fall
|
| 1507 |
+
$REPLACE_spoken
|
| 1508 |
+
$REPLACE_forget
|
| 1509 |
+
$REPLACE_form
|
| 1510 |
+
$APPEND_number
|
| 1511 |
+
$APPEND_watch
|
| 1512 |
+
$APPEND_live
|
| 1513 |
+
$REPLACE_include
|
| 1514 |
+
$REPLACE_related
|
| 1515 |
+
$REPLACE_wait
|
| 1516 |
+
$APPEND_These
|
| 1517 |
+
$REPLACE_European
|
| 1518 |
+
$APPEND_tell
|
| 1519 |
+
$REPLACE_meeting
|
| 1520 |
+
$REPLACE_evening
|
| 1521 |
+
$REPLACE_nowadays
|
| 1522 |
+
$REPLACE_northern
|
| 1523 |
+
$REPLACE_convenience
|
| 1524 |
+
$REPLACE_performed
|
| 1525 |
+
$REPLACE_plans
|
| 1526 |
+
$REPLACE_competition
|
| 1527 |
+
$REPLACE_open
|
| 1528 |
+
$REPLACE_confused
|
| 1529 |
+
$REPLACE_practicing
|
| 1530 |
+
$REPLACE_quality
|
| 1531 |
+
$REPLACE_professional
|
| 1532 |
+
$REPLACE_maintain
|
| 1533 |
+
$REPLACE_pain
|
| 1534 |
+
$REPLACE_familiar
|
| 1535 |
+
$REPLACE_classical
|
| 1536 |
+
$REPLACE_shop
|
| 1537 |
+
$REPLACE_filled
|
| 1538 |
+
$REPLACE_improved
|
| 1539 |
+
$REPLACE_meant
|
| 1540 |
+
$APPEND_listening
|
| 1541 |
+
$REPLACE_ceremony
|
| 1542 |
+
$REPLACE_increasing
|
| 1543 |
+
$REPLACE_drove
|
| 1544 |
+
$APPEND_completely
|
| 1545 |
+
$REPLACE_account
|
| 1546 |
+
$REPLACE_developed
|
| 1547 |
+
$REPLACE_lack
|
| 1548 |
+
$REPLACE_purpose
|
| 1549 |
+
$REPLACE_upon
|
| 1550 |
+
$REPLACE_tasted
|
| 1551 |
+
$REPLACE_crazy
|
| 1552 |
+
$REPLACE_summer
|
| 1553 |
+
$REPLACE_regret
|
| 1554 |
+
$REPLACE_born
|
| 1555 |
+
$REPLACE_rain
|
| 1556 |
+
$REPLACE_weight
|
| 1557 |
+
$REPLACE_required
|
| 1558 |
+
$REPLACE_accept
|
| 1559 |
+
$REPLACE_cut
|
| 1560 |
+
$REPLACE_flew
|
| 1561 |
+
$REPLACE_waste
|
| 1562 |
+
$APPEND_ca
|
| 1563 |
+
$APPEND_trip
|
| 1564 |
+
$REPLACE_Going
|
| 1565 |
+
$REPLACE_excellent
|
| 1566 |
+
$REPLACE_created
|
| 1567 |
+
$REPLACE_reality
|
| 1568 |
+
$REPLACE_cultural
|
| 1569 |
+
$REPLACE_save
|
| 1570 |
+
$REPLACE_programs
|
| 1571 |
+
$REPLACE_painful
|
| 1572 |
+
$REPLACE_Many
|
| 1573 |
+
$REPLACE_dish
|
| 1574 |
+
$REPLACE_teaching
|
| 1575 |
+
$REPLACE_Studying
|
| 1576 |
+
$REPLACE_water
|
| 1577 |
+
$REPLACE_happens
|
| 1578 |
+
$REPLACE_pleased
|
| 1579 |
+
$REPLACE_ordinary
|
| 1580 |
+
$APPEND_practice
|
| 1581 |
+
$REPLACE_train
|
| 1582 |
+
$REPLACE_results
|
| 1583 |
+
$REPLACE_Italian
|
| 1584 |
+
$REPLACE_weak
|
| 1585 |
+
$REPLACE_period
|
| 1586 |
+
$REPLACE_above
|
| 1587 |
+
$REPLACE_hot
|
| 1588 |
+
$REPLACE_Not
|
| 1589 |
+
$REPLACE_feelings
|
| 1590 |
+
$REPLACE_mobile
|
| 1591 |
+
$REPLACE_walk
|
| 1592 |
+
$APPEND_game
|
| 1593 |
+
$REPLACE_impressed
|
| 1594 |
+
$APPEND_same
|
| 1595 |
+
$REPLACE_Germany
|
| 1596 |
+
$REPLACE_girl
|
| 1597 |
+
$REPLACE_closer
|
| 1598 |
+
$REPLACE_communication
|
| 1599 |
+
$REPLACE_worst
|
| 1600 |
+
$APPEND_No
|
| 1601 |
+
$REPLACE_located
|
| 1602 |
+
$REPLACE_phone
|
| 1603 |
+
$REPLACE_sit
|
| 1604 |
+
$REPLACE_Lastly
|
| 1605 |
+
$REPLACE_feels
|
| 1606 |
+
$APPEND_listen
|
| 1607 |
+
$APPEND_done
|
| 1608 |
+
$REPLACE_subtitles
|
| 1609 |
+
$REPLACE_Whenever
|
| 1610 |
+
$REPLACE_potatoes
|
| 1611 |
+
$REPLACE_fluent
|
| 1612 |
+
$REPLACE_amazing
|
| 1613 |
+
$REPLACE_neither
|
| 1614 |
+
$APPEND_With
|
| 1615 |
+
$APPEND_never
|
| 1616 |
+
$REPLACE_stressed
|
| 1617 |
+
$REPLACE_prevent
|
| 1618 |
+
$REPLACE_photos
|
| 1619 |
+
$APPEND_$
|
| 1620 |
+
$REPLACE_non
|
| 1621 |
+
$REPLACE_agree
|
| 1622 |
+
$REPLACE_Moreover
|
| 1623 |
+
$REPLACE_restaurants
|
| 1624 |
+
$REPLACE_types
|
| 1625 |
+
$REPLACE_office
|
| 1626 |
+
$REPLACE_studies
|
| 1627 |
+
$REPLACE_history
|
| 1628 |
+
$REPLACE_calm
|
| 1629 |
+
$REPLACE_walked
|
| 1630 |
+
$REPLACE_modern
|
| 1631 |
+
$APPEND_three
|
| 1632 |
+
$REPLACE_clothing
|
| 1633 |
+
$REPLACE_private
|
| 1634 |
+
$APPEND_little
|
| 1635 |
+
$APPEND_outside
|
| 1636 |
+
$APPEND_OR
|
| 1637 |
+
$REPLACE_simply
|
| 1638 |
+
$REPLACE_particularly
|
| 1639 |
+
$REPLACE_notice
|
| 1640 |
+
$REPLACE_side
|
| 1641 |
+
$APPEND_looked
|
| 1642 |
+
$REPLACE_YouTube
|
| 1643 |
+
$APPEND_students
|
| 1644 |
+
$REPLACE_afterwards
|
| 1645 |
+
$APPEND_reading
|
| 1646 |
+
$REPLACE_graduate
|
| 1647 |
+
$REPLACE_library
|
| 1648 |
+
$REPLACE_gained
|
| 1649 |
+
$REPLACE_bicycle
|
| 1650 |
+
$REPLACE_son
|
| 1651 |
+
$APPEND_compared
|
| 1652 |
+
$REPLACE_events
|
| 1653 |
+
$APPEND_Although
|
| 1654 |
+
$REPLACE_US
|
| 1655 |
+
$REPLACE_properly
|
| 1656 |
+
$APPEND_Maybe
|
| 1657 |
+
$APPEND_Can
|
| 1658 |
+
$APPEND_best
|
| 1659 |
+
$REPLACE_wondered
|
| 1660 |
+
$REPLACE_arrive
|
| 1661 |
+
$APPEND_say
|
| 1662 |
+
$REPLACE_considered
|
| 1663 |
+
$REPLACE_dream
|
| 1664 |
+
$REPLACE_feet
|
| 1665 |
+
$REPLACE_broke
|
| 1666 |
+
$APPEND_From
|
| 1667 |
+
$REPLACE_southern
|
| 1668 |
+
$REPLACE_hometown
|
| 1669 |
+
$APPEND_journal
|
| 1670 |
+
$REPLACE_Everything
|
| 1671 |
+
$APPEND_money
|
| 1672 |
+
$REPLACE_concentrate
|
| 1673 |
+
$REPLACE_stories
|
| 1674 |
+
$REPLACE_teachers
|
| 1675 |
+
$APPEND_happened
|
| 1676 |
+
$REPLACE_New
|
| 1677 |
+
$REPLACE_transport
|
| 1678 |
+
$REPLACE_stronger
|
| 1679 |
+
$REPLACE_heart
|
| 1680 |
+
$REPLACE_staying
|
| 1681 |
+
$REPLACE_honest
|
| 1682 |
+
$REPLACE_sold
|
| 1683 |
+
$APPEND_wrong
|
| 1684 |
+
$APPEND_Or
|
| 1685 |
+
$REPLACE_relax
|
| 1686 |
+
$REPLACE_heavy
|
| 1687 |
+
$REPLACE_*
|
| 1688 |
+
$REPLACE_speaker
|
| 1689 |
+
$REPLACE_limited
|
| 1690 |
+
$APPEND_speaking
|
| 1691 |
+
$APPEND_e
|
| 1692 |
+
$REPLACE_countryside
|
| 1693 |
+
$REPLACE_heat
|
| 1694 |
+
$REPLACE_prepared
|
| 1695 |
+
$REPLACE_truth
|
| 1696 |
+
$REPLACE_books
|
| 1697 |
+
$REPLACE_drank
|
| 1698 |
+
$REPLACE_nuclear
|
| 1699 |
+
$REPLACE_title
|
| 1700 |
+
$REPLACE_6
|
| 1701 |
+
$REPLACE_boring
|
| 1702 |
+
$REPLACE_totally
|
| 1703 |
+
$REPLACE_practiced
|
| 1704 |
+
$REPLACE_therefore
|
| 1705 |
+
$REPLACE_book
|
| 1706 |
+
$REPLACE_regularly
|
| 1707 |
+
$REPLACE_safety
|
| 1708 |
+
$REPLACE_normally
|
| 1709 |
+
$REPLACE_visiting
|
| 1710 |
+
$APPEND_kinds
|
| 1711 |
+
$REPLACE_impressive
|
| 1712 |
+
$REPLACE_final
|
| 1713 |
+
$REPLACE_driving
|
| 1714 |
+
$REPLACE_stuff
|
| 1715 |
+
$REPLACE_guess
|
| 1716 |
+
$REPLACE_avoid
|
| 1717 |
+
$REPLACE_answered
|
| 1718 |
+
$REPLACE_pleasant
|
| 1719 |
+
$APPEND_times
|
| 1720 |
+
$APPEND_without
|
| 1721 |
+
$REPLACE_focused
|
| 1722 |
+
$REPLACE_badly
|
| 1723 |
+
$REPLACE_solve
|
| 1724 |
+
$REPLACE_grow
|
| 1725 |
+
$REPLACE_drive
|
| 1726 |
+
$APPEND_although
|
| 1727 |
+
$REPLACE_news
|
| 1728 |
+
$REPLACE_Afterwards
|
| 1729 |
+
$APPEND_6
|
| 1730 |
+
$REPLACE_Learning
|
| 1731 |
+
$REPLACE_Thanks
|
| 1732 |
+
$REPLACE_flight
|
| 1733 |
+
$REPLACE_building
|
| 1734 |
+
$REPLACE_opened
|
| 1735 |
+
$REPLACE_shocked
|
| 1736 |
+
$REPLACE_volleyball
|
| 1737 |
+
$REPLACE_accepted
|
| 1738 |
+
$APPEND_exam
|
| 1739 |
+
$REPLACE_team
|
| 1740 |
+
$REPLACE_system
|
| 1741 |
+
$APPEND_ones
|
| 1742 |
+
$REPLACE_goals
|
| 1743 |
+
$REPLACE_Before
|
| 1744 |
+
$REPLACE_meat
|
| 1745 |
+
$APPEND_Does
|
| 1746 |
+
$REPLACE_schedule
|
| 1747 |
+
$REPLACE_cream
|
| 1748 |
+
$REPLACE_listened
|
| 1749 |
+
$REPLACE_Why
|
| 1750 |
+
$REPLACE_worth
|
| 1751 |
+
$APPEND_members
|
| 1752 |
+
$REPLACE_strength
|
| 1753 |
+
$REPLACE_works
|
| 1754 |
+
$APPEND_m
|
| 1755 |
+
$REPLACE_surprise
|
| 1756 |
+
$REPLACE_holidays
|
| 1757 |
+
$REPLACE_7
|
| 1758 |
+
$APPEND_written
|
| 1759 |
+
$REPLACE_medicine
|
| 1760 |
+
$REPLACE_contact
|
| 1761 |
+
$REPLACE_position
|
| 1762 |
+
$APPEND_tried
|
| 1763 |
+
$REPLACE_highly
|
| 1764 |
+
$REPLACE_missed
|
| 1765 |
+
$REPLACE_typhoon
|
| 1766 |
+
$REPLACE_celebrate
|
| 1767 |
+
$REPLACE_February
|
| 1768 |
+
$REPLACE_greater
|
| 1769 |
+
$REPLACE_support
|
| 1770 |
+
$REPLACE_allow
|
| 1771 |
+
$REPLACE_appeared
|
| 1772 |
+
$REPLACE_naturally
|
| 1773 |
+
$REPLACE_breakfast
|
| 1774 |
+
$REPLACE_afternoon
|
| 1775 |
+
$REPLACE_dead
|
| 1776 |
+
$REPLACE_proud
|
| 1777 |
+
$REPLACE_stuck
|
| 1778 |
+
$APPEND_half
|
| 1779 |
+
$REPLACE_lyrics
|
| 1780 |
+
$APPEND_based
|
| 1781 |
+
$REPLACE_sing
|
| 1782 |
+
$REPLACE_process
|
| 1783 |
+
$REPLACE_search
|
| 1784 |
+
$REPLACE_sell
|
| 1785 |
+
$REPLACE_learnt
|
| 1786 |
+
$REPLACE_responsibility
|
| 1787 |
+
$REPLACE_field
|
| 1788 |
+
$REPLACE_lifestyle
|
| 1789 |
+
$REPLACE_helpful
|
| 1790 |
+
$REPLACE_Koreans
|
| 1791 |
+
$REPLACE_awake
|
| 1792 |
+
$REPLACE_success
|
| 1793 |
+
$APPEND_living
|
| 1794 |
+
$REPLACE_latest
|
| 1795 |
+
$REPLACE_corrected
|
| 1796 |
+
$REPLACE_communicating
|
| 1797 |
+
$REPLACE_raise
|
| 1798 |
+
$REPLACE_showed
|
| 1799 |
+
$REPLACE_father
|
| 1800 |
+
$REPLACE_marriage
|
| 1801 |
+
$REPLACE_elementary
|
| 1802 |
+
$REPLACE_allows
|
| 1803 |
+
$APPEND_lot
|
| 1804 |
+
$REPLACE_eventually
|
| 1805 |
+
$REPLACE_customer
|
| 1806 |
+
$REPLACE_unusual
|
| 1807 |
+
$REPLACE_advise
|
| 1808 |
+
$REPLACE_letter
|
| 1809 |
+
$REPLACE_clearly
|
| 1810 |
+
$REPLACE_essay
|
| 1811 |
+
$REPLACE_bigger
|
| 1812 |
+
$REPLACE_habit
|
| 1813 |
+
$APPEND_system
|
| 1814 |
+
$REPLACE_ran
|
| 1815 |
+
$APPEND_speakers
|
| 1816 |
+
$REPLACE_bored
|
| 1817 |
+
$REPLACE_whatever
|
| 1818 |
+
$REPLACE_fourth
|
| 1819 |
+
$REPLACE_chosen
|
| 1820 |
+
$REPLACE_room
|
| 1821 |
+
$REPLACE_30
|
| 1822 |
+
$REPLACE_carefully
|
| 1823 |
+
$REPLACE_loss
|
| 1824 |
+
$REPLACE_ingredients
|
| 1825 |
+
$REPLACE_singing
|
| 1826 |
+
$REPLACE_ride
|
| 1827 |
+
$REPLACE_build
|
| 1828 |
+
$REPLACE_cooking
|
| 1829 |
+
$REPLACE_add
|
| 1830 |
+
$REPLACE_mom
|
| 1831 |
+
$REPLACE_sign
|
| 1832 |
+
$REPLACE_chatting
|
| 1833 |
+
$REPLACE_happier
|
| 1834 |
+
$REPLACE_seat
|
| 1835 |
+
$REPLACE_affect
|
| 1836 |
+
$REPLACE_appropriate
|
| 1837 |
+
$REPLACE_named
|
| 1838 |
+
$APPEND_30
|
| 1839 |
+
$REPLACE_female
|
| 1840 |
+
$REPLACE_fashion
|
| 1841 |
+
$REPLACE_attending
|
| 1842 |
+
$REPLACE_Tonight
|
| 1843 |
+
$REPLACE_role
|
| 1844 |
+
$REPLACE_somebody
|
| 1845 |
+
$APPEND_Unfortunately
|
| 1846 |
+
$REPLACE_employees
|
| 1847 |
+
$REPLACE_face
|
| 1848 |
+
$REPLACE_middle
|
| 1849 |
+
$REPLACE_junior
|
| 1850 |
+
$REPLACE_lovely
|
| 1851 |
+
$REPLACE_reduce
|
| 1852 |
+
$REPLACE_positive
|
| 1853 |
+
$REPLACE_concerned
|
| 1854 |
+
$REPLACE_overseas
|
| 1855 |
+
$REPLACE_"
|
| 1856 |
+
$REPLACE_Second
|
| 1857 |
+
$APPEND_Our
|
| 1858 |
+
$APPEND_named
|
| 1859 |
+
$REPLACE_mountain
|
| 1860 |
+
$APPEND_eating
|
| 1861 |
+
$REPLACE_warmer
|
| 1862 |
+
$REPLACE_death
|
| 1863 |
+
$REPLACE_electronic
|
| 1864 |
+
$REPLACE_figure
|
| 1865 |
+
$REPLACE_frequently
|
| 1866 |
+
$REPLACE_pair
|
| 1867 |
+
$REPLACE_Americans
|
| 1868 |
+
$REPLACE_rest
|
| 1869 |
+
$REPLACE_TV
|
| 1870 |
+
$APPEND_themselves
|
| 1871 |
+
$APPEND_however
|
| 1872 |
+
$REPLACE_subject
|
| 1873 |
+
$APPEND_music
|
| 1874 |
+
$REPLACE_dormitory
|
| 1875 |
+
$APPEND_forward
|
| 1876 |
+
$REPLACE_department
|
| 1877 |
+
$REPLACE_pronounce
|
| 1878 |
+
$REPLACE_wake
|
| 1879 |
+
$REPLACE_cook
|
| 1880 |
+
$APPEND_visit
|
| 1881 |
+
$REPLACE_raised
|
| 1882 |
+
$REPLACE_smaller
|
| 1883 |
+
$REPLACE_stressful
|
| 1884 |
+
$APPEND_lately
|
| 1885 |
+
$REPLACE_completed
|
| 1886 |
+
$REPLACE_photography
|
| 1887 |
+
$REPLACE_10
|
| 1888 |
+
$APPEND_saying
|
| 1889 |
+
$REPLACE_dropped
|
| 1890 |
+
$REPLACE_laughed
|
| 1891 |
+
$APPEND_read
|
| 1892 |
+
$REPLACE_complain
|
| 1893 |
+
$REPLACE_Usually
|
| 1894 |
+
$APPEND_felt
|
| 1895 |
+
$REPLACE_Thus
|
| 1896 |
+
$REPLACE_foreigner
|
| 1897 |
+
$REPLACE_theatre
|
| 1898 |
+
$APPEND_website
|
| 1899 |
+
$APPEND_days
|
| 1900 |
+
$REPLACE_slightly
|
| 1901 |
+
$REPLACE_incorrect
|
| 1902 |
+
$REPLACE_frustrated
|
| 1903 |
+
$REPLACE_grandmother
|
| 1904 |
+
$REPLACE_forty
|
| 1905 |
+
$REPLACE_signed
|
| 1906 |
+
$APPEND_book
|
| 1907 |
+
$REPLACE_sore
|
| 1908 |
+
$REPLACE_classmates
|
| 1909 |
+
$REPLACE_equipment
|
| 1910 |
+
$REPLACE_memory
|
| 1911 |
+
$REPLACE_ordered
|
| 1912 |
+
$APPEND_stay
|
| 1913 |
+
$REPLACE_expect
|
| 1914 |
+
$REPLACE_drunk
|
| 1915 |
+
$APPEND_gave
|
| 1916 |
+
$REPLACE_midnight
|
| 1917 |
+
$APPEND_seem
|
| 1918 |
+
$APPEND_cut
|
| 1919 |
+
$REPLACE_address
|
| 1920 |
+
$REPLACE_couple
|
| 1921 |
+
$REPLACE_Compared
|
| 1922 |
+
$REPLACE_friendly
|
| 1923 |
+
$REPLACE_rode
|
| 1924 |
+
$REPLACE_losing
|
| 1925 |
+
$REPLACE_nearly
|
| 1926 |
+
$REPLACE_six
|
| 1927 |
+
$REPLACE_speeches
|
| 1928 |
+
$REPLACE_international
|
| 1929 |
+
$REPLACE_understood
|
| 1930 |
+
$REPLACE_thank
|
| 1931 |
+
$REPLACE_rarely
|
| 1932 |
+
$REPLACE_match
|
| 1933 |
+
$REPLACE_uploaded
|
| 1934 |
+
$REPLACE_Luckily
|
| 1935 |
+
$REPLACE_failed
|
| 1936 |
+
$REPLACE_hamburger
|
| 1937 |
+
$REPLACE_sleeping
|
| 1938 |
+
$REPLACE_tongue
|
| 1939 |
+
$REPLACE_colleague
|
| 1940 |
+
$REPLACE_require
|
| 1941 |
+
$REPLACE_terribly
|
| 1942 |
+
$REPLACE_case
|
| 1943 |
+
$APPEND_traditional
|
| 1944 |
+
$REPLACE_graduation
|
| 1945 |
+
$REPLACE_offer
|
| 1946 |
+
$REPLACE_respond
|
| 1947 |
+
$REPLACE_perfectly
|
| 1948 |
+
$REPLACE_businesses
|
| 1949 |
+
$REPLACE_8
|
| 1950 |
+
$APPEND_s
|
| 1951 |
+
$REPLACE_understanding
|
| 1952 |
+
$REPLACE_hungry
|
| 1953 |
+
$REPLACE_conclusion
|
| 1954 |
+
$REPLACE_homework
|
| 1955 |
+
$REPLACE_design
|
| 1956 |
+
$REPLACE_British
|
| 1957 |
+
$REPLACE_peaceful
|
| 1958 |
+
$REPLACE_forgot
|
| 1959 |
+
$REPLACE_suitable
|
| 1960 |
+
$REPLACE_soccer
|
| 1961 |
+
$REPLACE_tells
|
| 1962 |
+
$REPLACE_third
|
| 1963 |
+
$REPLACE_exactly
|
| 1964 |
+
$REPLACE_term
|
| 1965 |
+
$REPLACE_drinking
|
| 1966 |
+
$REPLACE_searching
|
| 1967 |
+
$REPLACE_hung
|
| 1968 |
+
$REPLACE_air
|
| 1969 |
+
$REPLACE_strongly
|
| 1970 |
+
$APPEND_looking
|
| 1971 |
+
$REPLACE_band
|
| 1972 |
+
$REPLACE_checked
|
| 1973 |
+
$REPLACE_send
|
| 1974 |
+
$REPLACE_Zealand
|
| 1975 |
+
$REPLACE_draw
|
| 1976 |
+
$REPLACE_educational
|
| 1977 |
+
$REPLACE_incident
|
| 1978 |
+
$APPEND_Some
|
| 1979 |
+
$APPEND_friend
|
| 1980 |
+
$APPEND_free
|
| 1981 |
+
$REPLACE_toward
|
| 1982 |
+
$REPLACE_interview
|
| 1983 |
+
$APPEND_>
|
| 1984 |
+
$REPLACE_tough
|
| 1985 |
+
$REPLACE_canceled
|
| 1986 |
+
$REPLACE_memorize
|
| 1987 |
+
$REPLACE_historical
|
| 1988 |
+
$REPLACE_slang
|
| 1989 |
+
$REPLACE_replied
|
| 1990 |
+
$REPLACE_considering
|
| 1991 |
+
$REPLACE_skill
|
| 1992 |
+
$REPLACE_musical
|
| 1993 |
+
$REPLACE_improvement
|
| 1994 |
+
$REPLACE_carry
|
| 1995 |
+
$REPLACE_education
|
| 1996 |
+
$APPEND_great
|
| 1997 |
+
$REPLACE_companies
|
| 1998 |
+
$REPLACE_cool
|
| 1999 |
+
$APPEND_comes
|
| 2000 |
+
$REPLACE_employee
|
| 2001 |
+
$REPLACE_age
|
| 2002 |
+
$APPEND_Yes
|
| 2003 |
+
$REPLACE_Could
|
| 2004 |
+
$REPLACE_relaxed
|
| 2005 |
+
$REPLACE_greatest
|
| 2006 |
+
$REPLACE_total
|
| 2007 |
+
$REPLACE_ready
|
| 2008 |
+
$REPLACE_guy
|
| 2009 |
+
$REPLACE_chocolate
|
| 2010 |
+
$APPEND_tense
|
| 2011 |
+
$REPLACE_earn
|
| 2012 |
+
$REPLACE_topic
|
| 2013 |
+
$REPLACE_beat
|
| 2014 |
+
$REPLACE_date
|
| 2015 |
+
$REPLACE_illnesses
|
| 2016 |
+
$REPLACE_conditioner
|
| 2017 |
+
$APPEND_inside
|
| 2018 |
+
$REPLACE_suggested
|
| 2019 |
+
$REPLACE_drama
|
| 2020 |
+
$REPLACE_pick
|
| 2021 |
+
$REPLACE_starts
|
| 2022 |
+
$REPLACE_manage
|
| 2023 |
+
$APPEND_anyway
|
| 2024 |
+
$REPLACE_Thailand
|
| 2025 |
+
$REPLACE_McDonald
|
| 2026 |
+
$REPLACE_Writing
|
| 2027 |
+
$APPEND_Are
|
| 2028 |
+
$REPLACE_2nd
|
| 2029 |
+
$APPEND_fall
|
| 2030 |
+
$REPLACE_flu
|
| 2031 |
+
$REPLACE_websites
|
| 2032 |
+
$REPLACE_snowy
|
| 2033 |
+
$APPEND_diary
|
| 2034 |
+
$REPLACE_road
|
| 2035 |
+
$REPLACE_professor
|
| 2036 |
+
$REPLACE_exhausted
|
| 2037 |
+
$APPEND_held
|
| 2038 |
+
$REPLACE_colored
|
| 2039 |
+
$REPLACE_sitting
|
| 2040 |
+
$REPLACE_wanna
|
| 2041 |
+
$REPLACE_according
|
| 2042 |
+
$REPLACE_lead
|
| 2043 |
+
$REPLACE_scene
|
| 2044 |
+
$REPLACE_hardly
|
| 2045 |
+
$REPLACE_ticket
|
| 2046 |
+
$REPLACE_remain
|
| 2047 |
+
$REPLACE_worrying
|
| 2048 |
+
$REPLACE_patience
|
| 2049 |
+
$REPLACE_Having
|
| 2050 |
+
$REPLACE_allowed
|
| 2051 |
+
$REPLACE_whilst
|
| 2052 |
+
$REPLACE_entire
|
| 2053 |
+
$REPLACE_promised
|
| 2054 |
+
$REPLACE_photo
|
| 2055 |
+
$REPLACE_motivated
|
| 2056 |
+
$REPLACE_dairy
|
| 2057 |
+
$APPEND_full
|
| 2058 |
+
$REPLACE_points
|
| 2059 |
+
$REPLACE_Soon
|
| 2060 |
+
$REPLACE_messages
|
| 2061 |
+
$APPEND_alone
|
| 2062 |
+
$REPLACE_alive
|
| 2063 |
+
$APPEND_Every
|
| 2064 |
+
$APPEND_entire
|
| 2065 |
+
$REPLACE_programme
|
| 2066 |
+
$REPLACE_fully
|
| 2067 |
+
$REPLACE_cloudy
|
| 2068 |
+
$REPLACE_occur
|
| 2069 |
+
$REPLACE_meaning
|
| 2070 |
+
$APPEND_area
|
| 2071 |
+
$REPLACE_liked
|
| 2072 |
+
$REPLACE_sweet
|
| 2073 |
+
$REPLACE_act
|
| 2074 |
+
$REPLACE_graduated
|
| 2075 |
+
$REPLACE_childhood
|
| 2076 |
+
$APPEND_available
|
| 2077 |
+
$REPLACE_believed
|
| 2078 |
+
$REPLACE_newspaper
|
| 2079 |
+
$REPLACE_enjoying
|
| 2080 |
+
$REPLACE_riding
|
| 2081 |
+
$APPEND_Not
|
| 2082 |
+
$REPLACE_body
|
| 2083 |
+
$REPLACE_beneficial
|
| 2084 |
+
$REPLACE_recognize
|
| 2085 |
+
$APPEND_native
|
| 2086 |
+
$REPLACE_attention
|
| 2087 |
+
$REPLACE_Until
|
| 2088 |
+
$REPLACE_struck
|
| 2089 |
+
$REPLACE_Just
|
| 2090 |
+
$REPLACE_correcting
|
| 2091 |
+
$REPLACE_interest
|
| 2092 |
+
$REPLACE_changing
|
| 2093 |
+
$REPLACE_pollution
|
| 2094 |
+
$APPEND_pieces
|
| 2095 |
+
$REPLACE_According
|
| 2096 |
+
$REPLACE_autumn
|
| 2097 |
+
$APPEND_problem
|
| 2098 |
+
$REPLACE_gym
|
| 2099 |
+
$REPLACE_basic
|
| 2100 |
+
$REPLACE_includes
|
| 2101 |
+
$REPLACE_games
|
| 2102 |
+
$APPEND_seeing
|
| 2103 |
+
$REPLACE_sunny
|
| 2104 |
+
$REPLACE_5
|
| 2105 |
+
$APPEND_learned
|
| 2106 |
+
$REPLACE_stage
|
| 2107 |
+
$REPLACE_touch
|
| 2108 |
+
$REPLACE_discuss
|
| 2109 |
+
$REPLACE_airplane
|
| 2110 |
+
$REPLACE_Has
|
| 2111 |
+
$REPLACE_die
|
| 2112 |
+
$REPLACE_relationships
|
| 2113 |
+
$REPLACE_effects
|
| 2114 |
+
$REPLACE_sat
|
| 2115 |
+
$REPLACE_parts
|
| 2116 |
+
$REPLACE_tsunami
|
| 2117 |
+
$REPLACE_response
|
| 2118 |
+
$REPLACE_teaches
|
| 2119 |
+
$REPLACE_self
|
| 2120 |
+
$REPLACE_thanks
|
| 2121 |
+
$REPLACE_rained
|
| 2122 |
+
$REPLACE_laundry
|
| 2123 |
+
$REPLACE_dependent
|
| 2124 |
+
$APPEND_near
|
| 2125 |
+
$REPLACE_below
|
| 2126 |
+
$REPLACE_custom
|
| 2127 |
+
$REPLACE_inconvenient
|
| 2128 |
+
$REPLACE_relaxing
|
| 2129 |
+
$REPLACE_wedding
|
| 2130 |
+
$REPLACE_challenge
|
| 2131 |
+
$APPEND_set
|
| 2132 |
+
$REPLACE_chatted
|
| 2133 |
+
$APPEND_immediately
|
| 2134 |
+
$REPLACE_attractive
|
| 2135 |
+
$REPLACE_translate
|
| 2136 |
+
$APPEND_Just
|
| 2137 |
+
$APPEND_TV
|
| 2138 |
+
$REPLACE_win
|
| 2139 |
+
$REPLACE_museum
|
| 2140 |
+
$REPLACE_neighborhood
|
| 2141 |
+
$REPLACE_Right
|
| 2142 |
+
$REPLACE_regular
|
| 2143 |
+
$REPLACE_experiences
|
| 2144 |
+
$APPEND_word
|
| 2145 |
+
$APPEND_played
|
| 2146 |
+
$REPLACE_hobby
|
| 2147 |
+
$REPLACE_developing
|
| 2148 |
+
$REPLACE_truly
|
| 2149 |
+
$APPEND_ended
|
| 2150 |
+
$REPLACE_issue
|
| 2151 |
+
$APPEND_correct
|
| 2152 |
+
$REPLACE_impossible
|
| 2153 |
+
$REPLACE_concerning
|
| 2154 |
+
$REPLACE_realise
|
| 2155 |
+
$REPLACE_brings
|
| 2156 |
+
$APPEND_room
|
| 2157 |
+
$REPLACE_advised
|
| 2158 |
+
$REPLACE_workplace
|
| 2159 |
+
$REPLACE_surfing
|
| 2160 |
+
$APPEND_Let
|
| 2161 |
+
$APPEND_daily
|
| 2162 |
+
$REPLACE_stomach
|
| 2163 |
+
$APPEND_night
|
| 2164 |
+
$REPLACE_meal
|
| 2165 |
+
$REPLACE_disadvantages
|
| 2166 |
+
$REPLACE_loudly
|
| 2167 |
+
$REPLACE_prize
|
| 2168 |
+
$REPLACE_besides
|
| 2169 |
+
$APPEND_experience
|
| 2170 |
+
$REPLACE_Despite
|
| 2171 |
+
$REPLACE_4
|
| 2172 |
+
$APPEND_concert
|
| 2173 |
+
$REPLACE_3rd
|
| 2174 |
+
$REPLACE_power
|
| 2175 |
+
$REPLACE_`
|
| 2176 |
+
$APPEND_lots
|
| 2177 |
+
$REPLACE_changes
|
| 2178 |
+
$REPLACE_kindergarten
|
| 2179 |
+
$REPLACE_sweat
|
| 2180 |
+
$REPLACE_ten
|
| 2181 |
+
$APPEND_wo
|
| 2182 |
+
$REPLACE_overcome
|
| 2183 |
+
$REPLACE_effective
|
| 2184 |
+
$REPLACE_terms
|
| 2185 |
+
$REPLACE_shown
|
| 2186 |
+
$REPLACE_chat
|
| 2187 |
+
$APPEND_team
|
| 2188 |
+
$REPLACE_sorry
|
| 2189 |
+
$APPEND_7
|
| 2190 |
+
$REPLACE_station
|
| 2191 |
+
$APPEND_man
|
| 2192 |
+
$REPLACE_produce
|
| 2193 |
+
$REPLACE_technological
|
| 2194 |
+
$REPLACE_differently
|
| 2195 |
+
$REPLACE_transferred
|
| 2196 |
+
$APPEND_told
|
| 2197 |
+
$APPEND_late
|
| 2198 |
+
$REPLACE_laugh
|
| 2199 |
+
$REPLACE_worker
|
| 2200 |
+
$REPLACE_space
|
| 2201 |
+
$REPLACE_introduced
|
| 2202 |
+
$REPLACE_single
|
| 2203 |
+
$REPLACE_cancelled
|
| 2204 |
+
$REPLACE_methods
|
| 2205 |
+
$REPLACE_transportation
|
| 2206 |
+
$REPLACE_Philippines
|
| 2207 |
+
$REPLACE_possibility
|
| 2208 |
+
$REPLACE_tasty
|
| 2209 |
+
$REPLACE_location
|
| 2210 |
+
$REPLACE_male
|
| 2211 |
+
$APPEND_simply
|
| 2212 |
+
$REPLACE_tastes
|
| 2213 |
+
$REPLACE_ease
|
| 2214 |
+
$REPLACE_straight
|
| 2215 |
+
$REPLACE_uses
|
| 2216 |
+
$REPLACE_participate
|
| 2217 |
+
$REPLACE_discover
|
| 2218 |
+
$APPEND_co
|
| 2219 |
+
$REPLACE_details
|
| 2220 |
+
$REPLACE_logged
|
| 2221 |
+
$REPLACE_bright
|
| 2222 |
+
$REPLACE_Once
|
| 2223 |
+
$REPLACE_walking
|
| 2224 |
+
$APPEND_spent
|
| 2225 |
+
$MERGE_HYPHEN
|
| 2226 |
+
$REPLACE_growing
|
| 2227 |
+
$REPLACE_slight
|
| 2228 |
+
$APPEND_current
|
| 2229 |
+
$REPLACE_moving
|
| 2230 |
+
$REPLACE_spring
|
| 2231 |
+
$REPLACE_August
|
| 2232 |
+
$REPLACE_fans
|
| 2233 |
+
$REPLACE_Well
|
| 2234 |
+
$APPEND_nervous
|
| 2235 |
+
$REPLACE_version
|
| 2236 |
+
$REPLACE_upset
|
| 2237 |
+
$REPLACE_stress
|
| 2238 |
+
$REPLACE_appointment
|
| 2239 |
+
$REPLACE_tasks
|
| 2240 |
+
$REPLACE_Being
|
| 2241 |
+
$REPLACE_encouraged
|
| 2242 |
+
$REPLACE_town
|
| 2243 |
+
$REPLACE_eight
|
| 2244 |
+
$REPLACE_mood
|
| 2245 |
+
$REPLACE_forecast
|
| 2246 |
+
$APPEND_lessons
|
| 2247 |
+
$APPEND_finished
|
| 2248 |
+
$REPLACE_increased
|
| 2249 |
+
$REPLACE_blossoms
|
| 2250 |
+
$REPLACE_aware
|
| 2251 |
+
$REPLACE_Besides
|
| 2252 |
+
$REPLACE_Taiwanese
|
| 2253 |
+
$REPLACE_someday
|
| 2254 |
+
$REPLACE_happening
|
| 2255 |
+
$REPLACE_volunteer
|
| 2256 |
+
$REPLACE_fireworks
|
| 2257 |
+
$REPLACE_ideas
|
| 2258 |
+
$REPLACE_curious
|
| 2259 |
+
$REPLACE_responsible
|
| 2260 |
+
$REPLACE_voice
|
| 2261 |
+
$REPLACE_covered
|
| 2262 |
+
$APPEND_ice
|
| 2263 |
+
$REPLACE_rang
|
| 2264 |
+
$REPLACE_items
|
| 2265 |
+
$REPLACE_apart
|
| 2266 |
+
$APPEND_program
|
| 2267 |
+
$REPLACE_bye
|
| 2268 |
+
$REPLACE_Next
|
| 2269 |
+
$REPLACE_complicated
|
| 2270 |
+
$REPLACE_Someone
|
| 2271 |
+
$APPEND_earlier
|
| 2272 |
+
$APPEND_difficult
|
| 2273 |
+
$REPLACE_invited
|
| 2274 |
+
$REPLACE_applied
|
| 2275 |
+
$APPEND_anyone
|
| 2276 |
+
$REPLACE_gaining
|
| 2277 |
+
$REPLACE_cute
|
| 2278 |
+
$REPLACE_line
|
| 2279 |
+
$REPLACE_partner
|
| 2280 |
+
$REPLACE_regretted
|
| 2281 |
+
$REPLACE_clock
|
| 2282 |
+
$APPEND_according
|
| 2283 |
+
$REPLACE_greatly
|
| 2284 |
+
$REPLACE_appear
|
| 2285 |
+
$REPLACE_opposite
|
| 2286 |
+
$REPLACE_Like
|
| 2287 |
+
$REPLACE_patient
|
| 2288 |
+
$REPLACE_spread
|
| 2289 |
+
$REPLACE_dollars
|
| 2290 |
+
$REPLACE_relieve
|
| 2291 |
+
$REPLACE_article
|
| 2292 |
+
$REPLACE_benefits
|
| 2293 |
+
$APPEND_American
|
| 2294 |
+
$REPLACE_Looking
|
| 2295 |
+
$REPLACE_Who
|
| 2296 |
+
$REPLACE_fix
|
| 2297 |
+
$REPLACE_human
|
| 2298 |
+
$REPLACE_technologies
|
| 2299 |
+
$REPLACE_breathe
|
| 2300 |
+
$REPLACE_strict
|
| 2301 |
+
$REPLACE_opinions
|
| 2302 |
+
$APPEND_possibly
|
| 2303 |
+
$REPLACE_appearance
|
| 2304 |
+
$REPLACE_explanation
|
| 2305 |
+
$REPLACE_herself
|
| 2306 |
+
$APPEND_student
|
| 2307 |
+
$REPLACE_plane
|
| 2308 |
+
$REPLACE_hearing
|
| 2309 |
+
$REPLACE_personality
|
| 2310 |
+
$REPLACE_attitude
|
| 2311 |
+
$REPLACE_journey
|
| 2312 |
+
$REPLACE_recover
|
| 2313 |
+
$REPLACE_magazine
|
| 2314 |
+
$REPLACE_disappeared
|
| 2315 |
+
$APPEND_taken
|
| 2316 |
+
$REPLACE_Me
|
| 2317 |
+
$REPLACE_efficiently
|
| 2318 |
+
$REPLACE_strawberries
|
| 2319 |
+
$APPEND_becoming
|
| 2320 |
+
$REPLACE_October
|
| 2321 |
+
$REPLACE_social
|
| 2322 |
+
$REPLACE_suicide
|
| 2323 |
+
$REPLACE_reached
|
| 2324 |
+
$REPLACE_damaged
|
| 2325 |
+
$REPLACE_personalities
|
| 2326 |
+
$REPLACE_valuable
|
| 2327 |
+
$REPLACE_height
|
| 2328 |
+
$REPLACE_Asian
|
| 2329 |
+
$REPLACE_sight
|
| 2330 |
+
$REPLACE_issues
|
| 2331 |
+
$REPLACE_titled
|
| 2332 |
+
$REPLACE_science
|
| 2333 |
+
$REPLACE_cell
|
| 2334 |
+
$REPLACE_amongst
|
| 2335 |
+
$APPEND_movies
|
| 2336 |
+
$REPLACE_June
|
| 2337 |
+
$REPLACE_policies
|
| 2338 |
+
$REPLACE_silent
|
| 2339 |
+
$REPLACE_girls
|
| 2340 |
+
$APPEND_company
|
| 2341 |
+
$APPEND_second
|
| 2342 |
+
$APPEND_ability
|
| 2343 |
+
$APPEND_hope
|
| 2344 |
+
$REPLACE_former
|
| 2345 |
+
$APPEND_GOOD
|
| 2346 |
+
$REPLACE_fashionable
|
| 2347 |
+
$REPLACE_club
|
| 2348 |
+
$APPEND_end
|
| 2349 |
+
$REPLACE_path
|
| 2350 |
+
$APPEND_+
|
| 2351 |
+
$REPLACE_top
|
| 2352 |
+
$APPEND_happy
|
| 2353 |
+
$REPLACE_lay
|
| 2354 |
+
$REPLACE_accident
|
| 2355 |
+
$REPLACE_festival
|
| 2356 |
+
$REPLACE_Later
|
| 2357 |
+
$REPLACE_destroyed
|
| 2358 |
+
$APPEND_plan
|
| 2359 |
+
$APPEND_famous
|
| 2360 |
+
$REPLACE_safely
|
| 2361 |
+
$APPEND_related
|
| 2362 |
+
$REPLACE_suit
|
| 2363 |
+
$REPLACE_stand
|
| 2364 |
+
$REPLACE_contrast
|
| 2365 |
+
$APPEND_period
|
| 2366 |
+
$REPLACE_highest
|
| 2367 |
+
$REPLACE_habits
|
| 2368 |
+
$APPEND_First
|
| 2369 |
+
$REPLACE_January
|
| 2370 |
+
$REPLACE_putting
|
| 2371 |
+
$REPLACE_grew
|
| 2372 |
+
$REPLACE_degrees
|
| 2373 |
+
$REPLACE_latter
|
| 2374 |
+
$REPLACE_extent
|
| 2375 |
+
$REPLACE_lang
|
| 2376 |
+
$REPLACE_episode
|
| 2377 |
+
$REPLACE_physically
|
| 2378 |
+
$APPEND_types
|
| 2379 |
+
$REPLACE_cooked
|
| 2380 |
+
$REPLACE_original
|
| 2381 |
+
$REPLACE_fresh
|
| 2382 |
+
$APPEND_world
|
| 2383 |
+
$REPLACE_l
|
| 2384 |
+
$REPLACE_Year
|
| 2385 |
+
$APPEND_wanted
|
| 2386 |
+
$REPLACE_Wednesday
|
| 2387 |
+
$REPLACE_unique
|
| 2388 |
+
$REPLACE_active
|
| 2389 |
+
$REPLACE_center
|
| 2390 |
+
$APPEND_problems
|
| 2391 |
+
$REPLACE_encourage
|
| 2392 |
+
$APPEND_8
|
| 2393 |
+
$REPLACE_individual
|
| 2394 |
+
$REPLACE_included
|
| 2395 |
+
$REPLACE_suggestions
|
| 2396 |
+
$REPLACE_sea
|
| 2397 |
+
$REPLACE_smoothly
|
| 2398 |
+
$REPLACE_headache
|
| 2399 |
+
$REPLACE_Was
|
| 2400 |
+
$REPLACE_Internet
|
| 2401 |
+
$REPLACE_pleasure
|
| 2402 |
+
$REPLACE_Thursday
|
| 2403 |
+
$REPLACE_board
|
| 2404 |
+
$REPLACE_phrases
|
| 2405 |
+
$REPLACE_built
|
| 2406 |
+
$APPEND_caused
|
| 2407 |
+
$REPLACE_subjects
|
| 2408 |
+
$APPEND_places
|
| 2409 |
+
$REPLACE_grammatical
|
| 2410 |
+
$REPLACE_suggest
|
| 2411 |
+
$APPEND_big
|
| 2412 |
+
$REPLACE_bath
|
| 2413 |
+
$APPEND_train
|
| 2414 |
+
$REPLACE_hesitant
|
| 2415 |
+
$APPEND_seriously
|
| 2416 |
+
$REPLACE_deep
|
| 2417 |
+
$APPEND_children
|
| 2418 |
+
$REPLACE_refreshed
|
| 2419 |
+
$APPEND_Correct
|
| 2420 |
+
$APPEND_yourself
|
| 2421 |
+
$APPEND_THE
|
| 2422 |
+
$REPLACE_reasonable
|
| 2423 |
+
$APPEND_spend
|
| 2424 |
+
$APPEND_skill
|
| 2425 |
+
$REPLACE_obvious
|
| 2426 |
+
$REPLACE_Friday
|
| 2427 |
+
$REPLACE_soup
|
| 2428 |
+
$REPLACE_basketball
|
| 2429 |
+
$REPLACE_Your
|
| 2430 |
+
$REPLACE_drawing
|
| 2431 |
+
$REPLACE_m
|
| 2432 |
+
$APPEND_sentences
|
| 2433 |
+
$REPLACE_english
|
| 2434 |
+
$APPEND_fell
|
| 2435 |
+
$REPLACE_colder
|
| 2436 |
+
$REPLACE_car
|
| 2437 |
+
$APPEND_group
|
| 2438 |
+
$REPLACE_receiving
|
| 2439 |
+
$REPLACE_sun
|
| 2440 |
+
$APPEND_15
|
| 2441 |
+
$APPEND_hot
|
| 2442 |
+
$APPEND_verb
|
| 2443 |
+
$REPLACE_technical
|
| 2444 |
+
$REPLACE_Through
|
| 2445 |
+
$APPEND_buy
|
| 2446 |
+
$REPLACE_route
|
| 2447 |
+
$REPLACE_Vietnamese
|
| 2448 |
+
$REPLACE_grandfather
|
| 2449 |
+
$REPLACE_April
|
| 2450 |
+
$REPLACE_lasts
|
| 2451 |
+
$REPLACE_environmentally
|
| 2452 |
+
$REPLACE_progress
|
| 2453 |
+
$REPLACE_telling
|
| 2454 |
+
$REPLACE_preparation
|
| 2455 |
+
$REPLACE_supermarket
|
| 2456 |
+
$REPLACE_Perhaps
|
| 2457 |
+
$REPLACE_plays
|
| 2458 |
+
$REPLACE_driver
|
| 2459 |
+
$REPLACE_anyway
|
| 2460 |
+
$APPEND_within
|
| 2461 |
+
$REPLACE_Vietnam
|
| 2462 |
+
$REPLACE_green
|
| 2463 |
+
$REPLACE_access
|
| 2464 |
+
$APPEND_t
|
| 2465 |
+
$REPLACE_concerns
|
| 2466 |
+
$REPLACE_laptop
|
| 2467 |
+
$APPEND_eventually
|
| 2468 |
+
$REPLACE_fried
|
| 2469 |
+
$REPLACE_pieces
|
| 2470 |
+
$REPLACE_security
|
| 2471 |
+
$REPLACE_condition
|
| 2472 |
+
$REPLACE_dreams
|
| 2473 |
+
$REPLACE_reminded
|
| 2474 |
+
$REPLACE_December
|
| 2475 |
+
$REPLACE_finding
|
| 2476 |
+
$REPLACE_produced
|
| 2477 |
+
$REPLACE_broken
|
| 2478 |
+
$REPLACE_raising
|
| 2479 |
+
$REPLACE_specific
|
| 2480 |
+
$REPLACE_humid
|
| 2481 |
+
$APPEND_reason
|
| 2482 |
+
$REPLACE_programming
|
| 2483 |
+
$REPLACE_brush
|
| 2484 |
+
$REPLACE_powerful
|
| 2485 |
+
$REPLACE_shape
|
| 2486 |
+
$REPLACE_involves
|
| 2487 |
+
$APPEND_summer
|
| 2488 |
+
$REPLACE_kinds
|
| 2489 |
+
$APPEND_eat
|
| 2490 |
+
$REPLACE_market
|
| 2491 |
+
$REPLACE_Introducing
|
| 2492 |
+
$APPEND_kept
|
| 2493 |
+
$APPEND_information
|
| 2494 |
+
$REPLACE_Filipino
|
| 2495 |
+
$REPLACE_hang
|
| 2496 |
+
$REPLACE_nature
|
| 2497 |
+
$REPLACE_stood
|
| 2498 |
+
$REPLACE_oldest
|
| 2499 |
+
$APPEND_books
|
| 2500 |
+
$APPEND_top
|
| 2501 |
+
$REPLACE_physical
|
| 2502 |
+
$REPLACE_Thai
|
| 2503 |
+
$REPLACE_effort
|
| 2504 |
+
$REPLACE_U
|
| 2505 |
+
$APPEND_phone
|
| 2506 |
+
$REPLACE_author
|
| 2507 |
+
$REPLACE_imagined
|
| 2508 |
+
$REPLACE_request
|
| 2509 |
+
$REPLACE_Australian
|
| 2510 |
+
$REPLACE_didn
|
| 2511 |
+
$REPLACE_Something
|
| 2512 |
+
$REPLACE_translator
|
| 2513 |
+
$REPLACE_text
|
| 2514 |
+
$APPEND_account
|
| 2515 |
+
$REPLACE_protect
|
| 2516 |
+
$REPLACE_resources
|
| 2517 |
+
$REPLACE_Additionally
|
| 2518 |
+
$APPEND_afterwards
|
| 2519 |
+
$APPEND_Should
|
| 2520 |
+
$REPLACE_awhile
|
| 2521 |
+
$REPLACE_meanings
|
| 2522 |
+
$APPEND_pictures
|
| 2523 |
+
$REPLACE_benefit
|
| 2524 |
+
$REPLACE_exist
|
| 2525 |
+
$REPLACE_connection
|
| 2526 |
+
$REPLACE_impression
|
| 2527 |
+
$APPEND_meeting
|
| 2528 |
+
$REPLACE_electrical
|
| 2529 |
+
$APPEND_style
|
| 2530 |
+
$REPLACE_larger
|
| 2531 |
+
$REPLACE_hotter
|
| 2532 |
+
$REPLACE_foot
|
| 2533 |
+
$APPEND_further
|
| 2534 |
+
$REPLACE_described
|
| 2535 |
+
$REPLACE_note
|
| 2536 |
+
$REPLACE_football
|
| 2537 |
+
$APPEND_ourselves
|
| 2538 |
+
$REPLACE_searched
|
| 2539 |
+
$REPLACE_temporary
|
| 2540 |
+
$REPLACE_semester
|
| 2541 |
+
$REPLACE_announced
|
| 2542 |
+
$REPLACE_Suddenly
|
| 2543 |
+
$APPEND_others
|
| 2544 |
+
$APPEND_goes
|
| 2545 |
+
$REPLACE_sort
|
| 2546 |
+
$REPLACE_itself
|
| 2547 |
+
$REPLACE_rich
|
| 2548 |
+
$APPEND_song
|
| 2549 |
+
$REPLACE_memorable
|
| 2550 |
+
$REPLACE_Europe
|
| 2551 |
+
$REPLACE_features
|
| 2552 |
+
$REPLACE_apply
|
| 2553 |
+
$REPLACE_celebrated
|
| 2554 |
+
$REPLACE_delivery
|
| 2555 |
+
$REPLACE_winter
|
| 2556 |
+
$REPLACE_miss
|
| 2557 |
+
$REPLACE_application
|
| 2558 |
+
$APPEND_onwards
|
| 2559 |
+
$REPLACE_population
|
| 2560 |
+
$REPLACE_failure
|
| 2561 |
+
$REPLACE_lazy
|
| 2562 |
+
$REPLACE_scored
|
| 2563 |
+
$REPLACE_November
|
| 2564 |
+
$APPEND_travel
|
| 2565 |
+
$REPLACE_Let
|
| 2566 |
+
$REPLACE_alcoholic
|
| 2567 |
+
$REPLACE_disappointment
|
| 2568 |
+
$REPLACE_severe
|
| 2569 |
+
$REPLACE_effect
|
| 2570 |
+
$REPLACE_speed
|
| 2571 |
+
$APPEND_How
|
| 2572 |
+
$APPEND_sounds
|
| 2573 |
+
$REPLACE_cooler
|
| 2574 |
+
$REPLACE_'cause
|
| 2575 |
+
$APPEND_mean
|
| 2576 |
+
$REPLACE_divided
|
| 2577 |
+
$REPLACE_ha
|
| 2578 |
+
$REPLACE_9
|
| 2579 |
+
$REPLACE_advantages
|
| 2580 |
+
$APPEND_call
|
| 2581 |
+
$REPLACE_21st
|
| 2582 |
+
$REPLACE_fit
|
| 2583 |
+
$REPLACE_lit
|
| 2584 |
+
$REPLACE_directly
|
| 2585 |
+
$REPLACE_videos
|
| 2586 |
+
$REPLACE_pressure
|
| 2587 |
+
$REPLACE_pursue
|
| 2588 |
+
$REPLACE_forgotten
|
| 2589 |
+
$REPLACE_industry
|
| 2590 |
+
$REPLACE_Speaking
|
| 2591 |
+
$APPEND_enjoy
|
| 2592 |
+
$REPLACE_Should
|
| 2593 |
+
$REPLACE_grown
|
| 2594 |
+
$REPLACE_participated
|
| 2595 |
+
$REPLACE_treat
|
| 2596 |
+
$REPLACE_expression
|
| 2597 |
+
$REPLACE_fly
|
| 2598 |
+
$REPLACE_tall
|
| 2599 |
+
$REPLACE_situations
|
| 2600 |
+
$REPLACE_host
|
| 2601 |
+
$REPLACE_visitors
|
| 2602 |
+
$APPEND_hear
|
| 2603 |
+
$REPLACE_Instead
|
| 2604 |
+
$REPLACE_agreed
|
| 2605 |
+
$REPLACE_affects
|
| 2606 |
+
$REPLACE_drew
|
| 2607 |
+
$REPLACE_spending
|
| 2608 |
+
$REPLACE_huge
|
| 2609 |
+
$REPLACE_ill
|
| 2610 |
+
$REPLACE_tradition
|
| 2611 |
+
$REPLACE_argue
|
| 2612 |
+
$REPLACE_turns
|
| 2613 |
+
$REPLACE_ground
|
| 2614 |
+
$REPLACE_sometime
|
| 2615 |
+
$REPLACE_Italy
|
| 2616 |
+
$APPEND_works
|
| 2617 |
+
$REPLACE_likely
|
| 2618 |
+
$REPLACE_Madam
|
| 2619 |
+
$APPEND_questions
|
| 2620 |
+
$REPLACE_ceremonies
|
| 2621 |
+
$APPEND_turn
|
| 2622 |
+
$APPEND_Korean
|
| 2623 |
+
$REPLACE_gradually
|
| 2624 |
+
$REPLACE_financial
|
| 2625 |
+
$REPLACE_involved
|
| 2626 |
+
$REPLACE_throw
|
| 2627 |
+
$REPLACE_advertising
|
| 2628 |
+
$REPLACE_tend
|
| 2629 |
+
$REPLACE_characteristics
|
| 2630 |
+
$APPEND_among
|
| 2631 |
+
$REPLACE_electric
|
| 2632 |
+
$REPLACE_sister
|
| 2633 |
+
$APPEND_car
|
| 2634 |
+
$REPLACE_fantastic
|
| 2635 |
+
$REPLACE_examination
|
| 2636 |
+
$APPEND_city
|
| 2637 |
+
$REPLACE_eaten
|
| 2638 |
+
$REPLACE_film
|
| 2639 |
+
$APPEND_small
|
| 2640 |
+
$REPLACE_players
|
| 2641 |
+
$REPLACE_stores
|
| 2642 |
+
$REPLACE_machine
|
| 2643 |
+
$REPLACE_managed
|
| 2644 |
+
$REPLACE_tour
|
| 2645 |
+
$APPEND_video
|
| 2646 |
+
$REPLACE_journals
|
| 2647 |
+
$REPLACE_guys
|
| 2648 |
+
$APPEND_meet
|
| 2649 |
+
$REPLACE_deeply
|
| 2650 |
+
$REPLACE_floor
|
| 2651 |
+
$REPLACE_keeps
|
| 2652 |
+
$REPLACE_talks
|
| 2653 |
+
$REPLACE_focusing
|
| 2654 |
+
$REPLACE_mysterious
|
| 2655 |
+
$APPEND_less
|
| 2656 |
+
$REPLACE_rice
|
| 2657 |
+
$REPLACE_recovered
|
| 2658 |
+
$REPLACE_injured
|
| 2659 |
+
$REPLACE_poorly
|
| 2660 |
+
$REPLACE_comedy
|
| 2661 |
+
$REPLACE_cigarettes
|
| 2662 |
+
$REPLACE_anime
|
| 2663 |
+
$REPLACE_influence
|
| 2664 |
+
$REPLACE_Eventually
|
| 2665 |
+
$REPLACE_offered
|
| 2666 |
+
$REPLACE_sale
|
| 2667 |
+
$REPLACE_effectively
|
| 2668 |
+
$REPLACE_disappointing
|
| 2669 |
+
$REPLACE_illness
|
| 2670 |
+
$REPLACE_comments
|
| 2671 |
+
$APPEND_talk
|
| 2672 |
+
$REPLACE_contains
|
| 2673 |
+
$APPEND_People
|
| 2674 |
+
$APPEND_power
|
| 2675 |
+
$REPLACE_31st
|
| 2676 |
+
$REPLACE_distance
|
| 2677 |
+
$REPLACE_appears
|
| 2678 |
+
$REPLACE_importance
|
| 2679 |
+
$REPLACE_choosing
|
| 2680 |
+
$APPEND_interesting
|
| 2681 |
+
$REPLACE_snow
|
| 2682 |
+
$APPEND_o
|
| 2683 |
+
$REPLACE_tennis
|
| 2684 |
+
$REPLACE_continues
|
| 2685 |
+
$REPLACE_dress
|
| 2686 |
+
$REPLACE_percent
|
| 2687 |
+
$REPLACE_size
|
| 2688 |
+
$REPLACE_dictionaries
|
| 2689 |
+
$APPEND_seems
|
| 2690 |
+
$REPLACE_fever
|
| 2691 |
+
$APPEND_etc
|
| 2692 |
+
$APPEND_Though
|
| 2693 |
+
$REPLACE_whereas
|
| 2694 |
+
$APPEND_several
|
| 2695 |
+
$APPEND_far
|
| 2696 |
+
$APPEND_classes
|
| 2697 |
+
$APPEND_public
|
| 2698 |
+
$REPLACE_traffic
|
| 2699 |
+
$REPLACE_damage
|
| 2700 |
+
$APPEND_nothing
|
| 2701 |
+
$REPLACE_worthwhile
|
| 2702 |
+
$REPLACE_appreciated
|
| 2703 |
+
$REPLACE_articles
|
| 2704 |
+
$APPEND_begin
|
| 2705 |
+
$APPEND_needed
|
| 2706 |
+
$REPLACE_recommendations
|
| 2707 |
+
$REPLACE_don
|
| 2708 |
+
$REPLACE_buildings
|
| 2709 |
+
$APPEND_four
|
| 2710 |
+
$REPLACE_jealous
|
| 2711 |
+
$REPLACE_seminar
|
| 2712 |
+
$APPEND_gradually
|
| 2713 |
+
$REPLACE_complaints
|
| 2714 |
+
$REPLACE_Nothing
|
| 2715 |
+
$REPLACE_advance
|
| 2716 |
+
$REPLACE_flowers
|
| 2717 |
+
$APPEND_Starting
|
| 2718 |
+
$REPLACE_beyond
|
| 2719 |
+
$REPLACE_advertised
|
| 2720 |
+
$APPEND_mainly
|
| 2721 |
+
$APPEND_possible
|
| 2722 |
+
$REPLACE_suffering
|
| 2723 |
+
$APPEND_12
|
| 2724 |
+
$REPLACE_Hopefully
|
| 2725 |
+
$APPEND_countries
|
| 2726 |
+
$APPEND_similar
|
| 2727 |
+
$REPLACE_quick
|
| 2728 |
+
$REPLACE_general
|
| 2729 |
+
$REPLACE_successfully
|
| 2730 |
+
$REPLACE_dark
|
| 2731 |
+
$REPLACE_unbelievable
|
| 2732 |
+
$REPLACE_causing
|
| 2733 |
+
$REPLACE_13th
|
| 2734 |
+
$REPLACE_unexpected
|
| 2735 |
+
$REPLACE_begins
|
| 2736 |
+
$REPLACE_tea
|
| 2737 |
+
$REPLACE_Sunday
|
| 2738 |
+
$APPEND_somewhere
|
| 2739 |
+
$REPLACE_digital
|
| 2740 |
+
$APPEND_stories
|
| 2741 |
+
$APPEND_idea
|
| 2742 |
+
$APPEND_tired
|
| 2743 |
+
$APPEND_family
|
| 2744 |
+
$REPLACE_animation
|
| 2745 |
+
$REPLACE_shot
|
| 2746 |
+
$REPLACE_Or
|
| 2747 |
+
$APPEND_managed
|
| 2748 |
+
$REPLACE_bus
|
| 2749 |
+
$APPEND_close
|
| 2750 |
+
$REPLACE_disease
|
| 2751 |
+
$REPLACE_desire
|
| 2752 |
+
$REPLACE_carried
|
| 2753 |
+
$REPLACE_disappear
|
| 2754 |
+
$REPLACE_essential
|
| 2755 |
+
$APPEND_news
|
| 2756 |
+
$REPLACE_forced
|
| 2757 |
+
$REPLACE_fault
|
| 2758 |
+
$REPLACE_translation
|
| 2759 |
+
$REPLACE_television
|
| 2760 |
+
$REPLACE_cried
|
| 2761 |
+
$REPLACE_freely
|
| 2762 |
+
$REPLACE_Valentine
|
| 2763 |
+
$REPLACE_somewhat
|
| 2764 |
+
$REPLACE_operation
|
| 2765 |
+
$REPLACE_conversational
|
| 2766 |
+
$APPEND_absolutely
|
| 2767 |
+
$APPEND_properly
|
| 2768 |
+
$REPLACE_sites
|
| 2769 |
+
$REPLACE_allergies
|
| 2770 |
+
$REPLACE_salary
|
| 2771 |
+
$REPLACE_rise
|
| 2772 |
+
$REPLACE_entertainment
|
| 2773 |
+
$REPLACE_kitchen
|
| 2774 |
+
$REPLACE_emotional
|
| 2775 |
+
$REPLACE_McDonalds
|
| 2776 |
+
$REPLACE_extra
|
| 2777 |
+
$APPEND_nearby
|
| 2778 |
+
$REPLACE_mention
|
| 2779 |
+
$APPEND_Here
|
| 2780 |
+
$APPEND_nice
|
| 2781 |
+
$APPEND_college
|
| 2782 |
+
$APPEND_Before
|
| 2783 |
+
$APPEND_form
|
| 2784 |
+
$REPLACE_likes
|
| 2785 |
+
$APPEND_turned
|
| 2786 |
+
$REPLACE_rent
|
| 2787 |
+
$REPLACE_tourists
|
| 2788 |
+
$REPLACE_unknown
|
| 2789 |
+
$REPLACE_actors
|
| 2790 |
+
$APPEND_longer
|
| 2791 |
+
$REPLACE_fill
|
| 2792 |
+
$REPLACE_Nobody
|
| 2793 |
+
$REPLACE_Singapore
|
| 2794 |
+
$REPLACE_helping
|
| 2795 |
+
$REPLACE_exercises
|
| 2796 |
+
$APPEND_real
|
| 2797 |
+
$APPEND_located
|
| 2798 |
+
$APPEND_received
|
| 2799 |
+
$APPEND_gets
|
| 2800 |
+
$APPEND_bad
|
| 2801 |
+
$REPLACE_doubt
|
| 2802 |
+
$REPLACE_sweaty
|
| 2803 |
+
$REPLACE_prefecture
|
| 2804 |
+
$REPLACE_audience
|
| 2805 |
+
$REPLACE_sports
|
| 2806 |
+
$REPLACE_minute
|
| 2807 |
+
$REPLACE_product
|
| 2808 |
+
$REPLACE_buying
|
| 2809 |
+
$REPLACE_exact
|
| 2810 |
+
$REPLACE_temporarily
|
| 2811 |
+
$REPLACE_Avatar
|
| 2812 |
+
$REPLACE_Skype
|
| 2813 |
+
$REPLACE_discussion
|
| 2814 |
+
$REPLACE_item
|
| 2815 |
+
$REPLACE_gon
|
| 2816 |
+
$REPLACE_accessories
|
| 2817 |
+
$REPLACE_incredibly
|
| 2818 |
+
$REPLACE_Where
|
| 2819 |
+
$REPLACE_World
|
| 2820 |
+
$REPLACE_advantage
|
| 2821 |
+
$REPLACE_ridiculous
|
| 2822 |
+
$REPLACE_wherever
|
| 2823 |
+
$REPLACE_shook
|
| 2824 |
+
$REPLACE_global
|
| 2825 |
+
$REPLACE_entitled
|
| 2826 |
+
$REPLACE_Working
|
| 2827 |
+
$APPEND_hours
|
| 2828 |
+
$REPLACE_Starbucks
|
| 2829 |
+
$REPLACE_routine
|
| 2830 |
+
$REPLACE_flavored
|
| 2831 |
+
$APPEND_item
|
| 2832 |
+
$REPLACE_techniques
|
| 2833 |
+
$REPLACE_creates
|
| 2834 |
+
$REPLACE_peace
|
| 2835 |
+
$REPLACE_annoyed
|
| 2836 |
+
$REPLACE_rate
|
| 2837 |
+
$REPLACE_September
|
| 2838 |
+
$REPLACE_Russian
|
| 2839 |
+
$REPLACE_assistant
|
| 2840 |
+
$REPLACE_plenty
|
| 2841 |
+
$REPLACE_local
|
| 2842 |
+
$APPEND_store
|
| 2843 |
+
$REPLACE_sooner
|
| 2844 |
+
$REPLACE_overslept
|
| 2845 |
+
$REPLACE_Everybody
|
| 2846 |
+
$REPLACE_selling
|
| 2847 |
+
$REPLACE_negative
|
| 2848 |
+
$REPLACE_setting
|
| 2849 |
+
$APPEND_helps
|
| 2850 |
+
$REPLACE_lecture
|
| 2851 |
+
$APPEND_happen
|
| 2852 |
+
$REPLACE_survive
|
| 2853 |
+
$REPLACE_art
|
| 2854 |
+
$APPEND_certainly
|
| 2855 |
+
$APPEND_fully
|
| 2856 |
+
$APPEND_above
|
| 2857 |
+
$REPLACE_speaks
|
| 2858 |
+
$REPLACE_asking
|
| 2859 |
+
$REPLACE_economical
|
| 2860 |
+
$REPLACE_salaries
|
| 2861 |
+
$APPEND_clearly
|
| 2862 |
+
$REPLACE_mail
|
| 2863 |
+
$REPLACE_holding
|
| 2864 |
+
$REPLACE_organise
|
| 2865 |
+
$REPLACE_efficient
|
| 2866 |
+
$APPEND_name
|
| 2867 |
+
$REPLACE_constantly
|
| 2868 |
+
$REPLACE_overtime
|
| 2869 |
+
$REPLACE_grandma
|
| 2870 |
+
$REPLACE_returning
|
| 2871 |
+
$REPLACE_laziness
|
| 2872 |
+
$REPLACE_importantly
|
| 2873 |
+
$APPEND_true
|
| 2874 |
+
$APPEND_series
|
| 2875 |
+
$REPLACE_converse
|
| 2876 |
+
$APPEND_session
|
| 2877 |
+
$REPLACE_sugar
|
| 2878 |
+
$APPEND_Currently
|
| 2879 |
+
$REPLACE_mentally
|
| 2880 |
+
$APPEND_starts
|
| 2881 |
+
$REPLACE_theater
|
| 2882 |
+
$APPEND_tonight
|
| 2883 |
+
$REPLACE_succeeded
|
| 2884 |
+
$REPLACE_awful
|
| 2885 |
+
$REPLACE_political
|
| 2886 |
+
$APPEND_important
|
| 2887 |
+
$REPLACE_log
|
| 2888 |
+
$REPLACE_awesome
|
| 2889 |
+
$REPLACE_00
|
| 2890 |
+
$APPEND_Did
|
| 2891 |
+
$REPLACE_announcement
|
| 2892 |
+
$REPLACE_addicted
|
| 2893 |
+
$REPLACE_disaster
|
| 2894 |
+
$REPLACE_page
|
| 2895 |
+
$REPLACE_blossom
|
| 2896 |
+
$REPLACE_stars
|
| 2897 |
+
$REPLACE_presentation
|
| 2898 |
+
$REPLACE_Nevertheless
|
| 2899 |
+
$APPEND_talking
|
| 2900 |
+
$APPEND_Instead
|
| 2901 |
+
$APPEND_Chinese
|
| 2902 |
+
$REPLACE_Festival
|
| 2903 |
+
$REPLACE_reasonably
|
| 2904 |
+
$APPEND_someday
|
| 2905 |
+
$REPLACE_expressions
|
| 2906 |
+
$APPEND_Lately
|
| 2907 |
+
$REPLACE_average
|
| 2908 |
+
$APPEND_season
|
| 2909 |
+
$REPLACE_cover
|
| 2910 |
+
$REPLACE_manager
|
| 2911 |
+
$REPLACE_wife
|
| 2912 |
+
$REPLACE_12
|
| 2913 |
+
$REPLACE_possibly
|
| 2914 |
+
$REPLACE_approaching
|
| 2915 |
+
$REPLACE_keeping
|
| 2916 |
+
$REPLACE_motorcycle
|
| 2917 |
+
$REPLACE_happily
|
| 2918 |
+
$APPEND_items
|
| 2919 |
+
$REPLACE_cherry
|
| 2920 |
+
$REPLACE_shall
|
| 2921 |
+
$REPLACE_determined
|
| 2922 |
+
$REPLACE_cheerful
|
| 2923 |
+
$REPLACE_ahead
|
| 2924 |
+
$REPLACE_solution
|
| 2925 |
+
$REPLACE_patients
|
| 2926 |
+
$REPLACE_unforgettable
|
| 2927 |
+
$REPLACE_decreasing
|
| 2928 |
+
$REPLACE_laid
|
| 2929 |
+
$REPLACE_arrange
|
| 2930 |
+
$REPLACE_content
|
| 2931 |
+
$REPLACE_starring
|
| 2932 |
+
$REPLACE_opening
|
| 2933 |
+
$REPLACE_continuing
|
| 2934 |
+
$REPLACE_bloom
|
| 2935 |
+
$REPLACE_concern
|
| 2936 |
+
$APPEND_towards
|
| 2937 |
+
$REPLACE_extreme
|
| 2938 |
+
$APPEND_Will
|
| 2939 |
+
$REPLACE_tests
|
| 2940 |
+
$REPLACE_replace
|
| 2941 |
+
$APPEND_mostly
|
| 2942 |
+
$REPLACE_inform
|
| 2943 |
+
$REPLACE_lying
|
| 2944 |
+
$REPLACE_barely
|
| 2945 |
+
$REPLACE_unpleasant
|
| 2946 |
+
$REPLACE_brand
|
| 2947 |
+
$REPLACE_turning
|
| 2948 |
+
$REPLACE_added
|
| 2949 |
+
$APPEND_age
|
| 2950 |
+
$REPLACE_wide
|
| 2951 |
+
$REPLACE_passing
|
| 2952 |
+
$REPLACE_production
|
| 2953 |
+
$REPLACE_23rd
|
| 2954 |
+
$REPLACE_ramen
|
| 2955 |
+
$REPLACE_occasionally
|
| 2956 |
+
$REPLACE_borrowed
|
| 2957 |
+
$REPLACE_comparison
|
| 2958 |
+
$REPLACE_curry
|
| 2959 |
+
$REPLACE_upcoming
|
| 2960 |
+
$REPLACE_begun
|
| 2961 |
+
$APPEND_mistakes
|
| 2962 |
+
$REPLACE_mouth
|
| 2963 |
+
$REPLACE_scenes
|
| 2964 |
+
$REPLACE_accidentally
|
| 2965 |
+
$REPLACE_gases
|
| 2966 |
+
$REPLACE_blog
|
| 2967 |
+
$REPLACE_Disney
|
| 2968 |
+
$APPEND_straight
|
| 2969 |
+
$REPLACE_topics
|
| 2970 |
+
$REPLACE_register
|
| 2971 |
+
$REPLACE_color
|
| 2972 |
+
$REPLACE_explained
|
| 2973 |
+
$APPEND_shopping
|
| 2974 |
+
$REPLACE_Taiwan
|
| 2975 |
+
$REPLACE_sales
|
| 2976 |
+
$REPLACE_dictionary
|
| 2977 |
+
$REPLACE_inexpensive
|
| 2978 |
+
$APPEND_directly
|
| 2979 |
+
$REPLACE_comfortably
|
| 2980 |
+
$REPLACE_suprised
|
| 2981 |
+
$APPEND_AM
|
| 2982 |
+
$REPLACE_dance
|
| 2983 |
+
$REPLACE_eager
|
| 2984 |
+
$REPLACE_envious
|
| 2985 |
+
$REPLACE_lie
|
| 2986 |
+
$REPLACE_Apart
|
| 2987 |
+
$REPLACE_closed
|
| 2988 |
+
$REPLACE_brother
|
| 2989 |
+
$REPLACE_hopefully
|
| 2990 |
+
$APPEND_caught
|
| 2991 |
+
$REPLACE_background
|
| 2992 |
+
$REPLACE_conditions
|
| 2993 |
+
$REPLACE_attracted
|
| 2994 |
+
$REPLACE_aim
|
| 2995 |
+
$REPLACE_twenty
|
| 2996 |
+
$REPLACE_Each
|
| 2997 |
+
$APPEND_air
|
| 2998 |
+
$REPLACE_technique
|
| 2999 |
+
$REPLACE_umbrella
|
| 3000 |
+
$REPLACE_Buddhist
|
| 3001 |
+
$REPLACE_yen
|
| 3002 |
+
$APPEND_clothes
|
| 3003 |
+
$APPEND_open
|
| 3004 |
+
$REPLACE_originally
|
| 3005 |
+
$APPEND_OK
|
| 3006 |
+
$REPLACE_complex
|
| 3007 |
+
$APPEND_upon
|
| 3008 |
+
$REPLACE_<
|
| 3009 |
+
$REPLACE_showing
|
| 3010 |
+
$REPLACE_weaknesses
|
| 3011 |
+
$REPLACE_OR
|
| 3012 |
+
$REPLACE_potato
|
| 3013 |
+
$APPEND_photo
|
| 3014 |
+
$REPLACE_flavor
|
| 3015 |
+
$REPLACE_Tuesday
|
| 3016 |
+
$REPLACE_organized
|
| 3017 |
+
$REPLACE_preferred
|
| 3018 |
+
$REPLACE_state
|
| 3019 |
+
$APPEND_normally
|
| 3020 |
+
$APPEND_areas
|
| 3021 |
+
$REPLACE_arranged
|
| 3022 |
+
$REPLACE_embarrassing
|
| 3023 |
+
$REPLACE_positively
|
| 3024 |
+
$REPLACE_coworkers
|
| 3025 |
+
$APPEND_host
|
| 3026 |
+
$REPLACE_influenced
|
| 3027 |
+
$REPLACE_respect
|
| 3028 |
+
$REPLACE_separate
|
| 3029 |
+
$REPLACE_comedies
|
| 3030 |
+
$APPEND_listened
|
| 3031 |
+
$REPLACE_report
|
| 3032 |
+
$REPLACE_Using
|
| 3033 |
+
$REPLACE_performing
|
| 3034 |
+
$REPLACE_construction
|
| 3035 |
+
$REPLACE_trees
|
| 3036 |
+
$REPLACE_conversations
|
| 3037 |
+
$REPLACE_western
|
| 3038 |
+
$APPEND_drinking
|
| 3039 |
+
$APPEND_Next
|
| 3040 |
+
$APPEND_points
|
| 3041 |
+
$APPEND_young
|
| 3042 |
+
$REPLACE_provides
|
| 3043 |
+
$REPLACE_motivation
|
| 3044 |
+
$REPLACE_muscle
|
| 3045 |
+
$REPLACE_diet
|
| 3046 |
+
$APPEND_fluently
|
| 3047 |
+
$REPLACE_Such
|
| 3048 |
+
$REPLACE_task
|
| 3049 |
+
$REPLACE_sounded
|
| 3050 |
+
$REPLACE_schools
|
| 3051 |
+
$REPLACE_park
|
| 3052 |
+
$APPEND_various
|
| 3053 |
+
$APPEND_five
|
| 3054 |
+
$REPLACE_unhappy
|
| 3055 |
+
$REPLACE_Due
|
| 3056 |
+
$REPLACE_alright
|
| 3057 |
+
$REPLACE_campus
|
| 3058 |
+
$APPEND_foreign
|
| 3059 |
+
$APPEND_studies
|
| 3060 |
+
$REPLACE_handle
|
| 3061 |
+
$REPLACE_continuous
|
| 3062 |
+
$REPLACE_drug
|
| 3063 |
+
$REPLACE_expenses
|
| 3064 |
+
$REPLACE_aged
|
| 3065 |
+
$REPLACE_surrounded
|
| 3066 |
+
$REPLACE_thus
|
| 3067 |
+
$REPLACE_noise
|
| 3068 |
+
$REPLACE_healthier
|
| 3069 |
+
$REPLACE_potential
|
| 3070 |
+
$REPLACE_Potter
|
| 3071 |
+
$APPEND_self
|
| 3072 |
+
$APPEND_picture
|
| 3073 |
+
$REPLACE_None
|
| 3074 |
+
$REPLACE_sudden
|
| 3075 |
+
$REPLACE_lifestyles
|
| 3076 |
+
$APPEND_given
|
| 3077 |
+
$REPLACE_aspects
|
| 3078 |
+
$REPLACE_specifically
|
| 3079 |
+
$REPLACE_destination
|
| 3080 |
+
$REPLACE_followed
|
| 3081 |
+
$REPLACE_Other
|
| 3082 |
+
$REPLACE_horrible
|
| 3083 |
+
$REPLACE_radiation
|
| 3084 |
+
$REPLACE_essays
|
| 3085 |
+
$REPLACE_apologize
|
| 3086 |
+
$REPLACE_placed
|
| 3087 |
+
$APPEND_future
|
| 3088 |
+
$REPLACE_awkward
|
| 3089 |
+
$REPLACE_thirty
|
| 3090 |
+
$REPLACE_kids
|
| 3091 |
+
$REPLACE_responsibilities
|
| 3092 |
+
$REPLACE_Generally
|
| 3093 |
+
$REPLACE_relatives
|
| 3094 |
+
$APPEND_More
|
| 3095 |
+
$REPLACE_safer
|
| 3096 |
+
$REPLACE_hoping
|
| 3097 |
+
$REPLACE_heroes
|
| 3098 |
+
$REPLACE_psychological
|
| 3099 |
+
$REPLACE_posted
|
| 3100 |
+
$REPLACE_treatment
|
| 3101 |
+
$REPLACE_glasses
|
| 3102 |
+
$REPLACE_souvenirs
|
| 3103 |
+
$REPLACE_entertaining
|
| 3104 |
+
$APPEND_Tomorrow
|
| 3105 |
+
$APPEND_activities
|
| 3106 |
+
$REPLACE_serve
|
| 3107 |
+
$REPLACE_actions
|
| 3108 |
+
$APPEND_teacher
|
| 3109 |
+
$REPLACE_o
|
| 3110 |
+
$REPLACE_forever
|
| 3111 |
+
$REPLACE_colour
|
| 3112 |
+
$APPEND_change
|
| 3113 |
+
$REPLACE_plants
|
| 3114 |
+
$REPLACE_fulfill
|
| 3115 |
+
$REPLACE_animated
|
| 3116 |
+
$REPLACE_textbook
|
| 3117 |
+
$REPLACE_mathematics
|
| 3118 |
+
$REPLACE_figured
|
| 3119 |
+
$APPEND_running
|
| 3120 |
+
$REPLACE_computers
|
| 3121 |
+
$REPLACE_Singaporean
|
| 3122 |
+
$REPLACE_imagination
|
| 3123 |
+
$REPLACE_runny
|
| 3124 |
+
$REPLACE_bill
|
| 3125 |
+
$REPLACE_meals
|
| 3126 |
+
$APPEND_perhaps
|
| 3127 |
+
$REPLACE_stupid
|
| 3128 |
+
$REPLACE_worries
|
| 3129 |
+
$APPEND_bought
|
| 3130 |
+
$APPEND_article
|
| 3131 |
+
$REPLACE_wasted
|
| 3132 |
+
$REPLACE_falling
|
| 3133 |
+
$REPLACE_necessity
|
| 3134 |
+
$APPEND_common
|
| 3135 |
+
$REPLACE_Tokyo
|
| 3136 |
+
$REPLACE_fascinating
|
| 3137 |
+
$REPLACE_Only
|
| 3138 |
+
$REPLACE_tense
|
| 3139 |
+
$APPEND_Ever
|
| 3140 |
+
$REPLACE_behaviour
|
| 3141 |
+
$REPLACE_magazines
|
| 3142 |
+
$REPLACE_cultures
|
| 3143 |
+
$REPLACE_rid
|
| 3144 |
+
$REPLACE_choices
|
| 3145 |
+
$REPLACE_track
|
| 3146 |
+
$REPLACE_complaint
|
| 3147 |
+
$REPLACE_white
|
| 3148 |
+
$REPLACE_approximately
|
| 3149 |
+
$REPLACE_largest
|
| 3150 |
+
$REPLACE_smart
|
| 3151 |
+
$APPEND_finish
|
| 3152 |
+
$REPLACE_acting
|
| 3153 |
+
$REPLACE_foolish
|
| 3154 |
+
$REPLACE_prices
|
| 3155 |
+
$REPLACE_r
|
| 3156 |
+
$REPLACE_swim
|
| 3157 |
+
$REPLACE_review
|
| 3158 |
+
$REPLACE_shameful
|
| 3159 |
+
$APPEND_Anyway
|
| 3160 |
+
$REPLACE_senior
|
| 3161 |
+
$REPLACE_proper
|
| 3162 |
+
$REPLACE_provided
|
| 3163 |
+
$REPLACE_troublesome
|
| 3164 |
+
$APPEND_known
|
| 3165 |
+
$REPLACE_homes
|
| 3166 |
+
$REPLACE_spirit
|
| 3167 |
+
$REPLACE_ga
|
| 3168 |
+
$REPLACE_Michael
|
| 3169 |
+
$APPEND_wish
|
| 3170 |
+
$APPEND_performance
|
| 3171 |
+
$REPLACE_typical
|
| 3172 |
+
$APPEND_Well
|
| 3173 |
+
$REPLACE_previously
|
| 3174 |
+
$REPLACE_fail
|
| 3175 |
+
$APPEND_itself
|
| 3176 |
+
$REPLACE_sung
|
| 3177 |
+
$REPLACE_citizens
|
| 3178 |
+
$REPLACE_rapidly
|
| 3179 |
+
$REPLACE_stadium
|
| 3180 |
+
$APPEND_page
|
| 3181 |
+
$APPEND_unfortunately
|
| 3182 |
+
$REPLACE_surprising
|
| 3183 |
+
$REPLACE_unfamiliar
|
| 3184 |
+
$REPLACE_repair
|
| 3185 |
+
$REPLACE_escape
|
| 3186 |
+
$REPLACE_actor
|
| 3187 |
+
$REPLACE_Almost
|
| 3188 |
+
$REPLACE_shoes
|
| 3189 |
+
$REPLACE_disagree
|
| 3190 |
+
$REPLACE_co
|
| 3191 |
+
$REPLACE_attempt
|
| 3192 |
+
$REPLACE_instance
|
| 3193 |
+
$REPLACE_lasted
|
| 3194 |
+
$APPEND_connect
|
| 3195 |
+
$APPEND_San
|
| 3196 |
+
$REPLACE_hairstyle
|
| 3197 |
+
$REPLACE_internship
|
| 3198 |
+
$REPLACE_Bye
|
| 3199 |
+
$REPLACE_tourist
|
| 3200 |
+
$REPLACE_5th
|
| 3201 |
+
$REPLACE_cousin
|
| 3202 |
+
$REPLACE_beside
|
| 3203 |
+
$REPLACE_facilities
|
| 3204 |
+
$REPLACE_yummy
|
| 3205 |
+
$REPLACE_prove
|
| 3206 |
+
$APPEND_certain
|
| 3207 |
+
$REPLACE_beginners
|
| 3208 |
+
$REPLACE_worn
|
| 3209 |
+
$REPLACE_wont
|
| 3210 |
+
$APPEND_wearing
|
| 3211 |
+
$REPLACE_improves
|
| 3212 |
+
$REPLACE_electronics
|
| 3213 |
+
$REPLACE_realistic
|
| 3214 |
+
$REPLACE_annoying
|
| 3215 |
+
$REPLACE_dreamed
|
| 3216 |
+
$APPEND_results
|
| 3217 |
+
$REPLACE_certainly
|
| 3218 |
+
$REPLACE_classroom
|
| 3219 |
+
$REPLACE_argument
|
| 3220 |
+
$REPLACE_warmth
|
| 3221 |
+
$REPLACE_achieved
|
| 3222 |
+
$APPEND_meaning
|
| 3223 |
+
$REPLACE_photographs
|
| 3224 |
+
$REPLACE_animals
|
| 3225 |
+
$REPLACE_community
|
| 3226 |
+
$REPLACE_interests
|
| 3227 |
+
$REPLACE_medium
|
| 3228 |
+
$REPLACE_beer
|
| 3229 |
+
$REPLACE_engineer
|
| 3230 |
+
$REPLACE_Good
|
| 3231 |
+
$APPEND_looks
|
| 3232 |
+
$REPLACE_beauty
|
| 3233 |
+
$APPEND_speaker
|
| 3234 |
+
$REPLACE_connect
|
| 3235 |
+
$APPEND_driving
|
| 3236 |
+
$APPEND_Have
|
| 3237 |
+
$REPLACE_reminds
|
| 3238 |
+
$REPLACE_apologized
|
| 3239 |
+
$REPLACE_obtain
|
| 3240 |
+
$REPLACE_Probably
|
| 3241 |
+
$REPLACE_strangers
|
| 3242 |
+
$APPEND_bring
|
| 3243 |
+
$REPLACE_smile
|
| 3244 |
+
$REPLACE_exhibition
|
| 3245 |
+
$REPLACE_pot
|
| 3246 |
+
$REPLACE_encounter
|
| 3247 |
+
$APPEND_degrees
|
| 3248 |
+
$REPLACE_lights
|
| 3249 |
+
$APPEND_bus
|
| 3250 |
+
$REPLACE_movement
|
| 3251 |
+
$REPLACE_cancel
|
| 3252 |
+
$REPLACE_y
|
| 3253 |
+
$REPLACE_black
|
| 3254 |
+
$REPLACE_concentration
|
| 3255 |
+
$REPLACE_graduating
|
| 3256 |
+
$REPLACE_usage
|
| 3257 |
+
$REPLACE_handsome
|
| 3258 |
+
$APPEND_ride
|
| 3259 |
+
$REPLACE_degree
|
| 3260 |
+
$APPEND_point
|
| 3261 |
+
$APPEND_conversation
|
| 3262 |
+
$REPLACE_menu
|
| 3263 |
+
$REPLACE_assistance
|
| 3264 |
+
$REPLACE_Summer
|
| 3265 |
+
$APPEND_behind
|
| 3266 |
+
$REPLACE_police
|
| 3267 |
+
$REPLACE_15th
|
| 3268 |
+
$REPLACE_separated
|
| 3269 |
+
$REPLACE_regardless
|
| 3270 |
+
$REPLACE_significant
|
| 3271 |
+
$REPLACE_transfer
|
| 3272 |
+
$REPLACE_religious
|
| 3273 |
+
$REPLACE_tempura
|
| 3274 |
+
$REPLACE_academic
|
| 3275 |
+
$REPLACE_otherwise
|
| 3276 |
+
$REPLACE_useless
|
| 3277 |
+
$REPLACE_celebrities
|
| 3278 |
+
$REPLACE_dislike
|
| 3279 |
+
$REPLACE_11
|
| 3280 |
+
$APPEND_sound
|
| 3281 |
+
$APPEND_^
|
| 3282 |
+
$REPLACE_replaced
|
| 3283 |
+
$REPLACE_sushi
|
| 3284 |
+
$REPLACE_wind
|
| 3285 |
+
$REPLACE_web
|
| 3286 |
+
$REPLACE_Britain
|
| 3287 |
+
$REPLACE_complained
|
| 3288 |
+
$REPLACE_model
|
| 3289 |
+
$REPLACE_de
|
| 3290 |
+
$REPLACE_depends
|
| 3291 |
+
$REPLACE_pm
|
| 3292 |
+
$REPLACE_cafe
|
| 3293 |
+
$REPLACE_congratulate
|
| 3294 |
+
$REPLACE_ending
|
| 3295 |
+
$APPEND_building
|
| 3296 |
+
$REPLACE_presented
|
| 3297 |
+
$REPLACE_shut
|
| 3298 |
+
$APPEND_restaurant
|
| 3299 |
+
$APPEND_March
|
| 3300 |
+
$REPLACE_freedom
|
| 3301 |
+
$APPEND_story
|
| 3302 |
+
$REPLACE_creating
|
| 3303 |
+
$REPLACE_concept
|
| 3304 |
+
$REPLACE_conduct
|
| 3305 |
+
$REPLACE_France
|
| 3306 |
+
$REPLACE_paper
|
| 3307 |
+
$REPLACE_offers
|
| 3308 |
+
$REPLACE_Oh
|
| 3309 |
+
$REPLACE_occured
|
| 3310 |
+
$REPLACE_touched
|
| 3311 |
+
$REPLACE_travelled
|
| 3312 |
+
$APPEND_Thus
|
| 3313 |
+
$REPLACE_sickness
|
| 3314 |
+
$REPLACE_neighbor
|
| 3315 |
+
$REPLACE_paying
|
| 3316 |
+
$REPLACE_national
|
| 3317 |
+
$APPEND_needs
|
| 3318 |
+
$REPLACE_climb
|
| 3319 |
+
$REPLACE_Take
|
| 3320 |
+
$APPEND_Everyone
|
| 3321 |
+
$REPLACE_aftershocks
|
| 3322 |
+
$REPLACE_committed
|
| 3323 |
+
$REPLACE_textbooks
|
| 3324 |
+
$REPLACE_waited
|
| 3325 |
+
$REPLACE_round
|
| 3326 |
+
$REPLACE_Okay
|
| 3327 |
+
$REPLACE_eldest
|
| 3328 |
+
$APPEND_allow
|
| 3329 |
+
$REPLACE_Spanish
|
| 3330 |
+
$REPLACE_Spring
|
| 3331 |
+
$REPLACE_absence
|
| 3332 |
+
$REPLACE_actresses
|
| 3333 |
+
$REPLACE_majority
|
| 3334 |
+
$REPLACE_growth
|
| 3335 |
+
$APPEND_requires
|
| 3336 |
+
$REPLACE_About
|
| 3337 |
+
$REPLACE_intend
|
| 3338 |
+
$APPEND_deep
|
| 3339 |
+
$REPLACE_enjoyment
|
| 3340 |
+
$APPEND_raining
|
| 3341 |
+
$REPLACE_Am
|
| 3342 |
+
$REPLACE_eyes
|
| 3343 |
+
$REPLACE_Afterward
|
| 3344 |
+
$REPLACE_drugs
|
| 3345 |
+
$REPLACE_cram
|
| 3346 |
+
$REPLACE_dancing
|
| 3347 |
+
$APPEND_M
|
| 3348 |
+
$REPLACE_nationalities
|
| 3349 |
+
$REPLACE_throat
|
| 3350 |
+
$APPEND_shows
|
| 3351 |
+
$REPLACE_Facebook
|
| 3352 |
+
$APPEND_TO
|
| 3353 |
+
$REPLACE_brilliant
|
| 3354 |
+
$REPLACE_drop
|
| 3355 |
+
$REPLACE_owner
|
| 3356 |
+
$APPEND_side
|
| 3357 |
+
$REPLACE_struggling
|
| 3358 |
+
$REPLACE_100
|
| 3359 |
+
$REPLACE_surely
|
| 3360 |
+
$REPLACE_devices
|
| 3361 |
+
$APPEND_takes
|
| 3362 |
+
$REPLACE_TO
|
| 3363 |
+
$REPLACE_neighbors
|
| 3364 |
+
$REPLACE_youth
|
| 3365 |
+
$REPLACE_connected
|
| 3366 |
+
$REPLACE_passes
|
| 3367 |
+
$REPLACE_kilometers
|
| 3368 |
+
$APPEND_fun
|
| 3369 |
+
$REPLACE_viewing
|
| 3370 |
+
$REPLACE_behavior
|
| 3371 |
+
$REPLACE_chores
|
| 3372 |
+
$REPLACE_mystery
|
| 3373 |
+
$APPEND_shall
|
| 3374 |
+
$APPEND_taught
|
| 3375 |
+
$REPLACE_display
|
| 3376 |
+
$REPLACE_ensure
|
| 3377 |
+
$APPEND_online
|
| 3378 |
+
$REPLACE_assignment
|
| 3379 |
+
$REPLACE_compare
|
| 3380 |
+
$APPEND_Still
|
| 3381 |
+
$REPLACE_conditioning
|
| 3382 |
+
$REPLACE_suffered
|
| 3383 |
+
$REPLACE_haven't
|
| 3384 |
+
$REPLACE_muscles
|
| 3385 |
+
$APPEND_grammar
|
| 3386 |
+
$APPEND_Two
|
| 3387 |
+
$REPLACE_chemistry
|
| 3388 |
+
$REPLACE_consideration
|
| 3389 |
+
$REPLACE_smoking
|
| 3390 |
+
$REPLACE_Harry
|
| 3391 |
+
$APPEND_seemed
|
| 3392 |
+
$REPLACE_marry
|
| 3393 |
+
$REPLACE_hunting
|
| 3394 |
+
$REPLACE_recommendation
|
| 3395 |
+
$APPEND_previously
|
| 3396 |
+
$REPLACE_dramas
|
| 3397 |
+
$REPLACE_passionate
|
| 3398 |
+
$APPEND_ways
|
| 3399 |
+
$REPLACE_hurts
|
| 3400 |
+
$APPEND_sense
|
| 3401 |
+
$APPEND_drink
|
| 3402 |
+
$REPLACE_refrigerator
|
| 3403 |
+
$REPLACE_organised
|
| 3404 |
+
$REPLACE_cleaning
|
| 3405 |
+
$REPLACE_courage
|
| 3406 |
+
$APPEND_arrived
|
| 3407 |
+
$REPLACE_housework
|
| 3408 |
+
$REPLACE_charge
|
| 3409 |
+
$REPLACE_violin
|
| 3410 |
+
$APPEND_offer
|
| 3411 |
+
$APPEND_water
|
| 3412 |
+
$REPLACE_injuries
|
| 3413 |
+
$REPLACE_perspective
|
| 3414 |
+
$REPLACE_hoped
|
| 3415 |
+
$REPLACE_challenging
|
| 3416 |
+
$REPLACE_THE
|
| 3417 |
+
$APPEND_regarding
|
| 3418 |
+
$APPEND_Their
|
| 3419 |
+
$REPLACE_upload
|
| 3420 |
+
$REPLACE_luxurious
|
| 3421 |
+
$REPLACE_unnecessary
|
| 3422 |
+
$APPEND_harder
|
| 3423 |
+
$APPEND_twice
|
| 3424 |
+
$REPLACE_rules
|
| 3425 |
+
$APPEND_rest
|
| 3426 |
+
$REPLACE_afford
|
| 3427 |
+
$APPEND_says
|
| 3428 |
+
$REPLACE_project
|
| 3429 |
+
$REPLACE_bear
|
| 3430 |
+
$REPLACE_mainly
|
| 3431 |
+
$REPLACE_Yet
|
| 3432 |
+
$REPLACE_diligently
|
| 3433 |
+
$REPLACE_led
|
| 3434 |
+
$REPLACE_architecture
|
| 3435 |
+
$REPLACE_accurate
|
| 3436 |
+
$REPLACE_mindset
|
| 3437 |
+
$REPLACE_fought
|
| 3438 |
+
$REPLACE_mid
|
| 3439 |
+
$REPLACE_vocalist
|
| 3440 |
+
$REPLACE_flexible
|
| 3441 |
+
$APPEND_girl
|
| 3442 |
+
$REPLACE_tiring
|
| 3443 |
+
$REPLACE_broadcast
|
| 3444 |
+
$REPLACE_July
|
| 3445 |
+
$APPEND_version
|
| 3446 |
+
$REPLACE_seven
|
| 3447 |
+
$REPLACE_Nice
|
| 3448 |
+
$REPLACE_alarm
|
| 3449 |
+
$APPEND_dish
|
| 3450 |
+
$REPLACE_jewelry
|
| 3451 |
+
$REPLACE_studing
|
| 3452 |
+
$REPLACE_cuisine
|
| 3453 |
+
$APPEND_According
|
| 3454 |
+
$APPEND_delicious
|
| 3455 |
+
$REPLACE_ladies
|
| 3456 |
+
$REPLACE_hospital
|
| 3457 |
+
$REPLACE_sweating
|
| 3458 |
+
$REPLACE_obviously
|
| 3459 |
+
$APPEND_interested
|
| 3460 |
+
$REPLACE_College
|
| 3461 |
+
$REPLACE_Autumn
|
| 3462 |
+
$REPLACE_Hawaii
|
| 3463 |
+
$REPLACE_scheduled
|
| 3464 |
+
$REPLACE_crying
|
| 3465 |
+
$REPLACE_climbing
|
| 3466 |
+
$APPEND_giving
|
| 3467 |
+
$REPLACE_smoke
|
| 3468 |
+
$APPEND_9
|
| 3469 |
+
$REPLACE_limit
|
| 3470 |
+
$REPLACE_flying
|
| 3471 |
+
$APPEND_knowledge
|
| 3472 |
+
$REPLACE_4th
|
| 3473 |
+
$REPLACE_Francisco
|
| 3474 |
+
$REPLACE_tournament
|
| 3475 |
+
$APPEND_sleep
|
| 3476 |
+
$REPLACE_participants
|
| 3477 |
+
$REPLACE_snacks
|
| 3478 |
+
$REPLACE_energetic
|
| 3479 |
+
$REPLACE_allergic
|
| 3480 |
+
$APPEND_fast
|
| 3481 |
+
$APPEND_score
|
| 3482 |
+
$REPLACE_clearer
|
| 3483 |
+
$APPEND_source
|
| 3484 |
+
$REPLACE_lottery
|
| 3485 |
+
$APPEND_service
|
| 3486 |
+
$REPLACE_acquire
|
| 3487 |
+
$REPLACE_arrival
|
| 3488 |
+
$APPEND_situation
|
| 3489 |
+
$REPLACE_polite
|
| 3490 |
+
$REPLACE_laughter
|
| 3491 |
+
$REPLACE_Thirdly
|
| 3492 |
+
$APPEND_particular
|
| 3493 |
+
$REPLACE_standard
|
| 3494 |
+
$REPLACE_suppose
|
| 3495 |
+
$REPLACE_emails
|
| 3496 |
+
$REPLACE_Disneyland
|
| 3497 |
+
$REPLACE_nine
|
| 3498 |
+
$REPLACE_rising
|
| 3499 |
+
$REPLACE_cartoon
|
| 3500 |
+
$REPLACE_refreshing
|
| 3501 |
+
$REPLACE_factories
|
| 3502 |
+
$REPLACE_20th
|
| 3503 |
+
$APPEND_single
|
| 3504 |
+
$APPEND_sometime
|
| 3505 |
+
$REPLACE_cleaner
|
| 3506 |
+
$APPEND_Such
|
| 3507 |
+
$APPEND_particularly
|
| 3508 |
+
$REPLACE_fruit
|
| 3509 |
+
$REPLACE_beforehand
|
| 3510 |
+
$REPLACE_11th
|
| 3511 |
+
$REPLACE_Halloween
|
| 3512 |
+
$REPLACE_attract
|
| 3513 |
+
$APPEND_forms
|
| 3514 |
+
$APPEND_under
|
| 3515 |
+
$REPLACE_guests
|
| 3516 |
+
$REPLACE_classmate
|
| 3517 |
+
$APPEND_Yours
|
| 3518 |
+
$REPLACE_learners
|
| 3519 |
+
$REPLACE_red
|
| 3520 |
+
$REPLACE_critical
|
| 3521 |
+
$REPLACE_pitiful
|
| 3522 |
+
$REPLACE_groups
|
| 3523 |
+
$REPLACE_grandparents
|
| 3524 |
+
$REPLACE_primary
|
| 3525 |
+
$REPLACE_Both
|
| 3526 |
+
$REPLACE_aside
|
| 3527 |
+
$REPLACE_youngest
|
| 3528 |
+
$REPLACE_practising
|
| 3529 |
+
$APPEND_Am
|
| 3530 |
+
$REPLACE_summary
|
| 3531 |
+
$REPLACE_telephone
|
| 3532 |
+
$APPEND_nowadays
|
| 3533 |
+
$REPLACE_20
|
| 3534 |
+
$REPLACE_tons
|
| 3535 |
+
$REPLACE_Listening
|
| 3536 |
+
$REPLACE_guilt
|
| 3537 |
+
$REPLACE_occurs
|
| 3538 |
+
$REPLACE_Anyways
|
| 3539 |
+
$REPLACE_rush
|
| 3540 |
+
$REPLACE_intermediate
|
| 3541 |
+
$REPLACE_theirs
|
| 3542 |
+
$APPEND_business
|
| 3543 |
+
$REPLACE_neighboring
|
| 3544 |
+
$REPLACE_independence
|
| 3545 |
+
$APPEND_cost
|
| 3546 |
+
$APPEND_country
|
| 3547 |
+
$REPLACE_beef
|
| 3548 |
+
$REPLACE_formal
|
| 3549 |
+
$APPEND_worked
|
| 3550 |
+
$REPLACE_Hence
|
| 3551 |
+
$REPLACE_Mother
|
| 3552 |
+
$REPLACE_picked
|
| 3553 |
+
$REPLACE_star
|
| 3554 |
+
$REPLACE_fishing
|
| 3555 |
+
$REPLACE_planted
|
| 3556 |
+
$REPLACE_fear
|
| 3557 |
+
$APPEND_100
|
| 3558 |
+
$APPEND_onto
|
| 3559 |
+
$REPLACE_choir
|
| 3560 |
+
$REPLACE_spot
|
| 3561 |
+
$REPLACE_correction
|
| 3562 |
+
$REPLACE_suits
|
| 3563 |
+
$REPLACE_Day
|
| 3564 |
+
$REPLACE_supported
|
| 3565 |
+
$REPLACE_comfort
|
| 3566 |
+
$REPLACE_newspapers
|
| 3567 |
+
$REPLACE_friendship
|
| 3568 |
+
$REPLACE_May
|
| 3569 |
+
$REPLACE_freezing
|
| 3570 |
+
$REPLACE_discussed
|
| 3571 |
+
$APPEND_{
|
| 3572 |
+
$APPEND_whom
|
| 3573 |
+
$REPLACE_trust
|
| 3574 |
+
$REPLACE_industries
|
| 3575 |
+
$REPLACE_decisions
|
| 3576 |
+
$APPEND_poor
|
| 3577 |
+
$APPEND_correctly
|
| 3578 |
+
$REPLACE_hundred
|
| 3579 |
+
$REPLACE_recipe
|
| 3580 |
+
$REPLACE_competitive
|
| 3581 |
+
$REPLACE_burden
|
| 3582 |
+
$REPLACE_abandoned
|
| 3583 |
+
$APPEND_walking
|
| 3584 |
+
$REPLACE_individuals
|
| 3585 |
+
$APPEND_travelling
|
| 3586 |
+
$REPLACE_theme
|
| 3587 |
+
$REPLACE_runs
|
| 3588 |
+
$REPLACE_threw
|
| 3589 |
+
$REPLACE_rock
|
| 3590 |
+
$APPEND_thinking
|
| 3591 |
+
$REPLACE_Taking
|
| 3592 |
+
$REPLACE_ideal
|
| 3593 |
+
$REPLACE_practical
|
| 3594 |
+
$APPEND_re
|
| 3595 |
+
$APPEND_station
|
| 3596 |
+
$REPLACE_collect
|
| 3597 |
+
$REPLACE_perhaps
|
| 3598 |
+
$REPLACE_advanced
|
| 3599 |
+
$REPLACE_humans
|
| 3600 |
+
$APPEND_realized
|
| 3601 |
+
$REPLACE_remove
|
| 3602 |
+
$REPLACE_notebook
|
| 3603 |
+
$REPLACE_continuously
|
| 3604 |
+
$REPLACE_beach
|
| 3605 |
+
$REPLACE_ends
|
| 3606 |
+
$REPLACE_secret
|
| 3607 |
+
$REPLACE_skilled
|
| 3608 |
+
$REPLACE_jump
|
| 3609 |
+
$REPLACE_episodes
|
| 3610 |
+
$REPLACE_cup
|
| 3611 |
+
$REPLACE_consists
|
| 3612 |
+
$REPLACE_release
|
| 3613 |
+
$REPLACE_notes
|
| 3614 |
+
$REPLACE_22nd
|
| 3615 |
+
$REPLACE_fallen
|
| 3616 |
+
$APPEND_Which
|
| 3617 |
+
$APPEND_saw
|
| 3618 |
+
$REPLACE_libraries
|
| 3619 |
+
$REPLACE_consecutive
|
| 3620 |
+
$REPLACE_March
|
| 3621 |
+
$REPLACE_closely
|
| 3622 |
+
$REPLACE_century
|
| 3623 |
+
$APPEND_per
|
| 3624 |
+
$REPLACE_circumstances
|
| 3625 |
+
$REPLACE_whoever
|
| 3626 |
+
$REPLACE_rented
|
| 3627 |
+
$REPLACE_aging
|
| 3628 |
+
$APPEND_regularly
|
| 3629 |
+
$REPLACE_cycling
|
| 3630 |
+
$REPLACE_depression
|
| 3631 |
+
$REPLACE_row
|
| 3632 |
+
$APPEND_constantly
|
| 3633 |
+
$APPEND_feelings
|
| 3634 |
+
$REPLACE_Angeles
|
| 3635 |
+
$REPLACE_talented
|
| 3636 |
+
$REPLACE_00am
|
| 3637 |
+
$REPLACE_shower
|
| 3638 |
+
$REPLACE_functions
|
| 3639 |
+
$APPEND_love
|
| 3640 |
+
$APPEND_believe
|
| 3641 |
+
$REPLACE_basis
|
| 3642 |
+
$REPLACE_follows
|
| 3643 |
+
$APPEND_hardly
|
| 3644 |
+
$REPLACE_teenager
|
| 3645 |
+
$REPLACE_diverse
|
| 3646 |
+
$REPLACE_Sir
|
| 3647 |
+
$REPLACE_decrease
|
| 3648 |
+
$REPLACE_goodbye
|
| 3649 |
+
$REPLACE_behave
|
| 3650 |
+
$APPEND_everywhere
|
| 3651 |
+
$REPLACE_users
|
| 3652 |
+
$REPLACE_analysis
|
| 3653 |
+
$REPLACE_translating
|
| 3654 |
+
$REPLACE_relaxation
|
| 3655 |
+
$REPLACE_unexpectedly
|
| 3656 |
+
$REPLACE_Russia
|
| 3657 |
+
$REPLACE_championship
|
| 3658 |
+
$APPEND_lives
|
| 3659 |
+
$REPLACE_hate
|
| 3660 |
+
$APPEND_somehow
|
| 3661 |
+
$REPLACE_joining
|
| 3662 |
+
$APPEND_stop
|
| 3663 |
+
$APPEND_enjoyed
|
| 3664 |
+
$APPEND_cup
|
| 3665 |
+
$REPLACE_flies
|
| 3666 |
+
$REPLACE_Talking
|
| 3667 |
+
$REPLACE_painting
|
| 3668 |
+
$REPLACE_letters
|
| 3669 |
+
$REPLACE_master
|
| 3670 |
+
$REPLACE_stated
|
| 3671 |
+
$REPLACE_aggressive
|
| 3672 |
+
$REPLACE_shy
|
| 3673 |
+
$APPEND_care
|
| 3674 |
+
$APPEND_wear
|
| 3675 |
+
$REPLACE_served
|
| 3676 |
+
$REPLACE_stops
|
| 3677 |
+
$APPEND_house
|
| 3678 |
+
$REPLACE_diligent
|
| 3679 |
+
$REPLACE_IN
|
| 3680 |
+
$REPLACE_deciding
|
| 3681 |
+
$REPLACE_sweets
|
| 3682 |
+
$REPLACE_argued
|
| 3683 |
+
$REPLACE_bookstore
|
| 3684 |
+
$APPEND_pretty
|
| 3685 |
+
$REPLACE_range
|
| 3686 |
+
$REPLACE_vegetable
|
| 3687 |
+
$REPLACE_appreciation
|
| 3688 |
+
$REPLACE_pity
|
| 3689 |
+
$REPLACE_update
|
| 3690 |
+
$REPLACE_More
|
| 3691 |
+
$REPLACE_laughing
|
| 3692 |
+
$REPLACE_economics
|
| 3693 |
+
$REPLACE_cellphone
|
| 3694 |
+
$REPLACE_OK
|
| 3695 |
+
$REPLACE_pregnant
|
| 3696 |
+
$REPLACE_spite
|
| 3697 |
+
$REPLACE_karaoke
|
| 3698 |
+
$REPLACE_tutor
|
| 3699 |
+
$REPLACE_cockroaches
|
| 3700 |
+
$APPEND_Most
|
| 3701 |
+
$REPLACE_additional
|
| 3702 |
+
$APPEND_energy
|
| 3703 |
+
$REPLACE_contain
|
| 3704 |
+
$REPLACE_actual
|
| 3705 |
+
$REPLACE_shining
|
| 3706 |
+
$APPEND_feels
|
| 3707 |
+
$REPLACE_lesser
|
| 3708 |
+
$REPLACE_pages
|
| 3709 |
+
$REPLACE_cartoons
|
| 3710 |
+
$REPLACE_arise
|
| 3711 |
+
$REPLACE_f
|
| 3712 |
+
$REPLACE_luckily
|
| 3713 |
+
$REPLACE_airport
|
| 3714 |
+
$REPLACE_windy
|
| 3715 |
+
$REPLACE_instructor
|
| 3716 |
+
$APPEND_Why
|
| 3717 |
+
$REPLACE_weighed
|
| 3718 |
+
$REPLACE_river
|
| 3719 |
+
$APPEND_frequently
|
| 3720 |
+
$APPEND_method
|
| 3721 |
+
$REPLACE_shrine
|
| 3722 |
+
$APPEND_short
|
| 3723 |
+
$REPLACE_suffer
|
| 3724 |
+
$REPLACE_6th
|
| 3725 |
+
$REPLACE_fight
|
| 3726 |
+
$APPEND_worth
|
| 3727 |
+
$REPLACE_absent
|
| 3728 |
+
$REPLACE_United
|
| 3729 |
+
$REPLACE_chef
|
| 3730 |
+
$REPLACE_anytime
|
| 3731 |
+
$REPLACE_Three
|
| 3732 |
+
$REPLACE_noisy
|
| 3733 |
+
$APPEND_therefore
|
| 3734 |
+
$REPLACE_iPod
|
| 3735 |
+
$APPEND_French
|
| 3736 |
+
$REPLACE_wishes
|
| 3737 |
+
$REPLACE_Yours
|
| 3738 |
+
$APPEND_Being
|
| 3739 |
+
$APPEND_Its
|
| 3740 |
+
$APPEND_field
|
| 3741 |
+
$APPEND_photos
|
| 3742 |
+
$REPLACE_definition
|
| 3743 |
+
$APPEND_gives
|
| 3744 |
+
$REPLACE_scores
|
| 3745 |
+
$APPEND_Having
|
| 3746 |
+
$REPLACE_statement
|
| 3747 |
+
$APPEND_spoken
|
| 3748 |
+
$APPEND_price
|
| 3749 |
+
$REPLACE_cleaned
|
| 3750 |
+
$REPLACE_varied
|
| 3751 |
+
$APPEND_Oh
|
| 3752 |
+
$REPLACE_wash
|
| 3753 |
+
$REPLACE_satisfactory
|
| 3754 |
+
$REPLACE_ceiling
|
| 3755 |
+
$APPEND_including
|
| 3756 |
+
$APPEND_special
|
| 3757 |
+
$APPEND_popular
|
| 3758 |
+
$REPLACE_invention
|
| 3759 |
+
$REPLACE_materials
|
| 3760 |
+
$REPLACE_media
|
| 3761 |
+
$REPLACE_=
|
| 3762 |
+
$REPLACE_dialogue
|
| 3763 |
+
$REPLACE_designed
|
| 3764 |
+
$REPLACE_popularity
|
| 3765 |
+
$REPLACE_York
|
| 3766 |
+
$REPLACE_Getting
|
| 3767 |
+
$APPEND_shown
|
| 3768 |
+
$REPLACE_carrying
|
| 3769 |
+
$REPLACE_00pm
|
| 3770 |
+
$REPLACE_stations
|
| 3771 |
+
$REPLACE_puts
|
| 3772 |
+
$REPLACE_screen
|
| 3773 |
+
$REPLACE_appreciative
|
| 3774 |
+
$REPLACE_cruel
|
| 3775 |
+
$APPEND_main
|
| 3776 |
+
$REPLACE_action
|
| 3777 |
+
$REPLACE_unlucky
|
| 3778 |
+
$REPLACE_God
|
| 3779 |
+
$APPEND_basically
|
| 3780 |
+
$REPLACE_d
|
| 3781 |
+
$REPLACE_climbed
|
| 3782 |
+
$REPLACE_thoroughly
|
| 3783 |
+
$REPLACE_Canada
|
| 3784 |
+
$REPLACE_hesitate
|
| 3785 |
+
$APPEND_developed
|
| 3786 |
+
$APPEND_post
|
| 3787 |
+
$REPLACE_represent
|
| 3788 |
+
$REPLACE_comment
|
| 3789 |
+
$REPLACE_controlled
|
| 3790 |
+
$REPLACE_source
|
| 3791 |
+
$REPLACE_customs
|
| 3792 |
+
$REPLACE_drawn
|
| 3793 |
+
$REPLACE_mature
|
| 3794 |
+
$REPLACE_commute
|
| 3795 |
+
$APPEND_Once
|
| 3796 |
+
$APPEND_letter
|
| 3797 |
+
$REPLACE_attached
|
| 3798 |
+
$REPLACE_gift
|
| 3799 |
+
$REPLACE_nap
|
| 3800 |
+
$APPEND_asked
|
| 3801 |
+
$REPLACE_inspired
|
| 3802 |
+
$APPEND_event
|
| 3803 |
+
$REPLACE_seafood
|
| 3804 |
+
$APPEND_watched
|
| 3805 |
+
$REPLACE_errors
|
| 3806 |
+
$APPEND_passed
|
| 3807 |
+
$APPEND_english
|
| 3808 |
+
$REPLACE_complaining
|
| 3809 |
+
$REPLACE_roommate
|
| 3810 |
+
$REPLACE_Life
|
| 3811 |
+
$REPLACE_mental
|
| 3812 |
+
$REPLACE_grades
|
| 3813 |
+
$APPEND_parts
|
| 3814 |
+
$REPLACE_pronounciation
|
| 3815 |
+
$REPLACE_strengthen
|
| 3816 |
+
$REPLACE_priority
|
| 3817 |
+
$APPEND_abroad
|
| 3818 |
+
$APPEND_ticket
|
| 3819 |
+
$REPLACE_insurance
|
| 3820 |
+
$REPLACE_hesitation
|
| 3821 |
+
$REPLACE_researched
|
| 3822 |
+
$REPLACE_unlike
|
| 3823 |
+
$REPLACE_exercising
|
| 3824 |
+
$REPLACE_exchanged
|
| 3825 |
+
$REPLACE_knows
|
| 3826 |
+
$REPLACE_founded
|
| 3827 |
+
$REPLACE_messy
|
| 3828 |
+
$REPLACE_dying
|
| 3829 |
+
$APPEND_plans
|
| 3830 |
+
$APPEND_match
|
| 3831 |
+
$REPLACE_Fourth
|
| 3832 |
+
$REPLACE_answers
|
| 3833 |
+
$REPLACE_assignments
|
| 3834 |
+
$REPLACE_Whether
|
| 3835 |
+
$REPLACE_elder
|
| 3836 |
+
$REPLACE_gas
|
| 3837 |
+
$REPLACE_heading
|
| 3838 |
+
$REPLACE_laws
|
| 3839 |
+
$REPLACE_kindly
|
| 3840 |
+
$REPLACE_wine
|
| 3841 |
+
$REPLACE_household
|
| 3842 |
+
$REPLACE_dining
|
| 3843 |
+
$REPLACE_sensitive
|
| 3844 |
+
$REPLACE_wet
|
| 3845 |
+
$REPLACE_Personally
|
| 3846 |
+
$APPEND_middle
|
| 3847 |
+
$REPLACE_busier
|
| 3848 |
+
$REPLACE_dirty
|
| 3849 |
+
$REPLACE_religion
|
| 3850 |
+
$REPLACE_facing
|
| 3851 |
+
$APPEND_totally
|
| 3852 |
+
$REPLACE_repeatedly
|
| 3853 |
+
$REPLACE_tries
|
| 3854 |
+
$REPLACE_organising
|
| 3855 |
+
$REPLACE_operating
|
| 3856 |
+
$REPLACE_ex
|
| 3857 |
+
$APPEND_languages
|
| 3858 |
+
$REPLACE_services
|
| 3859 |
+
$REPLACE_remaining
|
| 3860 |
+
$REPLACE_killed
|
| 3861 |
+
$REPLACE_fair
|
| 3862 |
+
$REPLACE_bike
|
| 3863 |
+
$REPLACE_'t
|
| 3864 |
+
$APPEND_titled
|
| 3865 |
+
$REPLACE_exception
|
| 3866 |
+
$APPEND_carefully
|
| 3867 |
+
$REPLACE_salon
|
| 3868 |
+
$REPLACE_translated
|
| 3869 |
+
$REPLACE_welcome
|
| 3870 |
+
$REPLACE_gratitude
|
| 3871 |
+
$REPLACE_Watching
|
| 3872 |
+
$REPLACE_adults
|
| 3873 |
+
$APPEND_large
|
| 3874 |
+
$REPLACE_untill
|
| 3875 |
+
$REPLACE_coach
|
| 3876 |
+
$REPLACE_mountains
|
| 3877 |
+
$REPLACE_sandwich
|
| 3878 |
+
$REPLACE_examples
|
| 3879 |
+
$APPEND_gone
|
| 3880 |
+
$REPLACE_multiple
|
| 3881 |
+
$APPEND_meant
|
| 3882 |
+
$REPLACE_delivered
|
| 3883 |
+
$REPLACE_entering
|
| 3884 |
+
$APPEND_Hello
|
| 3885 |
+
$REPLACE_option
|
| 3886 |
+
$REPLACE_cigarette
|
| 3887 |
+
$REPLACE_acted
|
| 3888 |
+
$REPLACE_bathroom
|
| 3889 |
+
$REPLACE_accustomed
|
| 3890 |
+
$REPLACE_literature
|
| 3891 |
+
$REPLACE_bottom
|
| 3892 |
+
$APPEND_course
|
| 3893 |
+
$APPEND_choose
|
| 3894 |
+
$REPLACE_resume
|
| 3895 |
+
$APPEND_web
|
| 3896 |
+
$REPLACE_aloud
|
| 3897 |
+
$REPLACE_material
|
| 3898 |
+
$REPLACE_struggle
|
| 3899 |
+
$REPLACE_trains
|
| 3900 |
+
$REPLACE_dog
|
| 3901 |
+
$APPEND_Both
|
| 3902 |
+
$REPLACE_leisure
|
| 3903 |
+
$REPLACE_climate
|
| 3904 |
+
$REPLACE_japanese
|
| 3905 |
+
$REPLACE_reduced
|
| 3906 |
+
$APPEND_break
|
| 3907 |
+
$APPEND_grow
|
| 3908 |
+
$REPLACE_Thinking
|
| 3909 |
+
$REPLACE_dessert
|
| 3910 |
+
$REPLACE_Yeah
|
| 3911 |
+
$REPLACE_salt
|
| 3912 |
+
$REPLACE_rare
|
| 3913 |
+
$REPLACE_fairly
|
| 3914 |
+
$REPLACE_knowing
|
| 3915 |
+
$REPLACE_varieties
|
| 3916 |
+
$APPEND_festival
|
| 3917 |
+
$REPLACE_kitten
|
| 3918 |
+
$APPEND_changes
|
| 3919 |
+
$REPLACE_Introduction
|
| 3920 |
+
$REPLACE_viruses
|
| 3921 |
+
$APPEND_gotten
|
| 3922 |
+
$REPLACE_h
|
| 3923 |
+
$REPLACE_experiencing
|
| 3924 |
+
$APPEND_rain
|
| 3925 |
+
$APPEND_weight
|
| 3926 |
+
$REPLACE_brown
|
| 3927 |
+
$REPLACE_Everyday
|
| 3928 |
+
$APPEND_Tokyo
|
| 3929 |
+
$REPLACE_split
|
| 3930 |
+
$REPLACE_section
|
| 3931 |
+
$APPEND_dinner
|
| 3932 |
+
$REPLACE_Making
|
| 3933 |
+
$REPLACE_courses
|
| 3934 |
+
$REPLACE_remains
|
| 3935 |
+
$REPLACE_Dragon
|
| 3936 |
+
$REPLACE_soft
|
| 3937 |
+
$REPLACE_independent
|
| 3938 |
+
$REPLACE_conducted
|
| 3939 |
+
$APPEND_mode
|
| 3940 |
+
$APPEND_tickets
|
| 3941 |
+
$APPEND_leave
|
| 3942 |
+
$APPEND_culture
|
| 3943 |
+
$REPLACE_Iam
|
| 3944 |
+
$REPLACE_joy
|
| 3945 |
+
$REPLACE_violent
|
| 3946 |
+
$REPLACE_leaf
|
| 3947 |
+
$REPLACE_fortune
|
| 3948 |
+
$APPEND_reasons
|
| 3949 |
+
$REPLACE_Fukushima
|
| 3950 |
+
$APPEND_thus
|
| 3951 |
+
$REPLACE_boss
|
| 3952 |
+
$REPLACE_player
|
| 3953 |
+
$REPLACE_closest
|
| 3954 |
+
$REPLACE_lies
|
| 3955 |
+
$APPEND_consists
|
| 3956 |
+
$REPLACE_impolite
|
| 3957 |
+
$REPLACE_unpredictable
|
| 3958 |
+
$REPLACE_shared
|
| 3959 |
+
$REPLACE_7th
|
| 3960 |
+
$APPEND_Up
|
| 3961 |
+
$REPLACE_step
|
| 3962 |
+
$APPEND_football
|
| 3963 |
+
$REPLACE_central
|
| 3964 |
+
$REPLACE_symptoms
|
| 3965 |
+
$REPLACE_funds
|
| 3966 |
+
$REPLACE_resolve
|
| 3967 |
+
$REPLACE_Technology
|
| 3968 |
+
$REPLACE_solutions
|
| 3969 |
+
$REPLACE_adult
|
| 3970 |
+
$REPLACE_military
|
| 3971 |
+
$REPLACE_supermarkets
|
| 3972 |
+
$APPEND_sites
|
| 3973 |
+
$REPLACE_levels
|
| 3974 |
+
$REPLACE_broad
|
| 3975 |
+
$REPLACE_smiling
|
| 3976 |
+
$REPLACE_expecting
|
| 3977 |
+
$REPLACE_shorter
|
| 3978 |
+
$APPEND_Like
|
| 3979 |
+
$REPLACE_gloomy
|
| 3980 |
+
$REPLACE_weekdays
|
| 3981 |
+
$REPLACE_blew
|
| 3982 |
+
$REPLACE_determine
|
| 3983 |
+
$REPLACE_discount
|
| 3984 |
+
$APPEND_attend
|
| 3985 |
+
$REPLACE_treated
|
| 3986 |
+
$REPLACE_length
|
| 3987 |
+
$REPLACE_raw
|
| 3988 |
+
$REPLACE_promote
|
| 3989 |
+
$REPLACE_court
|
| 3990 |
+
$REPLACE_commercial
|
| 3991 |
+
$REPLACE_expectations
|
| 3992 |
+
$APPEND_exercise
|
| 3993 |
+
$REPLACE_tickets
|
| 3994 |
+
$REPLACE_status
|
| 3995 |
+
$REPLACE_retirement
|
| 3996 |
+
$REPLACE_crowd
|
| 3997 |
+
$REPLACE_requested
|
| 3998 |
+
$REPLACE_South
|
| 3999 |
+
$APPEND_corrected
|
| 4000 |
+
$REPLACE_aunt
|
| 4001 |
+
$REPLACE_Traveling
|
| 4002 |
+
$REPLACE_region
|
| 4003 |
+
$REPLACE_pulled
|
| 4004 |
+
$APPEND_14
|
| 4005 |
+
$REPLACE_impatient
|
| 4006 |
+
$REPLACE_roads
|
| 4007 |
+
$REPLACE_value
|
| 4008 |
+
$REPLACE_existence
|
| 4009 |
+
$REPLACE_applications
|
| 4010 |
+
$REPLACE_boiled
|
| 4011 |
+
$REPLACE_warming
|
| 4012 |
+
$REPLACE_15
|
| 4013 |
+
$REPLACE_Iwas
|
| 4014 |
+
$REPLACE_accomplish
|
| 4015 |
+
$APPEND_Sounds
|
| 4016 |
+
$APPEND_send
|
| 4017 |
+
$APPEND_programs
|
| 4018 |
+
$REPLACE_costume
|
| 4019 |
+
$APPEND_1st
|
| 4020 |
+
$REPLACE_ancient
|
| 4021 |
+
$REPLACE_physics
|
| 4022 |
+
$REPLACE_record
|
| 4023 |
+
$REPLACE_published
|
| 4024 |
+
$REPLACE_cross
|
| 4025 |
+
$REPLACE_harmful
|
| 4026 |
+
$REPLACE_description
|
| 4027 |
+
$APPEND_wrote
|
| 4028 |
+
$APPEND_pay
|
| 4029 |
+
$REPLACE_fond
|
| 4030 |
+
$APPEND_color
|
| 4031 |
+
$REPLACE_asks
|
| 4032 |
+
$APPEND_stuff
|
| 4033 |
+
$REPLACE_specially
|
| 4034 |
+
$REPLACE_uneasy
|
| 4035 |
+
$APPEND_riding
|
| 4036 |
+
$REPLACE_inthe
|
| 4037 |
+
$REPLACE_nose
|
| 4038 |
+
$REPLACE_scientific
|
| 4039 |
+
$REPLACE_Among
|
| 4040 |
+
$REPLACE_danger
|
| 4041 |
+
$REPLACE_commit
|
| 4042 |
+
$REPLACE_Particularly
|
| 4043 |
+
$REPLACE_troubles
|
| 4044 |
+
$REPLACE_button
|
| 4045 |
+
$REPLACE_delayed
|
| 4046 |
+
$REPLACE_Diego
|
| 4047 |
+
$REPLACE_daytime
|
| 4048 |
+
$REPLACE_phenomenon
|
| 4049 |
+
$APPEND_following
|
| 4050 |
+
$REPLACE_Consequently
|
| 4051 |
+
$REPLACE_saving
|
| 4052 |
+
$REPLACE_souvenir
|
| 4053 |
+
$REPLACE_missing
|
| 4054 |
+
$REPLACE_unless
|
| 4055 |
+
$APPEND_office
|
| 4056 |
+
$REPLACE_anniversary
|
| 4057 |
+
$REPLACE_anger
|
| 4058 |
+
$APPEND_himself
|
| 4059 |
+
$APPEND_happening
|
| 4060 |
+
$REPLACE_cheer
|
| 4061 |
+
$REPLACE_animal
|
| 4062 |
+
$APPEND_subject
|
| 4063 |
+
$REPLACE_nicer
|
| 4064 |
+
$REPLACE_sells
|
| 4065 |
+
$REPLACE_lenses
|
| 4066 |
+
$REPLACE_OF
|
| 4067 |
+
$REPLACE_possibilities
|
| 4068 |
+
$REPLACE_efforts
|
| 4069 |
+
$REPLACE_Years
|
| 4070 |
+
$REPLACE_merchandise
|
| 4071 |
+
$REPLACE_subsidies
|
| 4072 |
+
$REPLACE_forms
|
| 4073 |
+
$REPLACE_hotel
|
| 4074 |
+
$APPEND_non
|
| 4075 |
+
$REPLACE_appetite
|
| 4076 |
+
$REPLACE_sport
|
| 4077 |
+
$REPLACE_expand
|
| 4078 |
+
$REPLACE_rhythm
|
| 4079 |
+
$APPEND_Another
|
| 4080 |
+
$REPLACE_Language
|
| 4081 |
+
$APPEND_Each
|
| 4082 |
+
$REPLACE_window
|
| 4083 |
+
$REPLACE_increases
|
| 4084 |
+
$REPLACE_states
|
| 4085 |
+
$REPLACE_excitement
|
| 4086 |
+
$REPLACE_promise
|
| 4087 |
+
$APPEND_seen
|
| 4088 |
+
$REPLACE_luggage
|
| 4089 |
+
$APPEND_generally
|
| 4090 |
+
$REPLACE_frustrating
|
| 4091 |
+
$REPLACE_colors
|
| 4092 |
+
$REPLACE_mosquitoes
|
| 4093 |
+
$REPLACE_seats
|
| 4094 |
+
$REPLACE_woken
|
| 4095 |
+
$REPLACE_switched
|
| 4096 |
+
$REPLACE_grammatically
|
| 4097 |
+
$REPLACE_ON
|
| 4098 |
+
$REPLACE_kindness
|
| 4099 |
+
$REPLACE_thieves
|
| 4100 |
+
$REPLACE_spoiled
|
| 4101 |
+
$REPLACE_States
|
| 4102 |
+
$REPLACE_hamburgers
|
| 4103 |
+
$APPEND_nearly
|
| 4104 |
+
$REPLACE_situated
|
| 4105 |
+
$REPLACE_foods
|
| 4106 |
+
$REPLACE_collecting
|
| 4107 |
+
$REPLACE_unfortunate
|
| 4108 |
+
$REPLACE_camera
|
| 4109 |
+
$REPLACE_dramatic
|
| 4110 |
+
$REPLACE_noodle
|
| 4111 |
+
$APPEND_human
|
| 4112 |
+
$REPLACE_re
|
| 4113 |
+
$REPLACE_humidity
|
| 4114 |
+
$APPEND_strongly
|
| 4115 |
+
$REPLACE_kimchi
|
| 4116 |
+
$APPEND_difference
|
| 4117 |
+
$REPLACE_artists
|
| 4118 |
+
$REPLACE_medical
|
| 4119 |
+
$REPLACE_incredible
|
| 4120 |
+
$APPEND_helping
|
| 4121 |
+
$APPEND_ahead
|
| 4122 |
+
$REPLACE_lines
|
| 4123 |
+
$REPLACE_thinks
|
| 4124 |
+
$REPLACE_thousand
|
| 4125 |
+
$REPLACE_sixth
|
| 4126 |
+
$REPLACE_exposed
|
| 4127 |
+
$REPLACE_colours
|
| 4128 |
+
$REPLACE_widely
|
| 4129 |
+
$APPEND_nuclear
|
| 4130 |
+
$REPLACE_worldwide
|
| 4131 |
+
$REPLACE_comprehension
|
| 4132 |
+
$APPEND_hair
|
| 4133 |
+
$REPLACE_halfway
|
| 4134 |
+
$APPEND_cause
|
| 4135 |
+
$REPLACE_cast
|
| 4136 |
+
$APPEND_coffee
|
| 4137 |
+
$REPLACE_attractions
|
| 4138 |
+
$REPLACE_beautifully
|
| 4139 |
+
$REPLACE_handwritten
|
| 4140 |
+
$APPEND_band
|
| 4141 |
+
$APPEND_improving
|
| 4142 |
+
$APPEND_40
|
| 4143 |
+
$REPLACE_shops
|
| 4144 |
+
$REPLACE_basically
|
| 4145 |
+
$APPEND_studied
|
| 4146 |
+
$REPLACE_manufacturer
|
| 4147 |
+
$REPLACE_Western
|
| 4148 |
+
$APPEND_throughout
|
| 4149 |
+
$REPLACE_identify
|
| 4150 |
+
$APPEND_Would
|
| 4151 |
+
$REPLACE_Switzerland
|
| 4152 |
+
$APPEND_everybody
|
| 4153 |
+
$APPEND_grade
|
| 4154 |
+
$REPLACE_farewell
|
| 4155 |
+
$REPLACE_romantic
|
| 4156 |
+
$REPLACE_Celsius
|
| 4157 |
+
$REPLACE_bread
|
| 4158 |
+
$APPEND_favorite
|
| 4159 |
+
$APPEND_Despite
|
| 4160 |
+
$REPLACE_downloaded
|
| 4161 |
+
$REPLACE_balance
|
| 4162 |
+
$APPEND_carry
|
| 4163 |
+
$REPLACE_cure
|
| 4164 |
+
$REPLACE_programmer
|
| 4165 |
+
$APPEND_considered
|
| 4166 |
+
$APPEND_slowly
|
| 4167 |
+
$REPLACE_discovery
|
| 4168 |
+
$APPEND_stopped
|
| 4169 |
+
$REPLACE_standing
|
| 4170 |
+
$REPLACE_earned
|
| 4171 |
+
$REPLACE_skating
|
| 4172 |
+
$REPLACE_detail
|
| 4173 |
+
$REPLACE_apology
|
| 4174 |
+
$REPLACE_writer
|
| 4175 |
+
$REPLACE_highway
|
| 4176 |
+
$REPLACE_Goodbye
|
| 4177 |
+
$REPLACE_quote
|
| 4178 |
+
$REPLACE_maintenance
|
| 4179 |
+
$APPEND_taste
|
| 4180 |
+
$REPLACE_package
|
| 4181 |
+
$REPLACE_responded
|
| 4182 |
+
$REPLACE_criticize
|
| 4183 |
+
$APPEND_deeply
|
| 4184 |
+
$REPLACE_jogging
|
| 4185 |
+
$APPEND_waiting
|
| 4186 |
+
$REPLACE_fatter
|
| 4187 |
+
$REPLACE_cycle
|
| 4188 |
+
$APPEND_Only
|
| 4189 |
+
$REPLACE_afterward
|
| 4190 |
+
$REPLACE_specialty
|
| 4191 |
+
$REPLACE_goodness
|
| 4192 |
+
$REPLACE_groceries
|
| 4193 |
+
$APPEND_staff
|
| 4194 |
+
$REPLACE_somehow
|
| 4195 |
+
$APPEND_Moreover
|
| 4196 |
+
$APPEND_training
|
| 4197 |
+
$REPLACE_clever
|
| 4198 |
+
$REPLACE_camp
|
| 4199 |
+
$APPEND_traveling
|
| 4200 |
+
$APPEND_minutes
|
| 4201 |
+
$REPLACE_sandwiches
|
| 4202 |
+
$APPEND_run
|
| 4203 |
+
$REPLACE_options
|
| 4204 |
+
$REPLACE_calories
|
| 4205 |
+
$REPLACE_branch
|
| 4206 |
+
$REPLACE_barbecue
|
| 4207 |
+
$APPEND_entrance
|
| 4208 |
+
$REPLACE_noodles
|
| 4209 |
+
$APPEND_products
|
| 4210 |
+
$APPEND_helped
|
| 4211 |
+
$REPLACE_newly
|
| 4212 |
+
$APPEND_drank
|
| 4213 |
+
$REPLACE_precise
|
| 4214 |
+
$REPLACE_increasingly
|
| 4215 |
+
$APPEND_Dear
|
| 4216 |
+
$REPLACE_novels
|
| 4217 |
+
$REPLACE_mix
|
| 4218 |
+
$REPLACE_budget
|
| 4219 |
+
$REPLACE_petrol
|
| 4220 |
+
$REPLACE_trial
|
| 4221 |
+
$APPEND_Perhaps
|
| 4222 |
+
$REPLACE_occasions
|
| 4223 |
+
$APPEND_Actually
|
| 4224 |
+
$REPLACE_eastern
|
| 4225 |
+
$REPLACE_sights
|
| 4226 |
+
$REPLACE_industrial
|
| 4227 |
+
$APPEND_result
|
| 4228 |
+
$REPLACE_generally
|
| 4229 |
+
$REPLACE_Canadian
|
| 4230 |
+
$REPLACE_Surprisingly
|
| 4231 |
+
$APPEND_strong
|
| 4232 |
+
$REPLACE_memorizing
|
| 4233 |
+
$REPLACE_irritated
|
| 4234 |
+
$REPLACE_implemented
|
| 4235 |
+
$REPLACE_Welcome
|
| 4236 |
+
$REPLACE_coast
|
| 4237 |
+
$REPLACE_signs
|
| 4238 |
+
$REPLACE_leading
|
| 4239 |
+
$APPEND_PM
|
| 4240 |
+
$APPEND_access
|
| 4241 |
+
$REPLACE_fat
|
| 4242 |
+
$REPLACE_breeze
|
| 4243 |
+
$REPLACE_India
|
| 4244 |
+
$APPEND_slept
|
| 4245 |
+
$REPLACE_weigh
|
| 4246 |
+
$REPLACE_commonly
|
| 4247 |
+
$REPLACE_supervisor
|
| 4248 |
+
$REPLACE_tomato
|
| 4249 |
+
$REPLACE_agency
|
| 4250 |
+
$APPEND_till
|
| 4251 |
+
$REPLACE_couldn
|
| 4252 |
+
$REPLACE_strangely
|
| 4253 |
+
$APPEND_stayed
|
| 4254 |
+
$REPLACE_ni
|
| 4255 |
+
$APPEND_exams
|
| 4256 |
+
$REPLACE_School
|
| 4257 |
+
$REPLACE_blue
|
| 4258 |
+
$APPEND_allowed
|
| 4259 |
+
$REPLACE_30th
|
| 4260 |
+
$REPLACE_kittens
|
| 4261 |
+
$REPLACE_typing
|
| 4262 |
+
$REPLACE_headed
|
| 4263 |
+
$APPEND_present
|
| 4264 |
+
$REPLACE_Reading
|
| 4265 |
+
$REPLACE_injury
|
| 4266 |
+
$REPLACE_Dear
|
| 4267 |
+
$REPLACE_PM
|
| 4268 |
+
$REPLACE_minor
|
| 4269 |
+
$REPLACE_drinks
|
| 4270 |
+
$REPLACE_enthusiasm
|
| 4271 |
+
$REPLACE_dilemma
|
| 4272 |
+
$REPLACE_income
|
| 4273 |
+
$REPLACE_sadness
|
| 4274 |
+
$REPLACE_weaker
|
| 4275 |
+
$REPLACE_Thanksgiving
|
| 4276 |
+
$REPLACE_documents
|
| 4277 |
+
$REPLACE_fake
|
| 4278 |
+
$REPLACE_boy
|
| 4279 |
+
$REPLACE_regards
|
| 4280 |
+
$APPEND_Finally
|
| 4281 |
+
$REPLACE_obstacle
|
| 4282 |
+
$REPLACE_batteries
|
| 4283 |
+
$APPEND_talked
|
| 4284 |
+
$APPEND_becomes
|
| 4285 |
+
$REPLACE_numerous
|
| 4286 |
+
$REPLACE_cheese
|
| 4287 |
+
$REPLACE_judge
|
| 4288 |
+
$APPEND_busy
|
| 4289 |
+
$APPEND_reach
|
| 4290 |
+
$APPEND_Fuji
|
| 4291 |
+
$REPLACE_intelligent
|
| 4292 |
+
$REPLACE_reception
|
| 4293 |
+
$REPLACE_Chinatown
|
| 4294 |
+
$REPLACE_repeat
|
| 4295 |
+
$APPEND_June
|
| 4296 |
+
$REPLACE_reported
|
| 4297 |
+
$APPEND_required
|
| 4298 |
+
$REPLACE_cases
|
| 4299 |
+
$REPLACE_matters
|
| 4300 |
+
$REPLACE_prepositions
|
| 4301 |
+
$REPLACE_accidents
|
| 4302 |
+
$REPLACE_fields
|
| 4303 |
+
$APPEND_ask
|
| 4304 |
+
$APPEND_sad
|
| 4305 |
+
$REPLACE_selected
|
| 4306 |
+
$REPLACE_skipped
|
| 4307 |
+
$REPLACE_freshmen
|
| 4308 |
+
$REPLACE_mode
|
| 4309 |
+
$REPLACE_calendar
|
| 4310 |
+
$REPLACE_luxury
|
| 4311 |
+
$REPLACE_summertime
|
| 4312 |
+
$REPLACE_device
|
| 4313 |
+
$APPEND_lesson
|
| 4314 |
+
$APPEND_surely
|
| 4315 |
+
$REPLACE_loved
|
| 4316 |
+
$REPLACE_reflect
|
| 4317 |
+
$REPLACE_shoulder
|
| 4318 |
+
$REPLACE_muscular
|
| 4319 |
+
$APPEND_plenty
|
| 4320 |
+
$REPLACE_Indian
|
| 4321 |
+
$REPLACE_pork
|
| 4322 |
+
$REPLACE_double
|
| 4323 |
+
$REPLACE_loneliness
|
| 4324 |
+
$REPLACE_economies
|
| 4325 |
+
$REPLACE_meaningful
|
| 4326 |
+
$REPLACE_cooperate
|
| 4327 |
+
$REPLACE_land
|
| 4328 |
+
$APPEND_report
|
| 4329 |
+
$REPLACE_block
|
| 4330 |
+
$REPLACE_cheapest
|
| 4331 |
+
$REPLACE_mirror
|
| 4332 |
+
$REPLACE_wealthy
|
| 4333 |
+
$APPEND_application
|
| 4334 |
+
$REPLACE_quarter
|
| 4335 |
+
$REPLACE_babies
|
| 4336 |
+
$REPLACE_risk
|
| 4337 |
+
$REPLACE_discussions
|
| 4338 |
+
$REPLACE_lightning
|
| 4339 |
+
$REPLACE_briefly
|
| 4340 |
+
$REPLACE_congratulated
|
| 4341 |
+
$REPLACE_breathing
|
| 4342 |
+
$REPLACE_eagerly
|
| 4343 |
+
$REPLACE_resolved
|
| 4344 |
+
$APPEND_staying
|
| 4345 |
+
$APPEND_history
|
| 4346 |
+
$APPEND_phones
|
| 4347 |
+
$REPLACE_involving
|
| 4348 |
+
$REPLACE_enthusiastic
|
| 4349 |
+
$REPLACE_cookies
|
| 4350 |
+
$REPLACE_frightened
|
| 4351 |
+
$REPLACE_entirely
|
| 4352 |
+
$REPLACE_enormous
|
| 4353 |
+
$APPEND_aspects
|
| 4354 |
+
$REPLACE_stable
|
| 4355 |
+
$APPEND_section
|
| 4356 |
+
$APPEND_Thanks
|
| 4357 |
+
$APPEND_women
|
| 4358 |
+
$REPLACE_phase
|
| 4359 |
+
$REPLACE_16th
|
| 4360 |
+
$REPLACE_spicy
|
| 4361 |
+
$APPEND_produced
|
| 4362 |
+
$REPLACE_street
|
| 4363 |
+
$REPLACE_ignore
|
| 4364 |
+
$REPLACE_designer
|
| 4365 |
+
$APPEND_club
|
| 4366 |
+
$REPLACE_mum
|
| 4367 |
+
$REPLACE_sincere
|
| 4368 |
+
$REPLACE_offensive
|
| 4369 |
+
$REPLACE_memorized
|
| 4370 |
+
$APPEND_question
|
| 4371 |
+
$REPLACE_wa
|
| 4372 |
+
$REPLACE_garbage
|
| 4373 |
+
$REPLACE_Playing
|
| 4374 |
+
$REPLACE_castle
|
| 4375 |
+
$REPLACE_swam
|
| 4376 |
+
$REPLACE_leader
|
| 4377 |
+
$REPLACE_earthquakes
|
| 4378 |
+
$REPLACE_displayed
|
| 4379 |
+
$REPLACE_marathon
|
| 4380 |
+
$APPEND_songs
|
| 4381 |
+
$REPLACE_See
|
| 4382 |
+
$REPLACE_burn
|
| 4383 |
+
$APPEND_happily
|
| 4384 |
+
$REPLACE_salesman
|
| 4385 |
+
$REPLACE_unhealthy
|
| 4386 |
+
$REPLACE_base
|
| 4387 |
+
$REPLACE_crossing
|
| 4388 |
+
$REPLACE_Honestly
|
| 4389 |
+
$REPLACE_machines
|
| 4390 |
+
$REPLACE_freshman
|
| 4391 |
+
$REPLACE_dry
|
| 4392 |
+
$APPEND_exact
|
| 4393 |
+
$APPEND_January
|
| 4394 |
+
$APPEND_terms
|
| 4395 |
+
$REPLACE_happiest
|
| 4396 |
+
$APPEND_tastes
|
| 4397 |
+
$APPEND_design
|
| 4398 |
+
$REPLACE_champion
|
| 4399 |
+
$REPLACE_Diary
|
| 4400 |
+
$REPLACE_expressing
|
| 4401 |
+
$REPLACE_hardest
|
| 4402 |
+
$REPLACE_installed
|
| 4403 |
+
$REPLACE_Go
|
| 4404 |
+
$REPLACE_dollar
|
| 4405 |
+
$REPLACE_wooden
|
| 4406 |
+
$REPLACE_contrary
|
| 4407 |
+
$REPLACE_refers
|
| 4408 |
+
$REPLACE_employment
|
| 4409 |
+
$REPLACE_removed
|
| 4410 |
+
$REPLACE_opposing
|
| 4411 |
+
$REPLACE_actress
|
| 4412 |
+
$REPLACE_Ever
|
| 4413 |
+
$APPEND_beginning
|
| 4414 |
+
$REPLACE_approach
|
| 4415 |
+
$REPLACE_guide
|
| 4416 |
+
$REPLACE_blooming
|
| 4417 |
+
$REPLACE_necessarily
|
| 4418 |
+
$REPLACE_fed
|
| 4419 |
+
$REPLACE_stands
|
| 4420 |
+
$REPLACE_principal
|
| 4421 |
+
$REPLACE_faced
|
| 4422 |
+
$APPEND_local
|
| 4423 |
+
$APPEND_highly
|
| 4424 |
+
$REPLACE_fiction
|
| 4425 |
+
$APPEND_finding
|
| 4426 |
+
$REPLACE_attracts
|
| 4427 |
+
$REPLACE_2011
|
| 4428 |
+
$REPLACE_businessmen
|
| 4429 |
+
$REPLACE_Friends
|
| 4430 |
+
$REPLACE_repaired
|
| 4431 |
+
$REPLACE_bet
|
| 4432 |
+
$REPLACE_hunger
|
| 4433 |
+
$REPLACE_dealing
|
| 4434 |
+
$REPLACE_Except
|
| 4435 |
+
$APPEND_role
|
| 4436 |
+
$REPLACE_admitted
|
| 4437 |
+
$REPLACE_island
|
| 4438 |
+
$REPLACE_quietly
|
| 4439 |
+
$REPLACE_lets
|
| 4440 |
+
$REPLACE_fee
|
| 4441 |
+
$REPLACE_performances
|
| 4442 |
+
$REPLACE_bar
|
| 4443 |
+
$REPLACE_maximum
|
| 4444 |
+
$REPLACE_escaped
|
| 4445 |
+
$REPLACE_ours
|
| 4446 |
+
$APPEND_originally
|
| 4447 |
+
$REPLACE_surroundings
|
| 4448 |
+
$REPLACE_golden
|
| 4449 |
+
$APPEND_technology
|
| 4450 |
+
$APPEND_research
|
| 4451 |
+
$REPLACE_borrow
|
| 4452 |
+
$REPLACE_remind
|
| 4453 |
+
$REPLACE_Beginning
|
| 4454 |
+
$REPLACE_passage
|
| 4455 |
+
$APPEND_drive
|
| 4456 |
+
$APPEND_teaching
|
| 4457 |
+
$REPLACE_typhoons
|
| 4458 |
+
$REPLACE_grabbed
|
| 4459 |
+
$REPLACE_incidents
|
| 4460 |
+
$REPLACE_hid
|
| 4461 |
+
$REPLACE_operate
|
| 4462 |
+
$REPLACE_19th
|
| 4463 |
+
$APPEND_sure
|
| 4464 |
+
$REPLACE_permission
|
| 4465 |
+
$APPEND_previous
|
| 4466 |
+
$REPLACE_rental
|
| 4467 |
+
$REPLACE_tothe
|
| 4468 |
+
$APPEND_round
|
| 4469 |
+
$REPLACE_Oops
|
| 4470 |
+
$REPLACE_survival
|
| 4471 |
+
$REPLACE_shaped
|
| 4472 |
+
$APPEND_costs
|
| 4473 |
+
$REPLACE_conference
|
| 4474 |
+
$APPEND_move
|
| 4475 |
+
$REPLACE_dressed
|
| 4476 |
+
$REPLACE_smells
|
| 4477 |
+
$REPLACE_artistic
|
| 4478 |
+
$REPLACE_holds
|
| 4479 |
+
$REPLACE_introducing
|
| 4480 |
+
$REPLACE_nursery
|
| 4481 |
+
$APPEND_May
|
| 4482 |
+
$REPLACE_troubled
|
| 4483 |
+
$REPLACE_optimistic
|
| 4484 |
+
$REPLACE_guarantee
|
| 4485 |
+
$REPLACE_toothache
|
| 4486 |
+
$REPLACE_bother
|
| 4487 |
+
$REPLACE_Congratulations
|
| 4488 |
+
$REPLACE_purchased
|
| 4489 |
+
$APPEND_21
|
| 4490 |
+
$REPLACE_accurately
|
| 4491 |
+
$REPLACE_belief
|
| 4492 |
+
$REPLACE_numbers
|
| 4493 |
+
$REPLACE_switch
|
| 4494 |
+
$REPLACE_personally
|
| 4495 |
+
$REPLACE_negatively
|
| 4496 |
+
$REPLACE_fireflies
|
| 4497 |
+
$APPEND_receive
|
| 4498 |
+
$APPEND_shop
|
| 4499 |
+
$REPLACE_haircut
|
| 4500 |
+
$REPLACE_productive
|
| 4501 |
+
$REPLACE_crisis
|
| 4502 |
+
$REPLACE_relatively
|
| 4503 |
+
$REPLACE_celebration
|
| 4504 |
+
$REPLACE_controversial
|
| 4505 |
+
$REPLACE_AM
|
| 4506 |
+
$REPLACE_factors
|
| 4507 |
+
$REPLACE_snowing
|
| 4508 |
+
$REPLACE_amusing
|
| 4509 |
+
$REPLACE_sharing
|
| 4510 |
+
$REPLACE_Companies
|
| 4511 |
+
$REPLACE_NYC
|
| 4512 |
+
$REPLACE_moves
|
| 4513 |
+
$REPLACE_hanging
|
| 4514 |
+
$REPLACE_simpler
|
| 4515 |
+
$APPEND_apart
|
| 4516 |
+
$REPLACE_race
|
| 4517 |
+
$REPLACE_hip
|
| 4518 |
+
$REPLACE_underwear
|
| 4519 |
+
$REPLACE_official
|
| 4520 |
+
$REPLACE_shift
|
| 4521 |
+
$APPEND_week
|
| 4522 |
+
$REPLACE_analyse
|
| 4523 |
+
$REPLACE_25th
|
| 4524 |
+
$REPLACE_teenage
|
| 4525 |
+
$APPEND_recent
|
| 4526 |
+
$REPLACE_skin
|
| 4527 |
+
$REPLACE_enroll
|
| 4528 |
+
$REPLACE_nickname
|
| 4529 |
+
$APPEND_accidentally
|
| 4530 |
+
$REPLACE_inventions
|
| 4531 |
+
$REPLACE_boys
|
| 4532 |
+
$APPEND_Afterwards
|
| 4533 |
+
$REPLACE_gentle
|
| 4534 |
+
$REPLACE_overnight
|
| 4535 |
+
$APPEND_explain
|
| 4536 |
+
$REPLACE_wanting
|
| 4537 |
+
$REPLACE_encouraging
|
| 4538 |
+
$REPLACE_contribute
|
| 4539 |
+
$REPLACE_necessities
|
| 4540 |
+
$REPLACE_enrolled
|
| 4541 |
+
$REPLACE_Normally
|
| 4542 |
+
$REPLACE_balloon
|
| 4543 |
+
$REPLACE_applying
|
| 4544 |
+
$APPEND_uses
|
| 4545 |
+
$REPLACE_recall
|
| 4546 |
+
$REPLACE_nearest
|
| 4547 |
+
$REPLACE_cashier
|
| 4548 |
+
$REPLACE_corner
|
| 4549 |
+
$APPEND_space
|
| 4550 |
+
$REPLACE_thatI
|
| 4551 |
+
$REPLACE_treasure
|
| 4552 |
+
$REPLACE_International
|
| 4553 |
+
$REPLACE_forth
|
| 4554 |
+
$REPLACE_assigned
|
| 4555 |
+
$APPEND_education
|
| 4556 |
+
$APPEND_except
|
| 4557 |
+
$REPLACE_jewellery
|
| 4558 |
+
$REPLACE_manga
|
| 4559 |
+
$APPEND_participate
|
| 4560 |
+
$APPEND_increase
|
| 4561 |
+
$REPLACE_slippery
|
| 4562 |
+
$REPLACE_snowboard
|
| 4563 |
+
$REPLACE_novel
|
| 4564 |
+
$REPLACE_predict
|
| 4565 |
+
$REPLACE_remained
|
| 4566 |
+
$REPLACE_outcome
|
| 4567 |
+
$APPEND_whose
|
| 4568 |
+
$APPEND_slightly
|
| 4569 |
+
$APPEND_serious
|
| 4570 |
+
$REPLACE_Research
|
| 4571 |
+
$REPLACE_marvelous
|
| 4572 |
+
$APPEND_excited
|
| 4573 |
+
$REPLACE_organization
|
| 4574 |
+
$REPLACE_list
|
| 4575 |
+
$REPLACE_automatically
|
| 4576 |
+
$REPLACE_differ
|
| 4577 |
+
$REPLACE_Mount
|
| 4578 |
+
$REPLACE_arrangement
|
| 4579 |
+
$APPEND_spending
|
| 4580 |
+
$REPLACE_adopt
|
| 4581 |
+
$APPEND_Soon
|
| 4582 |
+
$APPEND_Mr
|
| 4583 |
+
$REPLACE_irritable
|
| 4584 |
+
$REPLACE_Wish
|
| 4585 |
+
$REPLACE_writting
|
| 4586 |
+
$REPLACE_Sincerely
|
| 4587 |
+
$APPEND_winter
|
| 4588 |
+
$REPLACE_rose
|
| 4589 |
+
$REPLACE_businessman
|
| 4590 |
+
$REPLACE_flavors
|
| 4591 |
+
$REPLACE_smell
|
| 4592 |
+
$REPLACE_fortunate
|
| 4593 |
+
$APPEND_TOEIC
|
| 4594 |
+
$APPEND_mentioned
|
| 4595 |
+
$APPEND_process
|
| 4596 |
+
$APPEND_amp
|
| 4597 |
+
$APPEND_neither
|
| 4598 |
+
$REPLACE_enemies
|
| 4599 |
+
$REPLACE_acceptance
|
| 4600 |
+
$REPLACE_drivers
|
| 4601 |
+
$REPLACE_murderer
|
| 4602 |
+
$REPLACE_Melbourne
|
| 4603 |
+
$REPLACE_Specifically
|
| 4604 |
+
$APPEND_complete
|
| 4605 |
+
$APPEND_focus
|
| 4606 |
+
$REPLACE_illegal
|
| 4607 |
+
$APPEND_hurts
|
| 4608 |
+
$REPLACE_groom
|
| 4609 |
+
$APPEND_preposition
|
| 4610 |
+
$APPEND_com
|
| 4611 |
+
$APPEND_beautiful
|
| 4612 |
+
$REPLACE_sightseeing
|
| 4613 |
+
$REPLACE_bringing
|
| 4614 |
+
$REPLACE_sources
|
| 4615 |
+
$APPEND_videos
|
| 4616 |
+
$APPEND_lunch
|
| 4617 |
+
$APPEND_11
|
| 4618 |
+
$REPLACE_suggestion
|
| 4619 |
+
$REPLACE_programmes
|
| 4620 |
+
$APPEND_jobs
|
| 4621 |
+
$REPLACE_scent
|
| 4622 |
+
$REPLACE_crime
|
| 4623 |
+
$REPLACE_desperate
|
| 4624 |
+
$REPLACE_deliver
|
| 4625 |
+
$APPEND_performed
|
| 4626 |
+
$REPLACE_cars
|
| 4627 |
+
$REPLACE_pet
|
| 4628 |
+
$REPLACE_dangers
|
| 4629 |
+
$APPEND_perform
|
| 4630 |
+
$REPLACE_vehicles
|
| 4631 |
+
$APPEND_figure
|
| 4632 |
+
$APPEND_Later
|
| 4633 |
+
$REPLACE_matches
|
| 4634 |
+
$REPLACE_spaghetti
|
| 4635 |
+
$APPEND_light
|
| 4636 |
+
$REPLACE_corrects
|
| 4637 |
+
$REPLACE_Unlike
|
| 4638 |
+
$APPEND_occasionally
|
| 4639 |
+
$APPEND_truly
|
| 4640 |
+
$REPLACE_silence
|
| 4641 |
+
$REPLACE_intense
|
| 4642 |
+
$REPLACE_substitute
|
| 4643 |
+
$APPEND_freely
|
| 4644 |
+
$APPEND_party
|
| 4645 |
+
$APPEND_His
|
| 4646 |
+
$REPLACE_bothersome
|
| 4647 |
+
$REPLACE_pursuing
|
| 4648 |
+
$REPLACE_Out
|
| 4649 |
+
$REPLACE_direction
|
| 4650 |
+
$APPEND_check
|
| 4651 |
+
$REPLACE_authorities
|
| 4652 |
+
$APPEND_sort
|
| 4653 |
+
$REPLACE_challenges
|
| 4654 |
+
$REPLACE_plural
|
| 4655 |
+
$REPLACE_refused
|
| 4656 |
+
$REPLACE_informed
|
| 4657 |
+
$REPLACE_demand
|
| 4658 |
+
$REPLACE_mess
|
| 4659 |
+
$REPLACE_force
|
| 4660 |
+
$REPLACE_paintings
|
| 4661 |
+
$APPEND_remember
|
| 4662 |
+
$REPLACE_sky
|
| 4663 |
+
$APPEND_practicing
|
| 4664 |
+
$REPLACE_understandable
|
| 4665 |
+
$REPLACE_crashed
|
| 4666 |
+
$APPEND_communicate
|
| 4667 |
+
$REPLACE_manner
|
| 4668 |
+
$REPLACE_payment
|
| 4669 |
+
$REPLACE_artist
|
| 4670 |
+
$APPEND_tend
|
| 4671 |
+
$REPLACE_recession
|
| 4672 |
+
$REPLACE_til
|
| 4673 |
+
$REPLACE_mixed
|
| 4674 |
+
$APPEND_bar
|
| 4675 |
+
$REPLACE_England
|
| 4676 |
+
$REPLACE_gathered
|
| 4677 |
+
$REPLACE_combined
|
| 4678 |
+
$REPLACE_Rome
|
| 4679 |
+
$APPEND_wet
|
| 4680 |
+
$REPLACE_network
|
| 4681 |
+
$REPLACE_steak
|
| 4682 |
+
$REPLACE_California
|
| 4683 |
+
$REPLACE_birth
|
| 4684 |
+
$APPEND_state
|
| 4685 |
+
$REPLACE_expressed
|
| 4686 |
+
$REPLACE_haven
|
| 4687 |
+
$REPLACE_seldom
|
| 4688 |
+
$APPEND_health
|
| 4689 |
+
$REPLACE_partners
|
| 4690 |
+
$REPLACE_finishing
|
| 4691 |
+
$REPLACE_Monday
|
| 4692 |
+
$REPLACE_liters
|
| 4693 |
+
$REPLACE_Hi
|
| 4694 |
+
$APPEND_August
|
| 4695 |
+
$REPLACE_gorgeous
|
| 4696 |
+
$APPEND_seven
|
| 4697 |
+
$APPEND_remaining
|
| 4698 |
+
$REPLACE_chances
|
| 4699 |
+
$APPEND_older
|
| 4700 |
+
$REPLACE_Eating
|
| 4701 |
+
$APPEND_Christmas
|
| 4702 |
+
$REPLACE_dentist
|
| 4703 |
+
$REPLACE_league
|
| 4704 |
+
$REPLACE_korean
|
| 4705 |
+
$APPEND_greatly
|
| 4706 |
+
$APPEND_return
|
| 4707 |
+
$REPLACE_genres
|
| 4708 |
+
$REPLACE_authors
|
| 4709 |
+
$APPEND_Thank
|
| 4710 |
+
$REPLACE_diseases
|
| 4711 |
+
$REPLACE_travels
|
| 4712 |
+
$REPLACE_sheet
|
| 4713 |
+
$REPLACE_fastest
|
| 4714 |
+
$APPEND_surprised
|
| 4715 |
+
$REPLACE_rushed
|
| 4716 |
+
$APPEND_attending
|
| 4717 |
+
$APPEND_Furthermore
|
| 4718 |
+
$REPLACE_Laden
|
| 4719 |
+
$REPLACE_creative
|
| 4720 |
+
$REPLACE_meantime
|
| 4721 |
+
$REPLACE_Turkey
|
| 4722 |
+
$REPLACE_presenting
|
| 4723 |
+
$REPLACE_Christian
|
| 4724 |
+
$REPLACE_nervousness
|
| 4725 |
+
$REPLACE_meaningless
|
| 4726 |
+
$APPEND_player
|
| 4727 |
+
$REPLACE_motivate
|
| 4728 |
+
$REPLACE_advertisements
|
| 4729 |
+
$REPLACE_artwork
|
| 4730 |
+
$REPLACE_encouragement
|
| 4731 |
+
$REPLACE_regard
|
| 4732 |
+
$REPLACE_slower
|
| 4733 |
+
$REPLACE_dolls
|
| 4734 |
+
$REPLACE_200
|
| 4735 |
+
$REPLACE_unconsciously
|
| 4736 |
+
$APPEND_happens
|
| 4737 |
+
$REPLACE_facility
|
| 4738 |
+
$APPEND_advice
|
| 4739 |
+
$REPLACE_North
|
| 4740 |
+
$REPLACE_awareness
|
| 4741 |
+
$APPEND_planned
|
| 4742 |
+
$REPLACE_genetic
|
| 4743 |
+
$REPLACE_management
|
| 4744 |
+
$REPLACE_refund
|
| 4745 |
+
$REPLACE_brighter
|
| 4746 |
+
$REPLACE_confirm
|
| 4747 |
+
$REPLACE_burning
|
| 4748 |
+
$REPLACE_composition
|
| 4749 |
+
$APPEND_answer
|
| 4750 |
+
$REPLACE_conserve
|
| 4751 |
+
$REPLACE_destruction
|
| 4752 |
+
$REPLACE_duties
|
| 4753 |
+
$REPLACE_creativity
|
| 4754 |
+
$APPEND_expressions
|
| 4755 |
+
$APPEND_commit
|
| 4756 |
+
$REPLACE_East
|
| 4757 |
+
$REPLACE_milk
|
| 4758 |
+
$REPLACE_30pm
|
| 4759 |
+
$REPLACE_belong
|
| 4760 |
+
$REPLACE_autograph
|
| 4761 |
+
$REPLACE_caring
|
| 4762 |
+
$REPLACE_download
|
| 4763 |
+
$APPEND_development
|
| 4764 |
+
$REPLACE_compete
|
| 4765 |
+
$REPLACE_qualities
|
| 4766 |
+
$APPEND_avoid
|
| 4767 |
+
$REPLACE_recieved
|
| 4768 |
+
$APPEND_Perfect
|
| 4769 |
+
$REPLACE_yours
|
| 4770 |
+
$REPLACE_breaks
|
| 4771 |
+
$REPLACE_amusement
|
| 4772 |
+
$REPLACE_models
|
| 4773 |
+
$REPLACE_persevere
|
| 4774 |
+
$REPLACE_emergency
|
| 4775 |
+
$REPLACE_empty
|
| 4776 |
+
$REPLACE_rescue
|
| 4777 |
+
$APPEND_term
|
| 4778 |
+
$REPLACE_requirements
|
| 4779 |
+
$REPLACE_sufficient
|
| 4780 |
+
$APPEND_cooking
|
| 4781 |
+
$REPLACE_fascinated
|
| 4782 |
+
$REPLACE_14th
|
| 4783 |
+
$REPLACE_relevant
|
| 4784 |
+
$REPLACE_listed
|
| 4785 |
+
$REPLACE_vision
|
| 4786 |
+
$REPLACE_g
|
| 4787 |
+
$REPLACE_leadership
|
| 4788 |
+
$REPLACE_butI
|
| 4789 |
+
$APPEND_provide
|
| 4790 |
+
$REPLACE_organize
|
| 4791 |
+
$APPEND_created
|
| 4792 |
+
$REPLACE_12th
|
| 4793 |
+
$REPLACE_collection
|
| 4794 |
+
$REPLACE_supply
|
| 4795 |
+
$APPEND_Besides
|
| 4796 |
+
$REPLACE_stranger
|
| 4797 |
+
$REPLACE_combination
|
| 4798 |
+
$REPLACE_farther
|
| 4799 |
+
$REPLACE_awaiting
|
| 4800 |
+
$APPEND_hand
|
| 4801 |
+
$REPLACE_unsure
|
| 4802 |
+
$REPLACE_profile
|
| 4803 |
+
$APPEND_moving
|
| 4804 |
+
$APPEND_street
|
| 4805 |
+
$REPLACE_delighted
|
| 4806 |
+
$REPLACE_pretended
|
| 4807 |
+
$REPLACE_driven
|
| 4808 |
+
$REPLACE_maintaining
|
| 4809 |
+
$REPLACE_liar
|
| 4810 |
+
$TRANSFORM_SPLIT_HYPHEN
|
| 4811 |
+
$REPLACE_glass
|
| 4812 |
+
$REPLACE_stick
|
| 4813 |
+
$REPLACE_itchy
|
| 4814 |
+
$REPLACE_ought
|
| 4815 |
+
$REPLACE_consumption
|
| 4816 |
+
$REPLACE_quicker
|
| 4817 |
+
$REPLACE_spare
|
| 4818 |
+
$REPLACE_governments
|
| 4819 |
+
$APPEND_view
|
| 4820 |
+
$REPLACE_P
|
| 4821 |
+
$REPLACE_colorful
|
| 4822 |
+
$REPLACE_guitarist
|
| 4823 |
+
$APPEND_wants
|
| 4824 |
+
$REPLACE_million
|
| 4825 |
+
$REPLACE_behalf
|
| 4826 |
+
$REPLACE_kilometres
|
| 4827 |
+
$REPLACE_bank
|
| 4828 |
+
$APPEND_morning
|
| 4829 |
+
$REPLACE_weekends
|
| 4830 |
+
$REPLACE_occasion
|
| 4831 |
+
$APPEND_tour
|
| 4832 |
+
$REPLACE_object
|
| 4833 |
+
$REPLACE_Others
|
| 4834 |
+
$REPLACE_Considering
|
| 4835 |
+
$REPLACE_species
|
| 4836 |
+
$REPLACE_session
|
| 4837 |
+
$APPEND_removed
|
| 4838 |
+
$REPLACE_hiking
|
| 4839 |
+
$REPLACE_resolutions
|
| 4840 |
+
$REPLACE_peak
|
| 4841 |
+
$REPLACE_consequences
|
| 4842 |
+
$REPLACE_soaked
|
| 4843 |
+
$REPLACE_presents
|
| 4844 |
+
$APPEND_25
|
| 4845 |
+
$REPLACE_salad
|
| 4846 |
+
$REPLACE_filling
|
| 4847 |
+
$REPLACE_attack
|
| 4848 |
+
$APPEND_foods
|
| 4849 |
+
$REPLACE_tendency
|
| 4850 |
+
$REPLACE_discoveries
|
| 4851 |
+
$REPLACE_immediate
|
| 4852 |
+
$REPLACE_submitted
|
| 4853 |
+
$REPLACE_THAT
|
| 4854 |
+
$APPEND_develop
|
| 4855 |
+
$REPLACE_battery
|
| 4856 |
+
$REPLACE_dont
|
| 4857 |
+
$REPLACE_feature
|
| 4858 |
+
$APPEND_opportunity
|
| 4859 |
+
$REPLACE_bodies
|
| 4860 |
+
$REPLACE_goldfish
|
| 4861 |
+
$REPLACE_adapt
|
| 4862 |
+
$REPLACE_views
|
| 4863 |
+
$REPLACE_forgetting
|
| 4864 |
+
$REPLACE_saved
|
| 4865 |
+
$REPLACE_doesn
|
| 4866 |
+
$REPLACE_thirst
|
| 4867 |
+
$APPEND_Me
|
| 4868 |
+
$REPLACE_distant
|
| 4869 |
+
$REPLACE_opposition
|
| 4870 |
+
$REPLACE_breed
|
| 4871 |
+
$REPLACE_practised
|
| 4872 |
+
$REPLACE_miserable
|
| 4873 |
+
$APPEND_sore
|
| 4874 |
+
$REPLACE_brain
|
| 4875 |
+
$REPLACE_sessions
|
| 4876 |
+
$REPLACE_policeman
|
| 4877 |
+
$REPLACE_favor
|
| 4878 |
+
$REPLACE_managing
|
| 4879 |
+
$REPLACE_rains
|
| 4880 |
+
$REPLACE_baths
|
| 4881 |
+
$REPLACE_surrounding
|
| 4882 |
+
$REPLACE_Seoul
|
| 4883 |
+
$APPEND_regardless
|
| 4884 |
+
$APPEND_Something
|
| 4885 |
+
$REPLACE_architectural
|
| 4886 |
+
$REPLACE_ok
|
| 4887 |
+
$REPLACE_welfare
|
| 4888 |
+
$APPEND_share
|
| 4889 |
+
$REPLACE_daughters
|
| 4890 |
+
$REPLACE_phones
|
| 4891 |
+
$REPLACE_downstairs
|
| 4892 |
+
$REPLACE_arriving
|
| 4893 |
+
$REPLACE_stepped
|
| 4894 |
+
$REPLACE_competing
|
| 4895 |
+
$REPLACE_catching
|
| 4896 |
+
$REPLACE_conversing
|
| 4897 |
+
$REPLACE_encourages
|
| 4898 |
+
$REPLACE_depressing
|
| 4899 |
+
$REPLACE_begining
|
| 4900 |
+
$REPLACE_admission
|
| 4901 |
+
$APPEND_voice
|
| 4902 |
+
$REPLACE_boredom
|
| 4903 |
+
$APPEND_alot
|
| 4904 |
+
$APPEND_familiar
|
| 4905 |
+
$REPLACE_breaking
|
| 4906 |
+
$REPLACE_fortunately
|
| 4907 |
+
$REPLACE_Over
|
| 4908 |
+
$APPEND_lost
|
| 4909 |
+
$REPLACE_intended
|
| 4910 |
+
$REPLACE_neighbourhood
|
| 4911 |
+
$REPLACE_mysteries
|
| 4912 |
+
$REPLACE_certificate
|
| 4913 |
+
$REPLACE_data
|
| 4914 |
+
$APPEND_personal
|
| 4915 |
+
$REPLACE_joyful
|
| 4916 |
+
$REPLACE_immigrants
|
| 4917 |
+
$REPLACE_emotions
|
| 4918 |
+
$REPLACE_checkup
|
| 4919 |
+
$REPLACE_licence
|
| 4920 |
+
$REPLACE_juice
|
| 4921 |
+
$APPEND_whenever
|
| 4922 |
+
$REPLACE_dogs
|
| 4923 |
+
$REPLACE_thereby
|
| 4924 |
+
$APPEND_department
|
| 4925 |
+
$APPEND_assignment
|
| 4926 |
+
$REPLACE_defend
|
| 4927 |
+
$REPLACE_approached
|
| 4928 |
+
$REPLACE_Fireworks
|
| 4929 |
+
$APPEND_activity
|
| 4930 |
+
$APPEND_quality
|
| 4931 |
+
$REPLACE_basics
|
| 4932 |
+
$REPLACE_costumes
|
| 4933 |
+
$REPLACE_key
|
| 4934 |
+
$REPLACE_outdoors
|
| 4935 |
+
$REPLACE_hay
|
| 4936 |
+
$APPEND_prepare
|
| 4937 |
+
$REPLACE_hiding
|
| 4938 |
+
$REPLACE_curiosity
|
| 4939 |
+
$APPEND_dealing
|
| 4940 |
+
$REPLACE_passion
|
| 4941 |
+
$REPLACE_costed
|
| 4942 |
+
$REPLACE_fries
|
| 4943 |
+
$REPLACE_HAVE
|
| 4944 |
+
$REPLACE_divorced
|
| 4945 |
+
$APPEND_display
|
| 4946 |
+
$REPLACE_baby
|
| 4947 |
+
$APPEND_cherry
|
| 4948 |
+
$REPLACE_Returning
|
| 4949 |
+
$APPEND_lack
|
| 4950 |
+
$APPEND_learnt
|
| 4951 |
+
$REPLACE_Im
|
| 4952 |
+
$APPEND_naturally
|
| 4953 |
+
$REPLACE_router
|
| 4954 |
+
$APPEND_goals
|
| 4955 |
+
$REPLACE_seaside
|
| 4956 |
+
$REPLACE_summarize
|
| 4957 |
+
$APPEND_appeared
|
| 4958 |
+
$REPLACE_claim
|
| 4959 |
+
$APPEND_ate
|
| 4960 |
+
$REPLACE_exchanging
|
| 4961 |
+
$APPEND_arrive
|
| 4962 |
+
$APPEND_art
|
| 4963 |
+
$REPLACE_participating
|
| 4964 |
+
$REPLACE_seek
|
| 4965 |
+
$REPLACE_innocent
|
| 4966 |
+
$APPEND_express
|
| 4967 |
+
$REPLACE_lunchtime
|
| 4968 |
+
$REPLACE_reaction
|
| 4969 |
+
$REPLACE_consisted
|
| 4970 |
+
$REPLACE_Eastern
|
| 4971 |
+
$APPEND_track
|
| 4972 |
+
$APPEND_baby
|
| 4973 |
+
$REPLACE_touching
|
| 4974 |
+
$REPLACE_lively
|
| 4975 |
+
$REPLACE_bridge
|
| 4976 |
+
$REPLACE_murderers
|
| 4977 |
+
$REPLACE_Brazil
|
| 4978 |
+
$REPLACE_feeding
|
| 4979 |
+
$REPLACE_honestly
|
| 4980 |
+
$REPLACE_Piece
|
| 4981 |
+
$REPLACE_springs
|
| 4982 |
+
$REPLACE_purchase
|
| 4983 |
+
$REPLACE_pray
|
| 4984 |
+
$REPLACE_washed
|
| 4985 |
+
$APPEND_sentence
|
| 4986 |
+
$REPLACE_Olympics
|
| 4987 |
+
$REPLACE_strongest
|
| 4988 |
+
$REPLACE_leads
|
| 4989 |
+
$REPLACE_stomachache
|
| 4990 |
+
$REPLACE_John
|
| 4991 |
+
$REPLACE_opponent
|
| 4992 |
+
$REPLACE_contents
|
| 4993 |
+
$REPLACE_plot
|
| 4994 |
+
$APPEND_Many
|
| 4995 |
+
$REPLACE_experiment
|
| 4996 |
+
$REPLACE_beings
|
| 4997 |
+
$REPLACE_owns
|
| 4998 |
+
$REPLACE_airline
|
| 4999 |
+
$REPLACE_severely
|
| 5000 |
+
$REPLACE_ages
|
| 5001 |
+
@@UNKNOWN@@
|
| 5002 |
+
@@PADDING@@
|
output_vocabulary/non_padded_namespaces.txt
ADDED
|
@@ -0,0 +1,2 @@
| 1 |
+
*tags
|
| 2 |
+
*labels
|
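Note: together with d_tags.txt, this labels.txt file (5,002 correction labels, closed by the special @@UNKNOWN@@ and @@PADDING@@ entries) and non_padded_namespaces.txt make up the output vocabulary the GECToR model predicts over. A minimal, purely illustrative sketch of inspecting the committed vocabulary (not part of the app itself):

import pathlib

# Path is relative to the repo root of this Space.
labels_path = pathlib.Path("output_vocabulary") / "labels.txt"
labels = labels_path.read_text(encoding="utf-8").splitlines()

print(len(labels))   # 5002 correction labels
print(labels[-2:])   # ['@@UNKNOWN@@', '@@PADDING@@'] close the vocabulary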
requirements.txt
CHANGED
|
@@ -1,10 +1,14 @@
| 1 |
gradio
|
| 2 |
-
transformers
|
| 3 |
-
torch
|
| 4 |
spacy
|
| 5 |
nltk
|
| 6 |
gensim
|
| 7 |
pattern
|
| 8 |
textblob
|
| 9 |
-
|
| 10 |
-
|
| 1 |
+
torch==1.10.0
|
| 2 |
+
allennlp==0.8.4
|
| 3 |
+
python-Levenshtein==0.12.1
|
| 4 |
+
transformers==4.11.3
|
| 5 |
+
scikit-learn==0.20.0
|
| 6 |
+
sentencepiece==0.1.95
|
| 7 |
+
overrides==4.1.2
|
| 8 |
+
numpy==1.19.5
|
| 9 |
gradio
|
| 10 |
spacy
|
| 11 |
nltk
|
| 12 |
gensim
|
| 13 |
pattern
|
| 14 |
textblob
|
utils/filter_brackets.py
ADDED
|
@@ -0,0 +1,35 @@
| 1 |
+
import argparse
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
from helpers import write_lines
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def filter_line(line):
|
| 8 |
+
if "-LRB-" in line and "-RRB-" in line:
|
| 9 |
+
rep = re.sub(r'\-.*?LRB.*?\-.*?\-.*?RRB.*?\-', '', line)
|
| 10 |
+
line_cleaned = rep
|
| 11 |
+
elif ("-LRB-" in line and "-RRB-" not in line) or (
|
| 12 |
+
"-LRB-" not in line and "-RRB-" in line):
|
| 13 |
+
line_cleaned = line.replace("-LRB-", '"').replace("-RRB-", '"')
|
| 14 |
+
else:
|
| 15 |
+
line_cleaned = line
|
| 16 |
+
return line_cleaned
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def main(args):
|
| 20 |
+
with open(args.source) as f:
|
| 21 |
+
data = [row.rstrip() for row in f]
|
| 22 |
+
|
| 23 |
+
write_lines(args.output, [filter_line(row) for row in data])
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
if __name__ == '__main__':
|
| 27 |
+
parser = argparse.ArgumentParser()
|
| 28 |
+
parser.add_argument('-s', '--source',
|
| 29 |
+
help='Path to the source file',
|
| 30 |
+
required=True)
|
| 31 |
+
parser.add_argument('-o', '--output',
|
| 32 |
+
help='Path to the output file',
|
| 33 |
+
required=True)
|
| 34 |
+
args = parser.parse_args()
|
| 35 |
+
main(args)
|
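A minimal usage sketch for the filter above (assumes utils/ is on the import path; note that importing filter_brackets also imports helpers.py, which expects data/verb-form-vocab.txt to be present). filter_line removes a matched -LRB-/-RRB- pair together with the text between them, and rewrites a lone bracket token as a plain double quote:

from filter_brackets import filter_line

print(filter_line("He said -LRB- quietly -RRB- hello ."))  # matched pair removed with its contents
print(filter_line("He said -LRB- hello ."))                # lone -LRB- becomes a double quote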
utils/helpers.py
ADDED
|
@@ -0,0 +1,233 @@
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
VOCAB_DIR = Path(__file__).resolve().parent.parent / "data"
|
| 6 |
+
PAD = "@@PADDING@@"
|
| 7 |
+
UNK = "@@UNKNOWN@@"
|
| 8 |
+
START_TOKEN = "$START"
|
| 9 |
+
SEQ_DELIMETERS = {"tokens": " ",
|
| 10 |
+
"labels": "SEPL|||SEPR",
|
| 11 |
+
"operations": "SEPL__SEPR"}
|
| 12 |
+
REPLACEMENTS = {
|
| 13 |
+
"''": '"',
|
| 14 |
+
'--': '—',
|
| 15 |
+
'`': "'",
|
| 16 |
+
"'ve": "' ve",
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_verb_form_dicts():
|
| 21 |
+
path_to_dict = os.path.join(VOCAB_DIR, "verb-form-vocab.txt")
|
| 22 |
+
encode, decode = {}, {}
|
| 23 |
+
with open(path_to_dict, encoding="utf-8") as f:
|
| 24 |
+
for line in f:
|
| 25 |
+
words, tags = line.split(":")
|
| 26 |
+
word1, word2 = words.split("_")
|
| 27 |
+
tag1, tag2 = tags.split("_")
|
| 28 |
+
decode_key = f"{word1}_{tag1}_{tag2.strip()}"
|
| 29 |
+
if decode_key not in decode:
|
| 30 |
+
encode[words] = tags
|
| 31 |
+
decode[decode_key] = word2
|
| 32 |
+
return encode, decode
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
ENCODE_VERB_DICT, DECODE_VERB_DICT = get_verb_form_dicts()
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def get_target_sent_by_edits(source_tokens, edits):
|
| 39 |
+
target_tokens = source_tokens[:]
|
| 40 |
+
shift_idx = 0
|
| 41 |
+
for edit in edits:
|
| 42 |
+
start, end, label, _ = edit
|
| 43 |
+
target_pos = start + shift_idx
|
| 44 |
+
source_token = target_tokens[target_pos] \
|
| 45 |
+
if len(target_tokens) > target_pos >= 0 else ''
|
| 46 |
+
if label == "":
|
| 47 |
+
del target_tokens[target_pos]
|
| 48 |
+
shift_idx -= 1
|
| 49 |
+
elif start == end:
|
| 50 |
+
word = label.replace("$APPEND_", "")
|
| 51 |
+
target_tokens[target_pos: target_pos] = [word]
|
| 52 |
+
shift_idx += 1
|
| 53 |
+
elif label.startswith("$TRANSFORM_"):
|
| 54 |
+
word = apply_reverse_transformation(source_token, label)
|
| 55 |
+
if word is None:
|
| 56 |
+
word = source_token
|
| 57 |
+
target_tokens[target_pos] = word
|
| 58 |
+
elif start == end - 1:
|
| 59 |
+
word = label.replace("$REPLACE_", "")
|
| 60 |
+
target_tokens[target_pos] = word
|
| 61 |
+
elif label.startswith("$MERGE_"):
|
| 62 |
+
target_tokens[target_pos + 1: target_pos + 1] = [label]
|
| 63 |
+
shift_idx += 1
|
| 64 |
+
|
| 65 |
+
return replace_merge_transforms(target_tokens)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def replace_merge_transforms(tokens):
|
| 69 |
+
if all(not x.startswith("$MERGE_") for x in tokens):
|
| 70 |
+
return tokens
|
| 71 |
+
|
| 72 |
+
target_line = " ".join(tokens)
|
| 73 |
+
target_line = target_line.replace(" $MERGE_HYPHEN ", "-")
|
| 74 |
+
target_line = target_line.replace(" $MERGE_SPACE ", "")
|
| 75 |
+
return target_line.split()
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def convert_using_case(token, smart_action):
|
| 79 |
+
if not smart_action.startswith("$TRANSFORM_CASE_"):
|
| 80 |
+
return token
|
| 81 |
+
if smart_action.endswith("LOWER"):
|
| 82 |
+
return token.lower()
|
| 83 |
+
elif smart_action.endswith("UPPER"):
|
| 84 |
+
return token.upper()
|
| 85 |
+
elif smart_action.endswith("CAPITAL"):
|
| 86 |
+
return token.capitalize()
|
| 87 |
+
elif smart_action.endswith("CAPITAL_1"):
|
| 88 |
+
return token[0] + token[1:].capitalize()
|
| 89 |
+
elif smart_action.endswith("UPPER_-1"):
|
| 90 |
+
return token[:-1].upper() + token[-1]
|
| 91 |
+
else:
|
| 92 |
+
return token
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def convert_using_verb(token, smart_action):
|
| 96 |
+
key_word = "$TRANSFORM_VERB_"
|
| 97 |
+
if not smart_action.startswith(key_word):
|
| 98 |
+
raise Exception(f"Unknown action type {smart_action}")
|
| 99 |
+
encoding_part = f"{token}_{smart_action[len(key_word):]}"
|
| 100 |
+
decoded_target_word = decode_verb_form(encoding_part)
|
| 101 |
+
return decoded_target_word
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def convert_using_split(token, smart_action):
|
| 105 |
+
key_word = "$TRANSFORM_SPLIT"
|
| 106 |
+
if not smart_action.startswith(key_word):
|
| 107 |
+
raise Exception(f"Unknown action type {smart_action}")
|
| 108 |
+
target_words = token.split("-")
|
| 109 |
+
return " ".join(target_words)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def convert_using_plural(token, smart_action):
|
| 113 |
+
if smart_action.endswith("PLURAL"):
|
| 114 |
+
return token + "s"
|
| 115 |
+
elif smart_action.endswith("SINGULAR"):
|
| 116 |
+
return token[:-1]
|
| 117 |
+
else:
|
| 118 |
+
raise Exception(f"Unknown action type {smart_action}")
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def apply_reverse_transformation(source_token, transform):
|
| 122 |
+
if transform.startswith("$TRANSFORM"):
|
| 123 |
+
# deal with equal
|
| 124 |
+
if transform == "$KEEP":
|
| 125 |
+
return source_token
|
| 126 |
+
# deal with case
|
| 127 |
+
if transform.startswith("$TRANSFORM_CASE"):
|
| 128 |
+
return convert_using_case(source_token, transform)
|
| 129 |
+
# deal with verb
|
| 130 |
+
if transform.startswith("$TRANSFORM_VERB"):
|
| 131 |
+
return convert_using_verb(source_token, transform)
|
| 132 |
+
# deal with split
|
| 133 |
+
if transform.startswith("$TRANSFORM_SPLIT"):
|
| 134 |
+
return convert_using_split(source_token, transform)
|
| 135 |
+
# deal with single/plural
|
| 136 |
+
if transform.startswith("$TRANSFORM_AGREEMENT"):
|
| 137 |
+
return convert_using_plural(source_token, transform)
|
| 138 |
+
# raise an exception if no matching transform type was found
|
| 139 |
+
raise Exception(f"Unknown action type {transform}")
|
| 140 |
+
else:
|
| 141 |
+
return source_token
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def read_parallel_lines(fn1, fn2):
|
| 145 |
+
lines1 = read_lines(fn1, skip_strip=True)
|
| 146 |
+
lines2 = read_lines(fn2, skip_strip=True)
|
| 147 |
+
assert len(lines1) == len(lines2)
|
| 148 |
+
out_lines1, out_lines2 = [], []
|
| 149 |
+
for line1, line2 in zip(lines1, lines2):
|
| 150 |
+
if not line1.strip() or not line2.strip():
|
| 151 |
+
continue
|
| 152 |
+
else:
|
| 153 |
+
out_lines1.append(line1)
|
| 154 |
+
out_lines2.append(line2)
|
| 155 |
+
return out_lines1, out_lines2
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def read_lines(fn, skip_strip=False):
|
| 159 |
+
if not os.path.exists(fn):
|
| 160 |
+
return []
|
| 161 |
+
with open(fn, 'r', encoding='utf-8') as f:
|
| 162 |
+
lines = f.readlines()
|
| 163 |
+
return [s.strip() for s in lines if s.strip() or skip_strip]
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def write_lines(fn, lines, mode='w'):
|
| 167 |
+
if mode == 'w' and os.path.exists(fn):
|
| 168 |
+
os.remove(fn)
|
| 169 |
+
with open(fn, encoding='utf-8', mode=mode) as f:
|
| 170 |
+
f.writelines(['%s\n' % s for s in lines])
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def decode_verb_form(original):
|
| 174 |
+
return DECODE_VERB_DICT.get(original)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def encode_verb_form(original_word, corrected_word):
|
| 178 |
+
decoding_request = original_word + "_" + corrected_word
|
| 179 |
+
decoding_response = ENCODE_VERB_DICT.get(decoding_request, "").strip()
|
| 180 |
+
if original_word and decoding_response:
|
| 181 |
+
answer = decoding_response
|
| 182 |
+
else:
|
| 183 |
+
answer = None
|
| 184 |
+
return answer
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def get_weights_name(transformer_name, lowercase):
|
| 188 |
+
if transformer_name == 'bert' and lowercase:
|
| 189 |
+
return 'bert-base-uncased'
|
| 190 |
+
if transformer_name == 'bert' and not lowercase:
|
| 191 |
+
return 'bert-base-cased'
|
| 192 |
+
if transformer_name == 'bert-large' and not lowercase:
|
| 193 |
+
return 'bert-large-cased'
|
| 194 |
+
if transformer_name == 'distilbert':
|
| 195 |
+
if not lowercase:
|
| 196 |
+
print('Warning! This model was trained only on uncased sentences.')
|
| 197 |
+
return 'distilbert-base-uncased'
|
| 198 |
+
if transformer_name == 'albert':
|
| 199 |
+
if not lowercase:
|
| 200 |
+
print('Warning! This model was trained only on uncased sentences.')
|
| 201 |
+
return 'albert-base-v1'
|
| 202 |
+
if lowercase:
|
| 203 |
+
print('Warning! This model was trained only on cased sentences.')
|
| 204 |
+
if transformer_name == 'roberta':
|
| 205 |
+
return 'roberta-base'
|
| 206 |
+
if transformer_name == 'roberta-large':
|
| 207 |
+
return 'roberta-large'
|
| 208 |
+
if transformer_name == 'gpt2':
|
| 209 |
+
return 'gpt2'
|
| 210 |
+
if transformer_name == 'transformerxl':
|
| 211 |
+
return 'transfo-xl-wt103'
|
| 212 |
+
if transformer_name == 'xlnet':
|
| 213 |
+
return 'xlnet-base-cased'
|
| 214 |
+
if transformer_name == 'xlnet-large':
|
| 215 |
+
return 'xlnet-large-cased'
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def remove_double_tokens(sent):
|
| 219 |
+
tokens = sent.split(' ')
|
| 220 |
+
deleted_idx = []
|
| 221 |
+
for i in range(len(tokens) -1):
|
| 222 |
+
if tokens[i] == tokens[i + 1]:
|
| 223 |
+
deleted_idx.append(i + 1)
|
| 224 |
+
if deleted_idx:
|
| 225 |
+
tokens = [tokens[i] for i in range(len(tokens)) if i not in deleted_idx]
|
| 226 |
+
return ' '.join(tokens)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def normalize(sent):
|
| 230 |
+
sent = remove_double_tokens(sent)
|
| 231 |
+
for fr, to in REPLACEMENTS.items():
|
| 232 |
+
sent = sent.replace(fr, to)
|
| 233 |
+
return sent.lower()
|
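A minimal sketch of how the edit-application helper above behaves (assumes utils/ is importable and data/verb-form-vocab.txt exists, since helpers.py builds its verb-form dictionaries at import time). The token list and the (start, end, label, confidence) edit tuples are made-up examples, not model output:

from helpers import get_target_sent_by_edits

tokens = ["She", "go", "to", "school"]
edits = [
    (1, 2, "$REPLACE_goes", None),  # replace token 1 ("go") with "goes"
    (4, 4, "$APPEND_today", None),  # insert "today" at the end of the sentence
]
print(" ".join(get_target_sent_by_edits(tokens, edits)))  # She goes to school today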
utils/prepare_clc_fce_data.py
ADDED
|
@@ -0,0 +1,123 @@
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
"""
|
| 3 |
+
Convert CLC-FCE dataset (The Cambridge Learner Corpus) to the parallel sentences format.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import glob
|
| 8 |
+
import os
|
| 9 |
+
import re
|
| 10 |
+
from xml.etree import cElementTree
|
| 11 |
+
|
| 12 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def annotate_fce_doc(xml):
|
| 17 |
+
"""Takes a FCE xml document and yields sentences with annotated errors."""
|
| 18 |
+
result = []
|
| 19 |
+
doc = cElementTree.fromstring(xml)
|
| 20 |
+
paragraphs = doc.findall('head/text/*/coded_answer/p')
|
| 21 |
+
for p in paragraphs:
|
| 22 |
+
text = _get_formatted_text(p)
|
| 23 |
+
result.append(text)
|
| 24 |
+
|
| 25 |
+
return '\n'.join(result)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _get_formatted_text(elem, ignore_tags=None):
|
| 29 |
+
text = elem.text or ''
|
| 30 |
+
ignore_tags = [tag.upper() for tag in (ignore_tags or [])]
|
| 31 |
+
correct = None
|
| 32 |
+
mistake = None
|
| 33 |
+
|
| 34 |
+
for child in elem.getchildren():
|
| 35 |
+
tag = child.tag.upper()
|
| 36 |
+
if tag == 'NS':
|
| 37 |
+
text += _get_formatted_text(child)
|
| 38 |
+
|
| 39 |
+
elif tag == 'UNKNOWN':
|
| 40 |
+
text += ' UNKNOWN '
|
| 41 |
+
|
| 42 |
+
elif tag == 'C':
|
| 43 |
+
assert correct is None
|
| 44 |
+
correct = _get_formatted_text(child)
|
| 45 |
+
|
| 46 |
+
elif tag == 'I':
|
| 47 |
+
assert mistake is None
|
| 48 |
+
mistake = _get_formatted_text(child)
|
| 49 |
+
|
| 50 |
+
elif tag in ignore_tags:
|
| 51 |
+
pass
|
| 52 |
+
|
| 53 |
+
else:
|
| 54 |
+
raise ValueError(f"Unknown tag `{child.tag}`", text)
|
| 55 |
+
|
| 56 |
+
if correct or mistake:
|
| 57 |
+
correct = correct or ''
|
| 58 |
+
mistake = mistake or ''
|
| 59 |
+
if '=>' not in mistake:
|
| 60 |
+
text += f'{{{mistake}=>{correct}}}'
|
| 61 |
+
else:
|
| 62 |
+
text += mistake
|
| 63 |
+
|
| 64 |
+
text += elem.tail or ''
|
| 65 |
+
return text
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def convert_fce(fce_dir):
|
| 69 |
+
"""Processes the whole FCE directory. Yields annotated documents (strings)."""
|
| 70 |
+
|
| 71 |
+
# Ensure we got the valid dataset path
|
| 72 |
+
if not os.path.isdir(fce_dir):
|
| 73 |
+
raise UserWarning(
|
| 74 |
+
f"{fce_dir} is not a valid path")
|
| 75 |
+
|
| 76 |
+
dataset_dir = os.path.join(fce_dir, 'dataset')
|
| 77 |
+
if not os.path.exists(dataset_dir):
|
| 78 |
+
raise UserWarning(
|
| 79 |
+
f"{fce_dir} doesn't point to a dataset's root dir")
|
| 80 |
+
|
| 81 |
+
# Convert XML docs to the corpora format
|
| 82 |
+
filenames = sorted(glob.glob(os.path.join(dataset_dir, '*/*.xml')))
|
| 83 |
+
|
| 84 |
+
docs = []
|
| 85 |
+
for filename in filenames:
|
| 86 |
+
with open(filename, encoding='utf-8') as f:
|
| 87 |
+
doc = annotate_fce_doc(f.read())
|
| 88 |
+
docs.append(doc)
|
| 89 |
+
return docs
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def main():
|
| 93 |
+
fce = convert_fce(args.fce_dataset_path)
|
| 94 |
+
with open(args.output + "/fce-original.txt", 'w', encoding='utf-8') as out_original, \
|
| 95 |
+
open(args.output + "/fce-applied.txt", 'w', encoding='utf-8') as out_applied:
|
| 96 |
+
for doc in tqdm(fce, unit='doc'):
|
| 97 |
+
sents = re.split(r"\n +\n", doc)
|
| 98 |
+
for sent in sents:
|
| 99 |
+
tokenized_sents = sent_tokenize(sent)
|
| 100 |
+
for i in range(len(tokenized_sents)):
|
| 101 |
+
if re.search(r"[{>][.?!]$", tokenized_sents[i]):
|
| 102 |
+
tokenized_sents[i + 1] = tokenized_sents[i] + " " + tokenized_sents[i + 1]
|
| 103 |
+
tokenized_sents[i] = ""
|
| 104 |
+
regexp = r'{([^{}]*?)=>([^{}]*?)}'
|
| 105 |
+
original = re.sub(regexp, r"\1", tokenized_sents[i])
|
| 106 |
+
applied = re.sub(regexp, r"\2", tokenized_sents[i])
|
| 107 |
+
# filter out nested alerts
|
| 108 |
+
if original != "" and applied != "" and not re.search(r"[{}=]", original) \
|
| 109 |
+
and not re.search(r"[{}=]", applied):
|
| 110 |
+
out_original.write(" ".join(word_tokenize(original)) + "\n")
|
| 111 |
+
out_applied.write(" ".join(word_tokenize(applied)) + "\n")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
if __name__ == '__main__':
|
| 115 |
+
parser = argparse.ArgumentParser(description=(
|
| 116 |
+
"Convert CLC-FCE dataset to the parallel sentences format."))
|
| 117 |
+
parser.add_argument('fce_dataset_path',
|
| 118 |
+
help='Path to the folder with the FCE dataset')
|
| 119 |
+
parser.add_argument('--output',
|
| 120 |
+
help='Path to the output folder')
|
| 121 |
+
args = parser.parse_args()
|
| 122 |
+
|
| 123 |
+
main()
|
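The script above rewrites each error span as {mistake=>correct} and then splits every sentence into its original and corrected side with the regex used in main(). A minimal sketch of that split on a hypothetical annotated sentence:

import re

annotated = "I {has=>have} a car ."
regexp = r'{([^{}]*?)=>([^{}]*?)}'
print(re.sub(regexp, r"\1", annotated))  # original side:  "I has a car ."
print(re.sub(regexp, r"\2", annotated))  # corrected side: "I have a car ."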
utils/preprocess_data.py
ADDED
|
@@ -0,0 +1,488 @@
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
from difflib import SequenceMatcher
|
| 4 |
+
|
| 5 |
+
import Levenshtein
|
| 6 |
+
import numpy as np
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
|
| 9 |
+
from helpers import write_lines, read_parallel_lines, encode_verb_form, \
|
| 10 |
+
apply_reverse_transformation, SEQ_DELIMETERS, START_TOKEN
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def perfect_align(t, T, insertions_allowed=0,
|
| 14 |
+
cost_function=Levenshtein.distance):
|
| 15 |
+
# dp[i, j, k] is the minimal cost of matching the first `i` tokens of `t` with the
|
| 16 |
+
# first `j` tokens of `T`, after making `k` insertions after the last matched
|
| 17 |
+
# token from `t`. In other words, t[:i] is aligned with T[:j].
|
| 18 |
+
|
| 19 |
+
# Initialize with INFINITY (unknown)
|
| 20 |
+
shape = (len(t) + 1, len(T) + 1, insertions_allowed + 1)
|
| 21 |
+
dp = np.ones(shape, dtype=int) * int(1e9)
|
| 22 |
+
come_from = np.ones(shape, dtype=int) * int(1e9)
|
| 23 |
+
come_from_ins = np.ones(shape, dtype=int) * int(1e9)
|
| 24 |
+
|
| 25 |
+
dp[0, 0, 0] = 0 # The only known starting point. Nothing matched to nothing.
|
| 26 |
+
for i in range(len(t) + 1): # Go inclusive
|
| 27 |
+
for j in range(len(T) + 1): # Go inclusive
|
| 28 |
+
for q in range(insertions_allowed + 1): # Go inclusive
|
| 29 |
+
if i < len(t):
|
| 30 |
+
# Given matched sequence of t[:i] and T[:j], match token
|
| 31 |
+
# t[i] with following tokens T[j:k].
|
| 32 |
+
for k in range(j, len(T) + 1):
|
| 33 |
+
transform = \
|
| 34 |
+
apply_transformation(t[i], ' '.join(T[j:k]))
|
| 35 |
+
if transform:
|
| 36 |
+
cost = 0
|
| 37 |
+
else:
|
| 38 |
+
cost = cost_function(t[i], ' '.join(T[j:k]))
|
| 39 |
+
current = dp[i, j, q] + cost
|
| 40 |
+
if dp[i + 1, k, 0] > current:
|
| 41 |
+
dp[i + 1, k, 0] = current
|
| 42 |
+
come_from[i + 1, k, 0] = j
|
| 43 |
+
come_from_ins[i + 1, k, 0] = q
|
| 44 |
+
if q < insertions_allowed:
|
| 45 |
+
# Given matched sequence of t[:i] and T[:j], create
|
| 46 |
+
# insertion with following tokens T[j:k].
|
| 47 |
+
for k in range(j, len(T) + 1):
|
| 48 |
+
cost = len(' '.join(T[j:k]))
|
| 49 |
+
current = dp[i, j, q] + cost
|
| 50 |
+
if dp[i, k, q + 1] > current:
|
| 51 |
+
dp[i, k, q + 1] = current
|
| 52 |
+
come_from[i, k, q + 1] = j
|
| 53 |
+
come_from_ins[i, k, q + 1] = q
|
| 54 |
+
|
| 55 |
+
# Solution is in the dp[len(t), len(T), *]. Backtracking from there.
|
| 56 |
+
alignment = []
|
| 57 |
+
i = len(t)
|
| 58 |
+
j = len(T)
|
| 59 |
+
q = dp[i, j, :].argmin()
|
| 60 |
+
while i > 0 or q > 0:
|
| 61 |
+
is_insert = (come_from_ins[i, j, q] != q) and (q != 0)
|
| 62 |
+
j, k, q = come_from[i, j, q], j, come_from_ins[i, j, q]
|
| 63 |
+
if not is_insert:
|
| 64 |
+
i -= 1
|
| 65 |
+
|
| 66 |
+
if is_insert:
|
| 67 |
+
alignment.append(['INSERT', T[j:k], (i, i)])
|
| 68 |
+
else:
|
| 69 |
+
alignment.append([f'REPLACE_{t[i]}', T[j:k], (i, i + 1)])
|
| 70 |
+
|
| 71 |
+
assert j == 0
|
| 72 |
+
|
| 73 |
+
return dp[len(t), len(T)].min(), list(reversed(alignment))
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _split(token):
|
| 77 |
+
if not token:
|
| 78 |
+
return []
|
| 79 |
+
parts = token.split()
|
| 80 |
+
return parts or [token]
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def apply_merge_transformation(source_tokens, target_words, shift_idx):
|
| 84 |
+
edits = []
|
| 85 |
+
if len(source_tokens) > 1 and len(target_words) == 1:
|
| 86 |
+
# check merge
|
| 87 |
+
transform = check_merge(source_tokens, target_words)
|
| 88 |
+
if transform:
|
| 89 |
+
for i in range(len(source_tokens) - 1):
|
| 90 |
+
edits.append([(shift_idx + i, shift_idx + i + 1), transform])
|
| 91 |
+
return edits
|
| 92 |
+
|
| 93 |
+
if len(source_tokens) == len(target_words) == 2:
|
| 94 |
+
# check swap
|
| 95 |
+
transform = check_swap(source_tokens, target_words)
|
| 96 |
+
if transform:
|
| 97 |
+
edits.append([(shift_idx, shift_idx + 1), transform])
|
| 98 |
+
return edits
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def is_sent_ok(sent, delimeters=SEQ_DELIMETERS):
|
| 102 |
+
for del_val in delimeters.values():
|
| 103 |
+
if del_val in sent and del_val != delimeters["tokens"]:
|
| 104 |
+
return False
|
| 105 |
+
return True
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def check_casetype(source_token, target_token):
|
| 109 |
+
if source_token.lower() != target_token.lower():
|
| 110 |
+
return None
|
| 111 |
+
if source_token.lower() == target_token:
|
| 112 |
+
return "$TRANSFORM_CASE_LOWER"
|
| 113 |
+
elif source_token.capitalize() == target_token:
|
| 114 |
+
return "$TRANSFORM_CASE_CAPITAL"
|
| 115 |
+
elif source_token.upper() == target_token:
|
| 116 |
+
return "$TRANSFORM_CASE_UPPER"
|
| 117 |
+
elif source_token[1:].capitalize() == target_token[1:] and source_token[0] == target_token[0]:
|
| 118 |
+
return "$TRANSFORM_CASE_CAPITAL_1"
|
| 119 |
+
elif source_token[:-1].upper() == target_token[:-1] and source_token[-1] == target_token[-1]:
|
| 120 |
+
return "$TRANSFORM_CASE_UPPER_-1"
|
| 121 |
+
else:
|
| 122 |
+
return None
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def check_equal(source_token, target_token):
|
| 126 |
+
if source_token == target_token:
|
| 127 |
+
return "$KEEP"
|
| 128 |
+
else:
|
| 129 |
+
return None
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def check_split(source_token, target_tokens):
|
| 133 |
+
if source_token.split("-") == target_tokens:
|
| 134 |
+
return "$TRANSFORM_SPLIT_HYPHEN"
|
| 135 |
+
else:
|
| 136 |
+
return None
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def check_merge(source_tokens, target_tokens):
|
| 140 |
+
if "".join(source_tokens) == "".join(target_tokens):
|
| 141 |
+
return "$MERGE_SPACE"
|
| 142 |
+
elif "-".join(source_tokens) == "-".join(target_tokens):
|
| 143 |
+
return "$MERGE_HYPHEN"
|
| 144 |
+
else:
|
| 145 |
+
return None
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def check_swap(source_tokens, target_tokens):
|
| 149 |
+
if source_tokens == [x for x in reversed(target_tokens)]:
|
| 150 |
+
return "$MERGE_SWAP"
|
| 151 |
+
else:
|
| 152 |
+
return None
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def check_plural(source_token, target_token):
|
| 156 |
+
if source_token.endswith("s") and source_token[:-1] == target_token:
|
| 157 |
+
return "$TRANSFORM_AGREEMENT_SINGULAR"
|
| 158 |
+
elif target_token.endswith("s") and source_token == target_token[:-1]:
|
| 159 |
+
return "$TRANSFORM_AGREEMENT_PLURAL"
|
| 160 |
+
else:
|
| 161 |
+
return None
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def check_verb(source_token, target_token):
|
| 165 |
+
encoding = encode_verb_form(source_token, target_token)
|
| 166 |
+
if encoding:
|
| 167 |
+
return f"$TRANSFORM_VERB_{encoding}"
|
| 168 |
+
else:
|
| 169 |
+
return None
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def apply_transformation(source_token, target_token):
|
| 173 |
+
target_tokens = target_token.split()
|
| 174 |
+
if len(target_tokens) > 1:
|
| 175 |
+
# check split
|
| 176 |
+
transform = check_split(source_token, target_tokens)
|
| 177 |
+
if transform:
|
| 178 |
+
return transform
|
| 179 |
+
checks = [check_equal, check_casetype, check_verb, check_plural]
|
| 180 |
+
for check in checks:
|
| 181 |
+
transform = check(source_token, target_token)
|
| 182 |
+
if transform:
|
| 183 |
+
return transform
|
| 184 |
+
return None
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def align_sequences(source_sent, target_sent):
|
| 188 |
+
# check if sent is OK
|
| 189 |
+
if not is_sent_ok(source_sent) or not is_sent_ok(target_sent):
|
| 190 |
+
return None
|
| 191 |
+
source_tokens = source_sent.split()
|
| 192 |
+
target_tokens = target_sent.split()
|
| 193 |
+
matcher = SequenceMatcher(None, source_tokens, target_tokens)
|
| 194 |
+
diffs = list(matcher.get_opcodes())
|
| 195 |
+
all_edits = []
|
| 196 |
+
for diff in diffs:
|
| 197 |
+
tag, i1, i2, j1, j2 = diff
|
| 198 |
+
source_part = _split(" ".join(source_tokens[i1:i2]))
|
| 199 |
+
target_part = _split(" ".join(target_tokens[j1:j2]))
|
| 200 |
+
if tag == 'equal':
|
| 201 |
+
continue
|
| 202 |
+
elif tag == 'delete':
|
| 203 |
+
# delete all words separately
|
| 204 |
+
for j in range(i2 - i1):
|
| 205 |
+
edit = [(i1 + j, i1 + j + 1), '$DELETE']
|
| 206 |
+
all_edits.append(edit)
|
| 207 |
+
elif tag == 'insert':
|
| 208 |
+
# append to the previous word
|
| 209 |
+
for target_token in target_part:
|
| 210 |
+
edit = ((i1 - 1, i1), f"$APPEND_{target_token}")
|
| 211 |
+
all_edits.append(edit)
|
| 212 |
+
else:
|
| 213 |
+
# check merge first of all
|
| 214 |
+
edits = apply_merge_transformation(source_part, target_part,
|
| 215 |
+
shift_idx=i1)
|
| 216 |
+
if edits:
|
| 217 |
+
all_edits.extend(edits)
|
| 218 |
+
continue
|
| 219 |
+
|
| 220 |
+
# normalize alignments if needed (make them singleton)
|
| 221 |
+
_, alignments = perfect_align(source_part, target_part,
|
| 222 |
+
insertions_allowed=0)
|
| 223 |
+
for alignment in alignments:
|
| 224 |
+
new_shift = alignment[2][0]
|
| 225 |
+
edits = convert_alignments_into_edits(alignment,
|
| 226 |
+
shift_idx=i1 + new_shift)
|
| 227 |
+
all_edits.extend(edits)
|
| 228 |
+
|
| 229 |
+
# get labels
|
| 230 |
+
labels = convert_edits_into_labels(source_tokens, all_edits)
|
| 231 |
+
# match tags to source tokens
|
| 232 |
+
sent_with_tags = add_labels_to_the_tokens(source_tokens, labels)
|
| 233 |
+
return sent_with_tags
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def convert_edits_into_labels(source_tokens, all_edits):
|
| 237 |
+
# make sure that edits are flat
|
| 238 |
+
flat_edits = []
|
| 239 |
+
for edit in all_edits:
|
| 240 |
+
(start, end), edit_operations = edit
|
| 241 |
+
if isinstance(edit_operations, list):
|
| 242 |
+
for operation in edit_operations:
|
| 243 |
+
new_edit = [(start, end), operation]
|
| 244 |
+
flat_edits.append(new_edit)
|
| 245 |
+
elif isinstance(edit_operations, str):
|
| 246 |
+
flat_edits.append(edit)
|
| 247 |
+
else:
|
| 248 |
+
raise Exception("Unknown operation type")
|
| 249 |
+
all_edits = flat_edits[:]
|
| 250 |
+
labels = []
|
| 251 |
+
total_labels = len(source_tokens) + 1
|
| 252 |
+
if not all_edits:
|
| 253 |
+
labels = [["$KEEP"] for x in range(total_labels)]
|
| 254 |
+
else:
|
| 255 |
+
for i in range(total_labels):
|
| 256 |
+
edit_operations = [x[1] for x in all_edits if x[0][0] == i - 1
|
| 257 |
+
and x[0][1] == i]
|
| 258 |
+
if not edit_operations:
|
| 259 |
+
labels.append(["$KEEP"])
|
| 260 |
+
else:
|
| 261 |
+
labels.append(edit_operations)
|
| 262 |
+
return labels
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def convert_alignments_into_edits(alignment, shift_idx):
|
| 266 |
+
edits = []
|
| 267 |
+
action, target_tokens, new_idx = alignment
|
| 268 |
+
source_token = action.replace("REPLACE_", "")
|
| 269 |
+
|
| 270 |
+
# check if delete
|
| 271 |
+
if not target_tokens:
|
| 272 |
+
edit = [(shift_idx, 1 + shift_idx), "$DELETE"]
|
| 273 |
+
return [edit]
|
| 274 |
+
|
| 275 |
+
# check splits
|
| 276 |
+
for i in range(1, len(target_tokens)):
|
| 277 |
+
target_token = " ".join(target_tokens[:i + 1])
|
| 278 |
+
transform = apply_transformation(source_token, target_token)
|
| 279 |
+
if transform:
|
| 280 |
+
edit = [(shift_idx, shift_idx + 1), transform]
|
| 281 |
+
edits.append(edit)
|
| 282 |
+
target_tokens = target_tokens[i + 1:]
|
| 283 |
+
for target in target_tokens:
|
| 284 |
+
edits.append([(shift_idx, shift_idx + 1), f"$APPEND_{target}"])
|
| 285 |
+
return edits
|
| 286 |
+
|
| 287 |
+
transform_costs = []
|
| 288 |
+
transforms = []
|
| 289 |
+
for target_token in target_tokens:
|
| 290 |
+
transform = apply_transformation(source_token, target_token)
|
| 291 |
+
if transform:
|
| 292 |
+
cost = 0
|
| 293 |
+
transforms.append(transform)
|
| 294 |
+
else:
|
| 295 |
+
cost = Levenshtein.distance(source_token, target_token)
|
| 296 |
+
transforms.append(None)
|
| 297 |
+
transform_costs.append(cost)
|
| 298 |
+
min_cost_idx = transform_costs.index(min(transform_costs))
|
| 299 |
+
# append to the previous word
|
| 300 |
+
for i in range(0, min_cost_idx):
|
| 301 |
+
target = target_tokens[i]
|
| 302 |
+
edit = [(shift_idx - 1, shift_idx), f"$APPEND_{target}"]
|
| 303 |
+
edits.append(edit)
|
| 304 |
+
# replace/transform target word
|
| 305 |
+
transform = transforms[min_cost_idx]
|
| 306 |
+
target = transform if transform is not None \
|
| 307 |
+
else f"$REPLACE_{target_tokens[min_cost_idx]}"
|
| 308 |
+
edit = [(shift_idx, 1 + shift_idx), target]
|
| 309 |
+
edits.append(edit)
|
| 310 |
+
# append to this word
|
| 311 |
+
for i in range(min_cost_idx + 1, len(target_tokens)):
|
| 312 |
+
target = target_tokens[i]
|
| 313 |
+
edit = [(shift_idx, 1 + shift_idx), f"$APPEND_{target}"]
|
| 314 |
+
edits.append(edit)
|
| 315 |
+
return edits
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def add_labels_to_the_tokens(source_tokens, labels, delimeters=SEQ_DELIMETERS):
|
| 319 |
+
tokens_with_all_tags = []
|
| 320 |
+
source_tokens_with_start = [START_TOKEN] + source_tokens
|
| 321 |
+
for token, label_list in zip(source_tokens_with_start, labels):
|
| 322 |
+
all_tags = delimeters['operations'].join(label_list)
|
| 323 |
+
comb_record = token + delimeters['labels'] + all_tags
|
| 324 |
+
tokens_with_all_tags.append(comb_record)
|
| 325 |
+
return delimeters['tokens'].join(tokens_with_all_tags)
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def convert_data_from_raw_files(source_file, target_file, output_file, chunk_size):
|
| 329 |
+
tagged = []
|
| 330 |
+
source_data, target_data = read_parallel_lines(source_file, target_file)
|
| 331 |
+
print(f"The size of raw dataset is {len(source_data)}")
|
| 332 |
+
cnt_total, cnt_all, cnt_tp = 0, 0, 0
|
| 333 |
+
for source_sent, target_sent in tqdm(zip(source_data, target_data)):
|
| 334 |
+
try:
|
| 335 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
| 336 |
+
except Exception:
|
| 337 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
| 338 |
+
if source_sent != target_sent:
|
| 339 |
+
cnt_tp += 1
|
| 340 |
+
alignments = [aligned_sent]
|
| 341 |
+
cnt_all += len(alignments)
|
| 342 |
+
try:
|
| 343 |
+
check_sent = convert_tagged_line(aligned_sent)
|
| 344 |
+
except Exception:
|
| 345 |
+
# debug mode
|
| 346 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
| 347 |
+
check_sent = convert_tagged_line(aligned_sent)
|
| 348 |
+
|
| 349 |
+
if "".join(check_sent.split()) != "".join(
|
| 350 |
+
target_sent.split()):
|
| 351 |
+
# do it again for debugging
|
| 352 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
| 353 |
+
check_sent = convert_tagged_line(aligned_sent)
|
| 354 |
+
print(f"Incorrect pair: \n{target_sent}\n{check_sent}")
|
| 355 |
+
continue
|
| 356 |
+
if alignments:
|
| 357 |
+
cnt_total += len(alignments)
|
| 358 |
+
tagged.extend(alignments)
|
| 359 |
+
if len(tagged) > chunk_size:
|
| 360 |
+
write_lines(output_file, tagged, mode='a')
|
| 361 |
+
tagged = []
|
| 362 |
+
|
| 363 |
+
print(f"Overall extracted {cnt_total}. "
|
| 364 |
+
f"Original TP {cnt_tp}."
|
| 365 |
+
f" Original TN {cnt_all - cnt_tp}")
|
| 366 |
+
if tagged:
|
| 367 |
+
write_lines(output_file, tagged, 'a')
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
def convert_labels_into_edits(labels):
|
| 371 |
+
all_edits = []
|
| 372 |
+
for i, label_list in enumerate(labels):
|
| 373 |
+
if label_list == ["$KEEP"]:
|
| 374 |
+
continue
|
| 375 |
+
else:
|
| 376 |
+
edit = [(i - 1, i), label_list]
|
| 377 |
+
all_edits.append(edit)
|
| 378 |
+
return all_edits
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
def get_target_sent_by_levels(source_tokens, labels):
|
| 382 |
+
relevant_edits = convert_labels_into_edits(labels)
|
| 383 |
+
target_tokens = source_tokens[:]
|
| 384 |
+
leveled_target_tokens = {}
|
| 385 |
+
if not relevant_edits:
|
| 386 |
+
target_sentence = " ".join(target_tokens)
|
| 387 |
+
return leveled_target_tokens, target_sentence
|
| 388 |
+
max_level = max([len(x[1]) for x in relevant_edits])
|
| 389 |
+
for level in range(max_level):
|
| 390 |
+
rest_edits = []
|
| 391 |
+
shift_idx = 0
|
| 392 |
+
for edits in relevant_edits:
|
| 393 |
+
(start, end), label_list = edits
|
| 394 |
+
label = label_list[0]
|
| 395 |
+
target_pos = start + shift_idx
|
| 396 |
+
source_token = target_tokens[target_pos] if target_pos >= 0 else START_TOKEN
|
| 397 |
+
if label == "$DELETE":
|
| 398 |
+
del target_tokens[target_pos]
|
| 399 |
+
shift_idx -= 1
|
| 400 |
+
elif label.startswith("$APPEND_"):
|
| 401 |
+
word = label.replace("$APPEND_", "")
|
| 402 |
+
target_tokens[target_pos + 1: target_pos + 1] = [word]
|
| 403 |
+
shift_idx += 1
|
| 404 |
+
elif label.startswith("$REPLACE_"):
|
| 405 |
+
word = label.replace("$REPLACE_", "")
|
| 406 |
+
target_tokens[target_pos] = word
|
| 407 |
+
elif label.startswith("$TRANSFORM"):
|
| 408 |
+
word = apply_reverse_transformation(source_token, label)
|
| 409 |
+
if word is None:
|
| 410 |
+
word = source_token
|
| 411 |
+
target_tokens[target_pos] = word
|
| 412 |
+
elif label.startswith("$MERGE_"):
|
| 413 |
+
# apply merge only on last stage
|
| 414 |
+
if level == (max_level - 1):
|
| 415 |
+
target_tokens[target_pos + 1: target_pos + 1] = [label]
|
| 416 |
+
shift_idx += 1
|
| 417 |
+
else:
|
| 418 |
+
rest_edit = [(start + shift_idx, end + shift_idx), [label]]
|
| 419 |
+
rest_edits.append(rest_edit)
|
| 420 |
+
rest_labels = label_list[1:]
|
| 421 |
+
if rest_labels:
|
| 422 |
+
rest_edit = [(start + shift_idx, end + shift_idx), rest_labels]
|
| 423 |
+
rest_edits.append(rest_edit)
|
| 424 |
+
|
| 425 |
+
leveled_tokens = target_tokens[:]
|
| 426 |
+
# update next step
|
| 427 |
+
relevant_edits = rest_edits[:]
|
| 428 |
+
if level == (max_level - 1):
|
| 429 |
+
leveled_tokens = replace_merge_transforms(leveled_tokens)
|
| 430 |
+
leveled_labels = convert_edits_into_labels(leveled_tokens,
|
| 431 |
+
relevant_edits)
|
| 432 |
+
leveled_target_tokens[level + 1] = {"tokens": leveled_tokens,
|
| 433 |
+
"labels": leveled_labels}
|
| 434 |
+
|
| 435 |
+
target_sentence = " ".join(leveled_target_tokens[max_level]["tokens"])
|
| 436 |
+
return leveled_target_tokens, target_sentence
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
def replace_merge_transforms(tokens):
|
| 440 |
+
if all(not x.startswith("$MERGE_") for x in tokens):
|
| 441 |
+
return tokens
|
| 442 |
+
target_tokens = tokens[:]
|
| 443 |
+
allowed_range = (1, len(tokens) - 1)
|
| 444 |
+
for i in range(len(tokens)):
|
| 445 |
+
target_token = tokens[i]
|
| 446 |
+
if target_token.startswith("$MERGE"):
|
| 447 |
+
if target_token.startswith("$MERGE_SWAP") and i in allowed_range:
|
| 448 |
+
target_tokens[i - 1] = tokens[i + 1]
|
| 449 |
+
target_tokens[i + 1] = tokens[i - 1]
|
| 450 |
+
target_tokens[i: i + 1] = []
|
| 451 |
+
target_line = " ".join(target_tokens)
|
| 452 |
+
target_line = target_line.replace(" $MERGE_HYPHEN ", "-")
|
| 453 |
+
target_line = target_line.replace(" $MERGE_SPACE ", "")
|
| 454 |
+
return target_line.split()
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def convert_tagged_line(line, delimeters=SEQ_DELIMETERS):
|
| 458 |
+
label_del = delimeters['labels']
|
| 459 |
+
source_tokens = [x.split(label_del)[0]
|
| 460 |
+
for x in line.split(delimeters['tokens'])][1:]
|
| 461 |
+
labels = [x.split(label_del)[1].split(delimeters['operations'])
|
| 462 |
+
for x in line.split(delimeters['tokens'])]
|
| 463 |
+
assert len(source_tokens) + 1 == len(labels)
|
| 464 |
+
levels_dict, target_line = get_target_sent_by_levels(source_tokens, labels)
|
| 465 |
+
return target_line
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
def main(args):
|
| 469 |
+
convert_data_from_raw_files(args.source, args.target, args.output_file, args.chunk_size)
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
if __name__ == '__main__':
|
| 473 |
+
parser = argparse.ArgumentParser()
|
| 474 |
+
parser.add_argument('-s', '--source',
|
| 475 |
+
help='Path to the source file',
|
| 476 |
+
required=True)
|
| 477 |
+
parser.add_argument('-t', '--target',
|
| 478 |
+
help='Path to the target file',
|
| 479 |
+
required=True)
|
| 480 |
+
parser.add_argument('-o', '--output_file',
|
| 481 |
+
help='Path to the output file',
|
| 482 |
+
required=True)
|
| 483 |
+
parser.add_argument('--chunk_size',
|
| 484 |
+
type=int,
|
| 485 |
+
help='Dump each chunk size.',
|
| 486 |
+
default=1000000)
|
| 487 |
+
args = parser.parse_args()
|
| 488 |
+
main(args)
|
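A minimal sketch of tagging one source/target pair with align_sequences from the script above (assumes utils/ is importable, python-Levenshtein and numpy are installed, and data/verb-form-vocab.txt exists, since helpers.py loads it on import). The sentences are made-up examples:

from preprocess_data import align_sequences

source = "She go to school"
target = "She goes to school"
print(align_sequences(source, target))
# One "tokenSEPL|||SEPRtag" record per token, starting from $START; here most
# tokens get $KEEP, while "go" receives either a $TRANSFORM_VERB_* tag or
# $REPLACE_goes, depending on what the verb-form vocabulary contains.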