#!/bin/bash
# Copyright 2020 Google and DeepMind.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Two-stage training for the XTREME udpos (POS tagging) task.
# Positional arguments (all optional): MODEL STAGE GPU DATA_DIR OUT_DIR SEED.
REPO=$PWD
MODEL=${1:-"xlm-roberta-base"}
STAGE=${2:-1}
GPU=${3:-0}
DATA_DIR=${4:-"$REPO/download/"}
OUT_DIR=${5:-"$REPO/outputs/"}
SEED=${6:-1}

export CUDA_VISIBLE_DEVICES=$GPU

TASK='udpos'
MODEL_PATH=$DATA_DIR/$MODEL
EPOCH=10
MAX_LENGTH=128
LANGS="af,ar,bg,de,el,en,es,et,eu,fa,fi,fr,he,hi,hu,id,it,ja,kk,ko,mr,nl,pt,ru,ta,te,th,tl,tr,ur,vi,yo,zh"
EVALUATE_STEPS=500

# Subword-sampling and consistency-loss hyperparameters: BSR/SA/SNBS control
# BPE sampling; R1_LAMBDA and R2_LAMBDA weight the stage-1 and stage-2
# consistency losses.
BSR=0.5
SA=0.3
SNBS=-1
R1_LAMBDA=5.0
R2_LAMBDA=0.3

# xlm-roberta-large needs a smaller learning rate to fine-tune stably.
if [ "$MODEL" == "xlm-roberta-large" ]; then
  BATCH_SIZE=32
  GRAD_ACC=1
  LR=5e-6
else
  BATCH_SIZE=32
  GRAD_ACC=1
  LR=2e-5
fi

TRANSLATION_PATH=$DATA_DIR/xtreme_translations/translate_train.udpos.txt
DATA_DIR=$DATA_DIR/$TASK/${TASK}_processed_maxlen${MAX_LENGTH}/

if [ "$STAGE" == 1 ]; then
  # Stage 1: train on English with BPE sampling and the R1 consistency loss.
  OUTPUT_DIR="${OUT_DIR}/${TASK}/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAX_LENGTH}-SS-bsr${BSR}-sa${SA}-snbs${SNBS}-R1_LAMBDA${R1_LAMBDA}/"
  python src/run_tag.py --model_type xlmr \
    --model_name_or_path $MODEL_PATH \
    --do_train \
    --do_eval \
    --do_predict \
    --do_predict_dev \
    --predict_langs $LANGS \
    --train_langs en \
    --data_dir $DATA_DIR \
    --labels $DATA_DIR/labels.txt \
    --per_gpu_train_batch_size $BATCH_SIZE \
    --gradient_accumulation_steps $GRAD_ACC \
    --per_gpu_eval_batch_size 128 \
    --learning_rate $LR \
    --num_train_epochs $EPOCH \
    --max_seq_length $MAX_LENGTH \
    --noised_max_seq_length $MAX_LENGTH \
    --output_dir $OUTPUT_DIR \
    --overwrite_output_dir \
    --evaluate_during_training \
    --logging_steps 50 \
    --evaluate_steps $EVALUATE_STEPS \
    --seed $SEED \
    --warmup_steps -1 \
    --save_only_best_checkpoint \
    --eval_all_checkpoints \
    --eval_patience -1 \
    --fp16 --fp16_opt_level O2 \
    --hidden_dropout_prob 0.1 \
    --original_loss \
    --use_pooling_strategy \
    --enable_r1_loss \
    --r1_lambda $R1_LAMBDA \
    --use_token_label_probs \
    --enable_bpe_sampling \
    --bpe_sampling_ratio $BSR \
    --sampling_alpha $SA \
    --sampling_nbest_size $SNBS
elif [ "$STAGE" == 2 ]; then
  # Stage 2: continue from the best stage-1 checkpoint, adding
  # machine-translation data augmentation and the R2 consistency loss.
  FIRST_STAGE_MODEL_PATH="${OUT_DIR}/${TASK}/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAX_LENGTH}-SS-bsr${BSR}-sa${SA}-snbs${SNBS}-R1_LAMBDA${R1_LAMBDA}/checkpoint-best"
  OUTPUT_DIR="${OUT_DIR}/${TASK}/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAX_LENGTH}-SS-bsr${BSR}-sa${SA}-snbs${SNBS}-R1_Lambda${R1_LAMBDA}-Aug1.0-MT-R2_Lambda${R2_LAMBDA}/"
  python src/run_tag.py --model_type xlmr \
    --model_name_or_path $MODEL_PATH \
    --do_train \
    --do_eval \
    --do_predict \
    --do_predict_dev \
    --predict_langs $LANGS \
    --train_langs en \
    --data_dir $DATA_DIR \
    --labels $DATA_DIR/labels.txt \
    --per_gpu_train_batch_size $BATCH_SIZE \
    --gradient_accumulation_steps $GRAD_ACC \
    --per_gpu_eval_batch_size 128 \
    --learning_rate $LR \
    --num_train_epochs $EPOCH \
    --max_seq_length $MAX_LENGTH \
    --noised_max_seq_length $MAX_LENGTH \
    --output_dir $OUTPUT_DIR \
    --overwrite_output_dir \
    --evaluate_during_training \
    --logging_steps 50 \
    --evaluate_steps $EVALUATE_STEPS \
    --seed $SEED \
    --warmup_steps -1 \
    --save_only_best_checkpoint \
    --eval_all_checkpoints \
    --eval_patience -1 \
    --fp16 --fp16_opt_level O2 \
    --hidden_dropout_prob 0.1 \
    --original_loss \
    --use_pooling_strategy \
    --enable_r1_loss \
    --r1_lambda $R1_LAMBDA \
    --use_token_label_probs \
    --enable_bpe_sampling \
    --bpe_sampling_ratio $BSR \
    --sampling_alpha $SA \
    --sampling_nbest_size $SNBS \
    --enable_data_augmentation \
    --augment_ratio 1.0 \
    --augment_method mt \
    --translation_path $TRANSLATION_PATH \
    --r2_lambda $R2_LAMBDA \
    --first_stage_model_path $FIRST_STAGE_MODEL_PATH
fi
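
# A minimal usage sketch. The file name scripts/train_udpos.sh is an
# assumption; adjust it and the data/output paths to your checkout. It assumes
# the processed udpos data and xtreme_translations already live under
# ./download/:
#
#   bash scripts/train_udpos.sh xlm-roberta-base 1 0 ./download/ ./outputs/ 1
#   bash scripts/train_udpos.sh xlm-roberta-base 2 0 ./download/ ./outputs/ 1
#
# Run stage 1 to completion first: stage 2 reads the checkpoint-best directory
# that stage 1 writes under ./outputs/udpos/.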