#!/bin/bash
# Copyright 2020 Google and DeepMind.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Positional arguments (all optional, with defaults).
REPO=$PWD
MODEL=${1:-"xlm-roberta-base"}
STAGE=${2:-1}
GPU=${3:-0}
DATA_DIR=${4:-"$REPO/download/"}
OUT_DIR=${5:-"$REPO/outputs/"}
SEED=${6:-1}

export CUDA_VISIBLE_DEVICES=$GPU

TASK='tydiqa'
MODEL_PATH=$DATA_DIR/$MODEL
TRANSLATION_PATH=$DATA_DIR/xtreme_translations/TyDiQA-GoldP/translate-train/
MAXL=384
LANGS="en,ar,bn,fi,id,ko,ru,sw,te"

# Subword-sampling noise and regularization hyper-parameters.
BSR=0.3        # BPE sampling ratio
SA=0.3         # subword sampling alpha
SNBS=-1        # nbest size for subword sampling
R1_LAMBDA=5.0  # weight of the R1 regularization term
R2_LAMBDA=0.3  # weight of the R2 regularization term (stage 2 only)

# Batch size, learning rate, and schedule depend on the model size.
if [ "$MODEL" == "xlm-roberta-large" ]; then
  BATCH_SIZE=4
  GRAD_ACC=8
  LR=1.5e-5
  EPOCH=10
  MAX_STEPS=2500
else
  BATCH_SIZE=32
  GRAD_ACC=1
  LR=3e-5
  EPOCH=20
  MAX_STEPS=5000
fi

if [ "$STAGE" == 1 ]; then
  # Stage 1: fine-tune on English TyDiQA-GoldP with the R1 loss on
  # BPE-sampled (noised) inputs.
  OUTPUT_DIR="${OUT_DIR}/${TASK}/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAXL}-SS-bsr${BSR}-sa${SA}-snbs${SNBS}-R1_LAMBDA${R1_LAMBDA}/"
  python ./src/run_qa.py --model_type xlmr \
    --task_name $TASK \
    --model_name_or_path $MODEL_PATH \
    --do_train \
    --do_eval \
    --language $LANGS \
    --train_language en \
    --data_dir $DATA_DIR/$TASK/ \
    --per_gpu_train_batch_size $BATCH_SIZE \
    --gradient_accumulation_steps $GRAD_ACC \
    --per_gpu_eval_batch_size 128 \
    --learning_rate $LR \
    --num_train_epochs $EPOCH \
    --save_steps 0 \
    --logging_each_epoch \
    --max_seq_length $MAXL \
    --doc_stride 128 \
    --output_dir $OUTPUT_DIR \
    --overwrite_output_dir \
    --evaluate_during_training \
    --logging_steps 50 \
    --evaluate_steps 0 \
    --seed $SEED \
    --fp16 --fp16_opt_level O2 \
    --warmup_steps -1 \
    --enable_r1_loss \
    --r1_lambda $R1_LAMBDA \
    --original_loss \
    --overall_ratio 1.0 \
    --keep_boundary_unchanged \
    --enable_bpe_sampling \
    --bpe_sampling_ratio $BSR \
    --sampling_alpha $SA \
    --sampling_nbest_size $SNBS \
    --noised_max_seq_length $MAXL
elif [ "$STAGE" == 2 ]; then
  # Stage 2: continue training with machine-translation data augmentation
  # and the R2 loss, initialized from the stage-1 output directory.
  FIRST_STAGE_MODEL_PATH="${OUT_DIR}/${TASK}/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAXL}-SS-bsr${BSR}-sa${SA}-snbs${SNBS}-R1_LAMBDA${R1_LAMBDA}/"
  OUTPUT_DIR="${OUT_DIR}/${TASK}/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAXL}-SS-bsr${BSR}-sa${SA}-snbs${SNBS}-R1_Lambda${R1_LAMBDA}-Aug1.0-MT-R2_Lambda${R2_LAMBDA}/"
  python ./src/run_qa.py --model_type xlmr \
    --task_name $TASK \
    --model_name_or_path $MODEL_PATH \
    --do_train \
    --do_eval \
    --language $LANGS \
    --train_language en \
    --data_dir $DATA_DIR/$TASK/ \
    --per_gpu_train_batch_size $BATCH_SIZE \
    --gradient_accumulation_steps $GRAD_ACC \
    --per_gpu_eval_batch_size 128 \
    --learning_rate $LR \
    --num_train_epochs $EPOCH \
    --save_steps 0 \
    --logging_each_epoch \
    --max_seq_length $MAXL \
    --doc_stride 128 \
    --output_dir $OUTPUT_DIR \
    --overwrite_output_dir \
    --evaluate_during_training \
    --logging_steps 50 \
    --evaluate_steps 0 \
    --seed $SEED \
    --fp16 --fp16_opt_level O2 \
    --warmup_steps -1 \
    --enable_r1_loss \
    --r1_lambda $R1_LAMBDA \
    --original_loss \
    --overall_ratio 1.0 \
    --keep_boundary_unchanged \
    --enable_bpe_sampling \
    --bpe_sampling_ratio $BSR \
    --sampling_alpha $SA \
    --sampling_nbest_size $SNBS \
    --noised_max_seq_length $MAXL \
    --enable_data_augmentation \
    --augment_ratio 1.0 \
    --augment_method mt \
    --translation_path $TRANSLATION_PATH \
    --max_steps $MAX_STEPS \
    --r2_lambda $R2_LAMBDA \
    --first_stage_model_path $FIRST_STAGE_MODEL_PATH
fi
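
# Example invocation (a sketch; the script filename "train_tydiqa.sh" is an
# assumption, not taken from this file). Stage 2 expects the stage-1 output
# directory to already exist under $OUT_DIR.
#   bash train_tydiqa.sh xlm-roberta-base 1 0 ./download/ ./outputs/ 1   # stage 1
#   bash train_tydiqa.sh xlm-roberta-base 2 0 ./download/ ./outputs/ 1   # stage 2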