In this case, a list of configurations must be inserted with the benchmark args as follows:

<pt>

```py
>>> from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments, BertConfig

>>> args = PyTorchBenchmarkArguments(
...     models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
... )
>>> config_base = BertConfig()
>>> config_384_hid = BertConfig(hidden_size=384)
>>> config_6_lay = BertConfig(num_hidden_layers=6)

>>> benchmark = PyTorchBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
>>> benchmark.run()
==================== INFERENCE - SPEED - RESULT ====================
Model Name             Batch Size     Seq Length     Time in s
bert-base                  8              8            0.006
bert-base                  8             32            0.006
bert-base                  8            128            0.018
bert-base                  8            512            0.088
bert-384-hid               8              8            0.006
bert-384-hid               8             32            0.006
bert-384-hid               8            128            0.011
bert-384-hid               8            512            0.054
bert-6-lay                 8              8            0.003
bert-6-lay                 8             32            0.004
bert-6-lay                 8            128            0.009
bert-6-lay                 8            512            0.044
==================== INFERENCE - MEMORY - RESULT ====================
Model Name             Batch Size     Seq Length    Memory in MB
bert-base                  8              8            1277
bert-base                  8             32            1281
bert-base                  8            128            1307
bert-base                  8            512            1539
bert-384-hid               8              8            1005
bert-384-hid               8             32            1027
bert-384-hid               8            128            1035
bert-384-hid               8            512            1255
bert-6-lay                 8              8            1097
bert-6-lay                 8             32            1101
bert-6-lay                 8            128            1127
bert-6-lay                 8            512            1359
==================== ENVIRONMENT INFORMATION ====================
transformers_version: 2.11.0
framework: PyTorch
use_torchscript: False
framework_version: 1.4.0
python_version: 3.6.10
system: Linux
cpu: x86_64
architecture: 64bit
date: 2020-06-29
time: 09:35:25.143267
fp16: False
use_multiprocessing: True
only_pretrain_model: False
cpu_ram_mb: 32088
use_gpu: True
num_gpus: 1
gpu: TITAN RTX
gpu_ram_mb: 24217
gpu_power_watts: 280.0
gpu_performance_state: 2
use_tpu: False
```
</pt>
<tf>

```py
>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments, BertConfig

>>> args = TensorFlowBenchmarkArguments(
...     models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
... )
>>> config_base = BertConfig()
>>> config_384_hid = BertConfig(hidden_size=384)
>>> config_6_lay = BertConfig(num_hidden_layers=6)

>>> benchmark = TensorFlowBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
>>> benchmark.run()
==================== INFERENCE - SPEED - RESULT ====================
Model Name             Batch Size     Seq Length     Time in s
bert-base                  8              8            0.005
bert-base                  8             32            0.008
bert-base                  8            128            0.022
bert-base                  8            512            0.106
bert-384-hid               8              8            0.005
bert-384-hid               8             32            0.007
bert-384-hid               8            128            0.018
bert-384-hid               8            512            0.064
bert-6-lay                 8              8            0.002
bert-6-lay                 8             32            0.003
bert-6-lay                 8            128            0.0011
bert-6-lay                 8            512            0.074
==================== INFERENCE - MEMORY - RESULT ====================
Model Name             Batch Size     Seq Length    Memory in MB
bert-base                  8              8            1330
bert-base                  8             32            1330
bert-base                  8            128            1330
bert-base                  8            512            1770
bert-384-hid               8              8            1330
bert-384-hid               8             32            1330
bert-384-hid               8            128            1330
bert-384-hid               8            512            1540
bert-6-lay                 8              8            1330
bert-6-lay                 8             32            1330
bert-6-lay                 8            128            1330
bert-6-lay                 8            512            1540
==================== ENVIRONMENT INFORMATION ====================
transformers_version: 2.11.0
framework: Tensorflow
use_xla: False
framework_version: 2.2.0
python_version: 3.6.10
system: Linux
cpu: x86_64
architecture: 64bit
date: 2020-06-29
time: 09:38:15.487125
fp16: False
use_multiprocessing: True
only_pretrain_model: False
cpu_ram_mb: 32088
use_gpu: True
num_gpus: 1
gpu: TITAN RTX
gpu_ram_mb: 24217
gpu_power_watts: 280.0
gpu_performance_state: 2
use_tpu: False
```
</tf>

Again, inference time and required memory for inference are measured, but this time for customized configurations of the `BertModel` class.

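To get a feel for where these differences come from, the same custom configurations can also be instantiated as plain models and compared by size. The snippet below is a minimal, illustrative sketch and not part of the benchmark output above: it builds each `BertConfig` as a randomly initialized `BertModel` and prints its parameter count using the `num_parameters()` helper.

```py
from transformers import BertConfig, BertModel

# Illustrative sketch: instantiate the benchmarked configurations as plain
# BertModel objects (randomly initialized, no pretrained weights downloaded)
# and compare their parameter counts.
configs = {
    "bert-base": BertConfig(),
    "bert-384-hid": BertConfig(hidden_size=384),
    "bert-6-lay": BertConfig(num_hidden_layers=6),
}

for name, config in configs.items():
    model = BertModel(config)
    print(f"{name}: {model.num_parameters() / 1e6:.1f}M parameters")
```

A smaller hidden size or fewer layers means fewer parameters and less computation per forward pass, which is consistent with the lower inference times and memory reported for `bert-384-hid` and `bert-6-lay` above.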