{"max_batch_size": 24, "max_beam_width": 1, "max_input_len": 4, "max_output_len": 448, "world_size": 1, "dtype": "float16", "quantize_dir": "quantize/1-gpu", "use_gpt_attention_plugin": "float16", "use_bert_attention_plugin": null, "use_context_fmha_enc": false, "use_context_fmha_dec": false, "use_gemm_plugin": "float16", "use_layernorm_plugin": false, "remove_input_padding": false, "use_weight_only_enc": false, "use_weight_only_dec": false, "weight_only_precision": "int8", "int8_kv_cache": false, "debug_mode": false, "cuda_compute_capability": [9, 0], "output_dir": "/root/.cache/whisper_s2t/models/trt/large-v2/2afd60b3d1286616bdea7dc13d15e906", "model_path": "/root/.cache/whisper_s2t/models/trt/large-v2/pt_ckpt.pt"}