diff --git "a/profile_trace/iteration_137216/rank3_trace.json" "b/profile_trace/iteration_137216/rank3_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_137216/rank3_trace.json" @@ -0,0 +1,83385 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 3, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "3CA079C6E4DE4E9AA8EAB8FEF061307A", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595854417.715, "dur": 123.021, + "args": { + "External id": 3289601,"Record function id": 0, "Sequence number": 32987334, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595854445.641, "dur": 85.766, + "args": { + "External id": 3289602,"Sequence number": 32987334, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 1336756, "tid": 1381189, "ts": 1587595854445.641, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336756, "tid": 1381189, + "ts": 1587595854453.878, "dur": 75.443, + "args": { + "External id": 3289603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595854557.004, "dur": 211.292, + "args": { + "External id": 3289604,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595854609.184, "dur": 88.517, + "args": { + "External id": 3289605,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.27", "pid": 1336756, "tid": 1381189, + "ts": 1587595854640.425, "dur": 46.144, + "args": { + "External id": 3289606,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595854702.542, "dur": 1.986, + "args": { + "External id": 3289607,"Sequence number": 32987333, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 1336756, "tid": 1381189, "ts": 1587595854702.542, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595854708.521, "dur": 54.741, + "args": { + "External id": 3289608,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595854717.607, "dur": 45.082, + "args": { + "External id": 3289609,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595854726.625, "dur": 3.421, + "args": { + "External id": 3289610,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595854776.670, "dur": 36339.756, + "args": { + "External id": 3289611,"Record function id": 0, "Sequence number": 32987331, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595854778.884, "dur": 36323.154, + "args": { + "External id": 3289612,"Sequence number": 32987331, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 1336756, "tid": 1381189, "ts": 1587595854778.884, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595854816.958, "dur": 3.784, + "args": { + "External id": 3289613,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595854826.408, "dur": 36106.018, + "args": { + "External id": 3289614,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595854828.585, "dur": 36103.216, + "args": { + "External id": 3289615,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595854833.468, "dur": 9.421, + "args": { + "External id": 3289616,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595854844.532, "dur": 36083.481, + "args": { + "External id": 3289617,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336756, "tid": 1381189, + "ts": 1587595890940.855, "dur": 0.685, + "args": { + "External id": 3289618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595890944.487, "dur": 4.087, + "args": { + "External id": 3289619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595890946.880, "dur": 1.495, + "args": { + "External id": 3289620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1381189, + "ts": 1587595890956.621, "dur": 67.631, + "args": { + "External id": 3289621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336756, "tid": 1381189, + "ts": 1587595891035.556, "dur": 51.103, + "args": { + "External id": 3289622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1381189, + "ts": 1587595891037.497, "dur": 48.916, + "args": { + "External id": 3289623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1381189, + "ts": 1587595891039.284, "dur": 46.800, + "args": { + "External id": 3289624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891132.959, "dur": 21.510, + "args": { + "External id": 3289625,"Record function id": 0, "Sequence number": 32987330, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891135.446, "dur": 16.206, + "args": { + "External id": 3289626,"Sequence number": 32987330, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 1336756, "tid": 1381189, "ts": 1587595891135.446, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595891140.297, "dur": 11.110, + "args": { + "External id": 3289627,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595891144.500, "dur": 6.734, + "args": { + "External id": 3289628,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891158.582, "dur": 101.475, + "args": { + "External id": 3289629,"Record function id": 0, "Sequence number": 32987329, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891159.638, "dur": 92.986, + "args": { + "External id": 3289630,"Sequence number": 32987329, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 1336756, "tid": 1381189, "ts": 1587595891159.638, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595891163.324, "dur": 88.793, + "args": { + "External id": 3289631,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595891169.074, "dur": 37.358, + "args": { + "External id": 3289632,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595891173.952, "dur": 5.345, + "args": { + "External id": 3289633,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891180.724, "dur": 25.402, + "args": { + "External id": 3289634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891185.860, "dur": 19.813, + "args": { + "External id": 3289635,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595891208.920, "dur": 4.673, + "args": { + "External id": 3289636,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595891211.756, "dur": 1.500, + "args": { + "External id": 3289637,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891215.116, "dur": 35.858, + "args": { + "External id": 3289638,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891266.807, "dur": 66.210, + "args": { + "External id": 3289639,"Record function id": 0, "Sequence number": 32987328, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891267.958, "dur": 61.334, + "args": { + "External id": 3289640,"Sequence number": 32987328, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 1336756, "tid": 1381189, "ts": 1587595891267.958, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595891271.120, "dur": 57.845, + "args": { + "External id": 3289641,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595891276.404, "dur": 20.932, + "args": { + "External id": 3289642,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595891277.838, "dur": 3.051, + "args": { + "External id": 3289643,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891281.750, "dur": 15.325, + "args": { + "External id": 3289644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891285.046, "dur": 11.693, + "args": { + "External id": 3289645,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595891301.748, "dur": 5.578, + "args": { + "External id": 3289646,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595891305.563, "dur": 1.074, + "args": { + "External id": 3289647,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891307.965, "dur": 20.311, + "args": { + "External id": 3289648,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891337.186, "dur": 132.384, + "args": { + "External id": 3289649,"Record function id": 0, "Sequence number": 32987327, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891338.130, "dur": 128.043, + "args": { + "External id": 3289650,"Sequence number": 32987327, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 1336756, "tid": 1381189, "ts": 1587595891338.130, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595891342.696, "dur": 123.121, + "args": { + "External id": 3289651,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595891346.240, "dur": 17.265, + "args": { + "External id": 3289652,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595891346.989, "dur": 2.119, + "args": { + "External id": 3289653,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891350.208, "dur": 13.025, + "args": { + "External id": 3289654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891351.130, "dur": 11.764, + "args": { + "External id": 3289655,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595891367.065, "dur": 4.612, + "args": { + "External id": 3289656,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595891368.332, "dur": 3.166, + "args": { + "External id": 3289657,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891372.853, "dur": 92.231, + "args": { + "External id": 3289658,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891473.576, "dur": 102.189, + "args": { + "External id": 3289659,"Record function id": 0, "Sequence number": 32987326, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891474.633, "dur": 98.308, + "args": { + "External id": 3289660,"Sequence number": 32987326, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 1336756, "tid": 1381189, "ts": 1587595891474.633, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595891476.014, "dur": 96.633, + "args": { + "External id": 3289661,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595891479.544, "dur": 16.113, + "args": { + "External id": 3289662,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595891480.527, "dur": 2.202, + "args": { + "External id": 3289663,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891483.353, "dur": 12.050, + "args": { + "External id": 3289664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891484.684, "dur": 10.417, + "args": { + "External id": 3289665,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595891499.141, "dur": 3.993, + "args": { + "External id": 3289666,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595891502.133, "dur": 0.888, + "args": { + "External id": 3289667,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891503.870, "dur": 68.015, + "args": { + "External id": 3289668,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891580.229, "dur": 41.242, + "args": { + "External id": 3289669,"Record function id": 0, "Sequence number": 32987325, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595891581.440, "dur": 0.938, + "args": { + "External id": 3289670,"Sequence number": 32987325, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 1336756, "tid": 1381189, "ts": 1587595891581.440, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595891584.993, "dur": 33.290, + "args": { + "External id": 3289671,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595891587.207, "dur": 30.546, + "args": { + "External id": 3289672,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595891597.854, "dur": 0.936, + "args": { + "External id": 3289673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595891626.429, "dur": 2268.523, + "args": { + "External id": 3289674,"Record function id": 0, "Sequence number": 32987323, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595891628.219, "dur": 2223.825, + "args": { + "External id": 3289675,"Sequence number": 32987323, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 1336756, "tid": 1381189, "ts": 1587595891628.219, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595891669.731, "dur": 3.340, + "args": { + "External id": 3289676,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595891675.592, "dur": 2085.510, + "args": { + "External id": 3289677,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595891677.560, "dur": 2083.180, + "args": { + "External id": 3289678,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595891680.401, "dur": 5.415, + "args": { + "External id": 3289679,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595891689.471, "dur": 2070.003, + "args": { + "External id": 3289680,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336756, "tid": 1381189, + "ts": 1587595893764.668, "dur": 0.506, + "args": { + "External id": 3289681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595893766.361, "dur": 2.745, + "args": { + "External id": 3289682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595893767.902, "dur": 1.071, + "args": { + "External id": 3289683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1381189, + "ts": 1587595893773.541, "dur": 23.386, + "args": { + "External id": 3289684,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336756, "tid": 1381189, + "ts": 1587595893802.402, "dur": 41.495, + "args": { + "External id": 3289685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1381189, + "ts": 1587595893803.713, "dur": 39.980, + "args": { + "External id": 3289686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1381189, + "ts": 1587595893804.799, "dur": 38.560, + "args": { + "External id": 3289687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595893860.724, "dur": 29.897, + "args": { + "External id": 3289688,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595893903.543, "dur": 17.989, + "args": { + "External id": 3289689,"Record function id": 0, "Sequence number": 32987322, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595893908.086, "dur": 11.080, + "args": { + "External id": 3289690,"Sequence number": 32987322, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 1336756, "tid": 1381189, "ts": 1587595893908.086, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595893913.495, "dur": 5.455, + "args": { + "External id": 3289691,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595893914.852, "dur": 3.994, + "args": { + "External id": 3289692,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595893924.821, "dur": 95.391, + "args": { + "External id": 3289693,"Record function id": 0, "Sequence number": 32987321, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595893925.723, "dur": 88.381, + "args": { + "External id": 3289694,"Sequence number": 32987321, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 1336756, "tid": 1381189, "ts": 1587595893925.723, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595893927.401, "dur": 86.304, + "args": { + "External id": 3289695,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595893931.889, "dur": 23.198, + "args": { + "External id": 3289696,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595893933.302, "dur": 5.491, + "args": { + "External id": 3289697,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595893939.604, "dur": 15.220, + "args": { + "External id": 3289698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595893940.694, "dur": 13.703, + "args": { + "External id": 3289699,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595893956.303, "dur": 2.645, + "args": { + "External id": 3289700,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595893957.834, "dur": 0.924, + "args": { + "External id": 3289701,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595893962.548, "dur": 50.053, + "args": { + "External id": 3289702,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894026.352, "dur": 61.452, + "args": { + "External id": 3289703,"Record function id": 0, "Sequence number": 32987320, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894027.463, "dur": 57.371, + "args": { + "External id": 3289704,"Sequence number": 32987320, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 1336756, "tid": 1381189, "ts": 1587595894027.463, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595894032.299, "dur": 52.324, + "args": { + "External id": 3289705,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595894034.237, "dur": 22.235, + "args": { + "External id": 3289706,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595894035.364, "dur": 3.058, + "args": { + "External id": 3289707,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894038.986, "dur": 17.225, + "args": { + "External id": 3289708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894039.952, "dur": 15.908, + "args": { + "External id": 3289709,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595894057.910, "dur": 9.116, + "args": { + "External id": 3289710,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595894065.347, "dur": 1.071, + "args": { + "External id": 3289711,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894067.935, "dur": 16.152, + "args": { + "External id": 3289712,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894091.743, "dur": 93.825, + "args": { + "External id": 3289713,"Record function id": 0, "Sequence number": 32987319, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894092.865, "dur": 89.763, + "args": { + "External id": 3289714,"Sequence number": 32987319, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 1336756, "tid": 1381189, "ts": 1587595894092.865, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595894094.490, "dur": 87.901, + "args": { + "External id": 3289715,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595894095.656, "dur": 18.964, + "args": { + "External id": 3289716,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595894096.765, "dur": 2.266, + "args": { + "External id": 3289717,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894099.637, "dur": 14.708, + "args": { + "External id": 3289718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894100.820, "dur": 13.157, + "args": { + "External id": 3289719,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595894115.817, "dur": 4.437, + "args": { + "External id": 3289720,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595894119.158, "dur": 0.886, + "args": { + "External id": 3289721,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894120.853, "dur": 60.889, + "args": { + "External id": 3289722,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894194.131, "dur": 92.572, + "args": { + "External id": 3289723,"Record function id": 0, "Sequence number": 32987318, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894195.064, "dur": 72.829, + "args": { + "External id": 3289724,"Sequence number": 32987318, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 1336756, "tid": 1381189, "ts": 1587595894195.064, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595894199.244, "dur": 68.362, + "args": { + "External id": 3289725,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595894199.959, "dur": 16.401, + "args": { + "External id": 3289726,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595894200.846, "dur": 2.052, + "args": { + "External id": 3289727,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894203.493, "dur": 12.635, + "args": { + "External id": 3289728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894204.466, "dur": 11.311, + "args": { + "External id": 3289729,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595894217.385, "dur": 1.923, + "args": { + "External id": 3289730,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595894218.593, "dur": 0.585, + "args": { + "External id": 3289731,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894219.778, "dur": 47.280, + "args": { + "External id": 3289732,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894271.900, "dur": 13.653, + "args": { + "External id": 3289733,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894293.707, "dur": 34.101, + "args": { + "External id": 3289734,"Record function id": 0, "Sequence number": 32987317, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595894294.617, "dur": 0.787, + "args": { + "External id": 3289735,"Sequence number": 32987317, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 1336756, "tid": 1381189, "ts": 1587595894294.617, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595894297.959, "dur": 27.642, + "args": { + "External id": 3289736,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595894299.617, "dur": 25.564, + "args": { + "External id": 3289737,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595894308.253, "dur": 0.812, + "args": { + "External id": 3289738,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595894332.316, "dur": 3062.438, + "args": { + "External id": 3289739,"Record function id": 0, "Sequence number": 32987315, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595894336.087, "dur": 3032.538, + "args": { + "External id": 3289740,"Sequence number": 32987315, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 1336756, "tid": 1381189, "ts": 1587595894336.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595894367.164, "dur": 2.687, + "args": { + "External id": 3289741,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595894372.199, "dur": 2908.032, + "args": { + "External id": 3289742,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595894373.683, "dur": 2906.153, + "args": { + "External id": 3289743,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595894375.827, "dur": 3.075, + "args": { + "External id": 3289744,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595894379.869, "dur": 2898.885, + "args": { + "External id": 3289745,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336756, "tid": 1381189, + "ts": 1587595897283.409, "dur": 0.472, + "args": { + "External id": 3289746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897285.212, "dur": 4.674, + "args": { + "External id": 3289747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897288.886, "dur": 0.843, + "args": { + "External id": 3289748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1381189, + "ts": 1587595897293.743, "dur": 20.954, + "args": { + "External id": 3289749,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336756, "tid": 1381189, + "ts": 1587595897322.382, "dur": 39.256, + "args": { + "External id": 3289750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1381189, + "ts": 1587595897323.719, "dur": 37.702, + "args": { + "External id": 3289751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1381189, + "ts": 1587595897324.652, "dur": 36.479, + "args": { + "External id": 3289752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897375.527, "dur": 15.973, + "args": { + "External id": 3289753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897402.225, "dur": 9.528, + "args": { + "External id": 3289754,"Record function id": 0, "Sequence number": 32987314, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897403.339, "dur": 6.307, + "args": { + "External id": 3289755,"Sequence number": 32987314, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 1336756, "tid": 1381189, "ts": 1587595897403.339, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595897405.262, "dur": 4.182, + "args": { + "External id": 3289756,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595897406.328, "dur": 2.978, + "args": { + "External id": 3289757,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897415.060, "dur": 66.832, + "args": { + "External id": 3289758,"Record function id": 0, "Sequence number": 32987313, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897418.583, "dur": 58.882, + "args": { + "External id": 3289759,"Sequence number": 32987313, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 1336756, "tid": 1381189, "ts": 1587595897418.583, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595897422.760, "dur": 54.398, + "args": { + "External id": 3289760,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595897425.086, "dur": 21.082, + "args": { + "External id": 3289761,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595897426.693, "dur": 2.507, + "args": { + "External id": 3289762,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897430.129, "dur": 15.786, + "args": { + "External id": 3289763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897431.865, "dur": 13.715, + "args": { + "External id": 3289764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595897447.519, "dur": 5.092, + "args": { + "External id": 3289765,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595897451.570, "dur": 0.835, + "args": { + "External id": 3289766,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897453.334, "dur": 23.180, + "args": { + "External id": 3289767,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897485.802, "dur": 53.046, + "args": { + "External id": 3289768,"Record function id": 0, "Sequence number": 32987312, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897487.094, "dur": 49.225, + "args": { + "External id": 3289769,"Sequence number": 32987312, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 1336756, "tid": 1381189, "ts": 1587595897487.094, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595897488.544, "dur": 47.515, + "args": { + "External id": 3289770,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595897492.059, "dur": 16.742, + "args": { + "External id": 3289771,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595897492.603, "dur": 2.153, + "args": { + "External id": 3289772,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897495.343, "dur": 13.229, + "args": { + "External id": 3289773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897495.992, "dur": 12.176, + "args": { + "External id": 3289774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595897509.982, "dur": 9.120, + "args": { + "External id": 3289775,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595897514.565, "dur": 4.027, + "args": { + "External id": 3289776,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897520.087, "dur": 15.416, + "args": { + "External id": 3289777,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897542.422, "dur": 95.104, + "args": { + "External id": 3289778,"Record function id": 0, "Sequence number": 32987311, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897543.510, "dur": 90.873, + "args": { + "External id": 3289779,"Sequence number": 32987311, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 1336756, "tid": 1381189, "ts": 1587595897543.510, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595897545.156, "dur": 88.912, + "args": { + "External id": 3289780,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595897546.279, "dur": 21.424, + "args": { + "External id": 3289781,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595897549.728, "dur": 1.988, + "args": { + "External id": 3289782,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897552.595, "dur": 14.848, + "args": { + "External id": 3289783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897553.549, "dur": 13.510, + "args": { + "External id": 3289784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595897568.726, "dur": 4.248, + "args": { + "External id": 3289785,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595897572.160, "dur": 0.678, + "args": { + "External id": 3289786,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897573.675, "dur": 59.776, + "args": { + "External id": 3289787,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897641.377, "dur": 97.132, + "args": { + "External id": 3289788,"Record function id": 0, "Sequence number": 32987310, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897642.502, "dur": 77.936, + "args": { + "External id": 3289789,"Sequence number": 32987310, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 1336756, "tid": 1381189, "ts": 1587595897642.502, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595897645.819, "dur": 74.323, + "args": { + "External id": 3289790,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595897646.743, "dur": 19.016, + "args": { + "External id": 3289791,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595897649.768, "dur": 1.906, + "args": { + "External id": 3289792,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897652.279, "dur": 13.218, + "args": { + "External id": 3289793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897653.258, "dur": 11.904, + "args": { + "External id": 3289794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595897666.647, "dur": 4.019, + "args": { + "External id": 3289795,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595897669.685, "dur": 0.825, + "args": { + "External id": 3289796,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897671.958, "dur": 47.633, + "args": { + "External id": 3289797,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897724.141, "dur": 13.227, + "args": { + "External id": 3289798,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897742.257, "dur": 31.035, + "args": { + "External id": 3289799,"Record function id": 0, "Sequence number": 32987309, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595897743.510, "dur": 0.903, + "args": { + "External id": 3289800,"Sequence number": 32987309, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 1336756, "tid": 1381189, "ts": 1587595897743.510, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595897746.094, "dur": 23.562, + "args": { + "External id": 3289801,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595897748.264, "dur": 21.056, + "args": { + "External id": 3289802,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595897752.671, "dur": 0.407, + "args": { + "External id": 3289803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595897777.157, "dur": 3134.887, + "args": { + "External id": 3289804,"Record function id": 0, "Sequence number": 32987308, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595897787.302, "dur": 3091.620, + "args": { + "External id": 3289805,"Sequence number": 32987308, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 1336756, "tid": 1381189, "ts": 1587595897787.302, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595897813.286, "dur": 1.969, + "args": { + "External id": 3289806,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595897817.174, "dur": 2970.313, + "args": { + "External id": 3289807,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595897818.498, "dur": 2968.718, + "args": { + "External id": 3289808,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595897822.841, "dur": 2.999, + "args": { + "External id": 3289809,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595897826.627, "dur": 2959.614, + "args": { + "External id": 3289810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 1336756, "tid": 1381189, + "ts": 1587595900790.628, "dur": 0.455, + "args": { + "External id": 3289811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595900792.081, "dur": 1.639, + "args": { + "External id": 3289812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 1336756, "tid": 1381189, + "ts": 1587595900792.998, "dur": 0.609, + "args": { + "External id": 3289813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1381189, + "ts": 1587595900796.732, "dur": 22.904, + "args": { + "External id": 3289814,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336756, "tid": 1381189, + "ts": 1587595900824.239, "dur": 39.270, + "args": { + "External id": 3289815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1381189, + "ts": 1587595900825.514, "dur": 37.794, + "args": { + "External id": 3289816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1381189, + "ts": 1587595900826.444, "dur": 36.413, + "args": { + "External id": 3289817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595900892.330, "dur": 15.876, + "args": { + "External id": 3289818,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595900924.587, "dur": 12.549, + "args": { + "External id": 3289819,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595900927.045, "dur": 8.243, + "args": { + "External id": 3289820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595900930.358, "dur": 3.798, + "args": { + "External id": 3289821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595900931.086, "dur": 2.972, + "args": { + "External id": 3289822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595900942.884, "dur": 11.203, + "args": { + "External id": 3289823,"Record function id": 0, "Sequence number": 32987307, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595900944.081, "dur": 7.931, + "args": { + "External id": 3289824,"Sequence number": 32987307, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 1336756, "tid": 1381189, "ts": 1587595900944.081, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595900945.966, "dur": 5.850, + "args": { + "External id": 3289825,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595900949.034, "dur": 2.653, + "args": { + "External id": 3289826,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595900957.034, "dur": 95.371, + "args": { + "External id": 3289827,"Record function id": 0, "Sequence number": 32987306, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595900958.003, "dur": 89.201, + "args": { + "External id": 3289828,"Sequence number": 32987306, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 1336756, "tid": 1381189, "ts": 1587595900958.003, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595900959.754, "dur": 87.061, + "args": { + "External id": 3289829,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595900964.236, "dur": 48.549, + "args": { + "External id": 3289830,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595900965.482, "dur": 2.641, + "args": { + "External id": 3289831,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595900969.106, "dur": 43.392, + "args": { + "External id": 3289832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595900970.440, "dur": 41.280, + "args": { + "External id": 3289833,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595901014.902, "dur": 5.515, + "args": { + "External id": 3289834,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901019.134, "dur": 1.040, + "args": { + "External id": 3289835,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[8388608, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901021.197, "dur": 24.798, + "args": { + "External id": 3289836,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901057.666, "dur": 55.583, + "args": { + "External id": 3289837,"Record function id": 0, "Sequence number": 32987305, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901058.814, "dur": 52.255, + "args": { + "External id": 3289838,"Sequence number": 32987305, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 1336756, "tid": 1381189, "ts": 1587595901058.814, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595901060.588, "dur": 50.213, + "args": { + "External id": 3289839,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595901062.533, "dur": 26.371, + "args": { + "External id": 3289840,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595901070.214, "dur": 2.825, + "args": { + "External id": 3289841,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901073.869, "dur": 14.786, + "args": { + "External id": 3289842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901075.663, "dur": 12.594, + "args": { + "External id": 3289843,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595901089.847, "dur": 4.227, + "args": { + "External id": 3289844,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901092.672, "dur": 0.919, + "args": { + "External id": 3289845,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901094.948, "dur": 15.287, + "args": { + "External id": 3289846,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901116.869, "dur": 97.013, + "args": { + "External id": 3289847,"Record function id": 0, "Sequence number": 32987304, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901118.055, "dur": 92.758, + "args": { + "External id": 3289848,"Sequence number": 32987304, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 1336756, "tid": 1381189, "ts": 1587595901118.055, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595901119.541, "dur": 90.999, + "args": { + "External id": 3289849,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595901122.699, "dur": 20.613, + "args": { + "External id": 3289850,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595901123.698, "dur": 1.976, + "args": { + "External id": 3289851,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901129.163, "dur": 13.896, + "args": { + "External id": 3289852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901130.357, "dur": 12.307, + "args": { + "External id": 3289853,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595901144.504, "dur": 4.215, + "args": { + "External id": 3289854,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901145.652, "dur": 2.882, + "args": { + "External id": 3289855,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901149.623, "dur": 60.396, + "args": { + "External id": 3289856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901217.770, "dur": 101.111, + "args": { + "External id": 3289857,"Record function id": 0, "Sequence number": 32987303, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901218.962, "dur": 81.954, + "args": { + "External id": 3289858,"Sequence number": 32987303, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 1336756, "tid": 1381189, "ts": 1587595901218.962, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595901220.282, "dur": 80.370, + "args": { + "External id": 3289859,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1381189, + "ts": 1587595901221.081, "dur": 29.237, + "args": { + "External id": 3289860,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595901226.000, "dur": 1.661, + "args": { + "External id": 3289861,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 4, 2048]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901231.922, "dur": 18.140, + "args": { + "External id": 3289862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901236.937, "dur": 12.579, + "args": { + "External id": 3289863,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595901251.162, "dur": 2.073, + "args": { + "External id": 3289864,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901252.400, "dur": 0.707, + "args": { + "External id": 3289865,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901253.927, "dur": 46.122, + "args": { + "External id": 3289866,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595901304.679, "dur": 12.622, + "args": { + "External id": 3289867,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [33554432, 8192, 2048, 1], []], "Input Dims": [[16, 4096, 4, 2048], [16, 4096, 4, 2048], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595901323.931, "dur": 351.891, + "args": { + "External id": 3289868,"Record function id": 0, "Sequence number": 32987302, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595901325.501, "dur": 342.041, + "args": { + "External id": 3289869,"Sequence number": 32987302, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 1336756, "tid": 1381189, "ts": 1587595901325.501, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901497.169, "dur": 41.976, + "args": { + "External id": 3289870,"kernel_hash": "coj6y6gy5rvprivwlbvxp47673cukqlcygvp5cpbrgwb4yp2fdt6", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "262144", "2048", "1", "1986", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/oj/coj6y6gy5rvprivwlbvxp47673cukqlcygvp5cpbrgwb4yp2fdt6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[262144, 2048], [2048], [262144, 2048], [262144, 2048], [132, 2048], [262144], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901577.800, "dur": 28.789, + "args": { + "External id": 3289871,"kernel_hash": "crljzlkpbfozvboy2akosvc2vfrg5cb56xolbkj7jk22iz5mrjtx", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/rl/crljzlkpbfozvboy2akosvc2vfrg5cb56xolbkj7jk22iz5mrjtx.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 1336756, "tid": 1381189, + "ts": 1587595901623.400, "dur": 19.976, + "args": { + "External id": 3289872,"kernel_hash": "cwikyxkzux4tjcfntxriioajhoy22fwo4vn7g7obctqs5aqrtd3v", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/wi/cwikyxkzux4tjcfntxriioajhoy22fwo4vn7g7obctqs5aqrtd3v.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595901685.152, "dur": 12.660, + "args": { + "External id": 3289873,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595901687.094, "dur": 9.787, + "args": { + "External id": 3289874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595901689.749, "dur": 6.292, + "args": { + "External id": 3289875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595901693.201, "dur": 2.741, + "args": { + "External id": 3289876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901701.782, "dur": 35.380, + "args": { + "External id": 3289877,"Record function id": 0, "Sequence number": 32987301, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901702.736, "dur": 26.328, + "args": { + "External id": 3289878,"Sequence number": 32987301, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 2048, 1]], "Input Dims": [[16, 4096, 4, 2048]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 1336756, "tid": 1381189, "ts": 1587595901702.736, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595901705.000, "dur": 6.637, + "args": { + "External id": 3289879,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901708.926, "dur": 1.181, + "args": { + "External id": 3289880,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595901712.272, "dur": 7.213, + "args": { + "External id": 3289881,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901718.151, "dur": 0.687, + "args": { + "External id": 3289882,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595901720.056, "dur": 2.957, + "args": { + "External id": 3289883,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901721.480, "dur": 0.863, + "args": { + "External id": 3289884,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1381189, + "ts": 1587595901723.476, "dur": 4.887, + "args": { + "External id": 3289885,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901727.054, "dur": 0.601, + "args": { + "External id": 3289886,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901741.337, "dur": 5.285, + "args": { + "External id": 3289887,"Record function id": 0, "Sequence number": 32987300, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595901742.542, "dur": 1.248, + "args": { + "External id": 3289888,"Sequence number": 32987300, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 1336756, "tid": 1381189, "ts": 1587595901742.542, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595901750.650, "dur": 589.831, + "args": { + "External id": 3289889,"Record function id": 0, "Sequence number": 32987299, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595901751.844, "dur": 575.979, + "args": { + "External id": 3289890,"Sequence number": 32987299, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 1336756, "tid": 1381189, "ts": 1587595901751.844, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595901790.706, "dur": 12.442, + "args": { + "External id": 3289891,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595901798.424, "dur": 4.424, + "args": { + "External id": 3289892,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595901807.939, "dur": 6.297, + "args": { + "External id": 3289893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595901810.508, "dur": 2.726, + "args": { + "External id": 3289894,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901812.188, "dur": 0.808, + "args": { + "External id": 3289895,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1381189, + "ts": 1587595901820.256, "dur": 149.893, + "args": { + "External id": 3289896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595901821.572, "dur": 7.729, + "args": { + "External id": 3289897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595901822.352, "dur": 6.202, + "args": { + "External id": 3289898,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595901825.039, "dur": 3.451, + "args": { + "External id": 3289899,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1381189, + "ts": 1587595901830.748, "dur": 138.520, + "args": { + "External id": 3289900,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595901832.729, "dur": 134.942, + "args": { + "External id": 3289901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595901976.857, "dur": 43.722, + "args": { + "External id": 3289902,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902016.485, "dur": 3.685, + "args": { + "External id": 3289903,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595902054.395, "dur": 6.193, + "args": { + "External id": 3289904,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595902061.639, "dur": 4.897, + "args": { + "External id": 3289905,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595902069.202, "dur": 1.425, + "args": { + "External id": 3289906,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902109.856, "dur": 2.668, + "args": { + "External id": 3289907,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902110.776, "dur": 1.572, + "args": { + "External id": 3289908,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336756, "tid": 1381189, + "ts": 1587595902144.827, "dur": 159.676, + "args": { + "External id": 3289909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595902152.257, "dur": 9.513, + "args": { + "External id": 3289910,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902158.132, "dur": 1.222, + "args": { + "External id": 3289911,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595902163.504, "dur": 9.158, + "args": { + "External id": 3289912,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902168.917, "dur": 3.069, + "args": { + "External id": 3289913,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595902174.609, "dur": 4.649, + "args": { + "External id": 3289914,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902178.142, "dur": 0.750, + "args": { + "External id": 3289915,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595902180.030, "dur": 4.663, + "args": { + "External id": 3289916,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902183.726, "dur": 0.537, + "args": { + "External id": 3289917,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595902188.899, "dur": 4.568, + "args": { + "External id": 3289918,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902192.550, "dur": 0.631, + "args": { + "External id": 3289919,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902197.208, "dur": 7.925, + "args": { + "External id": 3289920,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595902202.770, "dur": 2.182, + "args": { + "External id": 3289921,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595902205.866, "dur": 4.067, + "args": { + "External id": 3289922,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902209.070, "dur": 0.554, + "args": { + "External id": 3289923,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902210.578, "dur": 2.227, + "args": { + "External id": 3289924,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902211.380, "dur": 1.326, + "args": { + "External id": 3289925,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587595902214.478, "dur": 74.635, + "args": { + "External id": 3289926,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902291.046, "dur": 3.926, + "args": { + "External id": 3289927,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595902297.653, "dur": 2.517, + "args": { + "External id": 3289928,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902298.957, "dur": 0.696, + "args": { + "External id": 3289929,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902302.236, "dur": 1.203, + "args": { + "External id": 3289930,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595902353.345, "dur": 11.048, + "args": { + "External id": 3289931,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595902355.670, "dur": 7.794, + "args": { + "External id": 3289932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595902358.299, "dur": 4.240, + "args": { + "External id": 3289933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595902359.880, "dur": 2.534, + "args": { + "External id": 3289934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902368.332, "dur": 9.742, + "args": { + "External id": 3289935,"Record function id": 0, "Sequence number": 32987298, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902369.510, "dur": 6.516, + "args": { + "External id": 3289936,"Sequence number": 32987298, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 1336756, "tid": 1381189, "ts": 1587595902369.510, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902373.472, "dur": 2.316, + "args": { + "External id": 3289937,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902374.596, "dur": 1.059, + "args": { + "External id": 3289938,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902381.743, "dur": 165.612, + "args": { + "External id": 3289939,"Record function id": 0, "Sequence number": 32987297, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902382.903, "dur": 157.714, + "args": { + "External id": 3289940,"Sequence number": 32987297, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 1336756, "tid": 1381189, "ts": 1587595902382.903, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902389.412, "dur": 4.677, + "args": { + "External id": 3289941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902390.872, "dur": 2.633, + "args": { + "External id": 3289942,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902392.444, "dur": 0.791, + "args": { + "External id": 3289943,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595902395.395, "dur": 56.296, + "args": { + "External id": 3289944,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902452.796, "dur": 6.363, + "args": { + "External id": 3289945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902453.762, "dur": 4.798, + "args": { + "External id": 3289946,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902456.983, "dur": 1.415, + "args": { + "External id": 3289947,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902460.651, "dur": 4.964, + "args": { + "External id": 3289948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902461.955, "dur": 3.218, + "args": { + "External id": 3289949,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902464.548, "dur": 0.531, + "args": { + "External id": 3289950,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595902466.279, "dur": 73.381, + "args": { + "External id": 3289951,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902552.325, "dur": 6.442, + "args": { + "External id": 3289952,"Record function id": 0, "Sequence number": 32987296, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902553.290, "dur": 3.393, + "args": { + "External id": 3289953,"Sequence number": 32987296, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 1336756, "tid": 1381189, "ts": 1587595902553.290, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902554.526, "dur": 2.000, + "args": { + "External id": 3289954,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902555.266, "dur": 1.122, + "args": { + "External id": 3289955,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902562.197, "dur": 12.590, + "args": { + "External id": 3289956,"Record function id": 0, "Sequence number": 32987295, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902566.592, "dur": 6.021, + "args": { + "External id": 3289957,"Sequence number": 32987295, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 1336756, "tid": 1381189, "ts": 1587595902566.592, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902567.457, "dur": 4.932, + "args": { + "External id": 3289958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902568.182, "dur": 3.749, + "args": { + "External id": 3289959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902571.281, "dur": 0.509, + "args": { + "External id": 3289960,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595902578.557, "dur": 5.230, + "args": { + "External id": 3289961,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595902579.990, "dur": 3.325, + "args": { + "External id": 3289962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595902581.091, "dur": 1.883, + "args": { + "External id": 3289963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595902581.709, "dur": 1.132, + "args": { + "External id": 3289964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902586.907, "dur": 7.584, + "args": { + "External id": 3289965,"Record function id": 0, "Sequence number": 32987294, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902587.741, "dur": 4.405, + "args": { + "External id": 3289966,"Sequence number": 32987294, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 1336756, "tid": 1381189, "ts": 1587595902587.741, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902588.982, "dur": 3.014, + "args": { + "External id": 3289967,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902591.231, "dur": 0.625, + "args": { + "External id": 3289968,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902599.872, "dur": 140.106, + "args": { + "External id": 3289969,"Record function id": 0, "Sequence number": 32987293, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902600.569, "dur": 131.380, + "args": { + "External id": 3289970,"Sequence number": 32987293, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 1336756, "tid": 1381189, "ts": 1587595902600.569, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902602.969, "dur": 2.772, + "args": { + "External id": 3289971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902603.687, "dur": 1.664, + "args": { + "External id": 3289972,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902604.645, "dur": 0.611, + "args": { + "External id": 3289973,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595902608.918, "dur": 45.137, + "args": { + "External id": 3289974,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902655.136, "dur": 6.490, + "args": { + "External id": 3289975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902655.825, "dur": 5.285, + "args": { + "External id": 3289976,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902659.886, "dur": 1.110, + "args": { + "External id": 3289977,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902662.653, "dur": 8.584, + "args": { + "External id": 3289978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902663.704, "dur": 7.126, + "args": { + "External id": 3289979,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902667.802, "dur": 2.965, + "args": { + "External id": 3289980,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595902671.982, "dur": 59.233, + "args": { + "External id": 3289981,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902744.368, "dur": 39.083, + "args": { + "External id": 3289982,"Record function id": 0, "Sequence number": 32987292, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902745.531, "dur": 6.231, + "args": { + "External id": 3289983,"Sequence number": 32987292, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 1336756, "tid": 1381189, "ts": 1587595902745.531, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902746.877, "dur": 4.731, + "args": { + "External id": 3289984,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902750.370, "dur": 1.123, + "args": { + "External id": 3289985,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595902754.801, "dur": 25.542, + "args": { + "External id": 3289986,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902787.180, "dur": 10.805, + "args": { + "External id": 3289987,"Record function id": 0, "Sequence number": 32987291, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595902788.452, "dur": 7.660, + "args": { + "External id": 3289988,"Sequence number": 32987291, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 1336756, "tid": 1381189, "ts": 1587595902788.452, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595902789.664, "dur": 6.226, + "args": { + "External id": 3289989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595902793.673, "dur": 1.776, + "args": { + "External id": 3289990,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595902794.760, "dur": 0.589, + "args": { + "External id": 3289991,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595902801.856, "dur": 5.182, + "args": { + "External id": 3289992,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595902803.084, "dur": 3.436, + "args": { + "External id": 3289993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595902804.104, "dur": 1.950, + "args": { + "External id": 3289994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595902804.694, "dur": 1.267, + "args": { + "External id": 3289995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595902810.841, "dur": 493.269, + "args": { + "External id": 3289996,"Record function id": 0, "Sequence number": 32987290, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595902812.386, "dur": 475.556, + "args": { + "External id": 3289997,"Sequence number": 32987290, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 1336756, "tid": 1381189, "ts": 1587595902812.386, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1381189, + "ts": 1587595902835.891, "dur": 32.631, + "args": { + "External id": 3289998,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595902837.382, "dur": 30.944, + "args": { + "External id": 3289999,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595902840.300, "dur": 5.294, + "args": { + "External id": 3290000,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595902842.552, "dur": 2.543, + "args": { + "External id": 3290001,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595902846.934, "dur": 20.914, + "args": { + "External id": 3290002,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902899.134, "dur": 5.431, + "args": { + "External id": 3290003,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902902.198, "dur": 2.119, + "args": { + "External id": 3290004,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595902908.457, "dur": 3.684, + "args": { + "External id": 3290005,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595902909.014, "dur": 3.018, + "args": { + "External id": 3290006,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595902926.639, "dur": 3.282, + "args": { + "External id": 3290007,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595902944.365, "dur": 4.159, + "args": { + "External id": 3290008,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595903168.707, "dur": 4.039, + "args": { + "External id": 3290009,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595903177.546, "dur": 37.689, + "args": { + "External id": 3290010,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903189.892, "dur": 1.027, + "args": { + "External id": 3290011,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595903220.517, "dur": 29.944, + "args": { + "External id": 3290012,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595903222.366, "dur": 27.868, + "args": { + "External id": 3290013,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903226.403, "dur": 4.414, + "args": { + "External id": 3290014,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595903232.174, "dur": 17.606, + "args": { + "External id": 3290015,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595903258.216, "dur": 2.485, + "args": { + "External id": 3290016,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595903259.474, "dur": 1.103, + "args": { + "External id": 3290017,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595903266.636, "dur": 5.503, + "args": { + "External id": 3290018,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595903270.552, "dur": 1.440, + "args": { + "External id": 3290019,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595903274.050, "dur": 4.165, + "args": { + "External id": 3290020,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595903274.803, "dur": 3.306, + "args": { + "External id": 3290021,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595903317.688, "dur": 9.721, + "args": { + "External id": 3290022,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595903319.542, "dur": 6.969, + "args": { + "External id": 3290023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595903321.911, "dur": 3.629, + "args": { + "External id": 3290024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595903323.394, "dur": 2.052, + "args": { + "External id": 3290025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903331.251, "dur": 11.323, + "args": { + "External id": 3290026,"Record function id": 0, "Sequence number": 32987289, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903332.268, "dur": 7.165, + "args": { + "External id": 3290027,"Sequence number": 32987289, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 1336756, "tid": 1381189, "ts": 1587595903332.268, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595903337.014, "dur": 2.203, + "args": { + "External id": 3290028,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595903337.899, "dur": 1.198, + "args": { + "External id": 3290029,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903345.796, "dur": 147.016, + "args": { + "External id": 3290030,"Record function id": 0, "Sequence number": 32987288, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903348.751, "dur": 138.913, + "args": { + "External id": 3290031,"Sequence number": 32987288, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 1336756, "tid": 1381189, "ts": 1587595903348.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595903352.404, "dur": 4.502, + "args": { + "External id": 3290032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595903354.087, "dur": 2.266, + "args": { + "External id": 3290033,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903355.437, "dur": 0.727, + "args": { + "External id": 3290034,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595903357.844, "dur": 59.812, + "args": { + "External id": 3290035,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595903418.973, "dur": 7.166, + "args": { + "External id": 3290036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595903419.696, "dur": 5.912, + "args": { + "External id": 3290037,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903422.857, "dur": 2.585, + "args": { + "External id": 3290038,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595903427.722, "dur": 5.475, + "args": { + "External id": 3290039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595903429.080, "dur": 3.713, + "args": { + "External id": 3290040,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903432.186, "dur": 0.505, + "args": { + "External id": 3290041,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595903433.799, "dur": 52.979, + "args": { + "External id": 3290042,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903497.460, "dur": 11.094, + "args": { + "External id": 3290043,"Record function id": 0, "Sequence number": 32987287, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903498.659, "dur": 7.589, + "args": { + "External id": 3290044,"Sequence number": 32987287, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 1336756, "tid": 1381189, "ts": 1587595903498.659, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595903499.765, "dur": 6.341, + "args": { + "External id": 3290045,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595903502.879, "dur": 3.116, + "args": { + "External id": 3290046,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903512.184, "dur": 8.841, + "args": { + "External id": 3290047,"Record function id": 0, "Sequence number": 32987286, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903513.273, "dur": 6.352, + "args": { + "External id": 3290048,"Sequence number": 32987286, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 1336756, "tid": 1381189, "ts": 1587595903513.273, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595903514.465, "dur": 4.949, + "args": { + "External id": 3290049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595903515.266, "dur": 3.674, + "args": { + "External id": 3290050,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903518.237, "dur": 0.624, + "args": { + "External id": 3290051,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595903524.755, "dur": 4.686, + "args": { + "External id": 3290052,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595903526.054, "dur": 2.911, + "args": { + "External id": 3290053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595903526.935, "dur": 1.768, + "args": { + "External id": 3290054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595903527.654, "dur": 0.900, + "args": { + "External id": 3290055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903532.311, "dur": 8.596, + "args": { + "External id": 3290056,"Record function id": 0, "Sequence number": 32987285, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595903535.578, "dur": 3.302, + "args": { + "External id": 3290057,"Sequence number": 32987285, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 1336756, "tid": 1381189, "ts": 1587595903535.578, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595903536.681, "dur": 2.067, + "args": { + "External id": 3290058,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595903537.242, "dur": 1.373, + "args": { + "External id": 3290059,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595903545.040, "dur": 317.158, + "args": { + "External id": 3290060,"Record function id": 0, "Sequence number": 32987284, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595903546.449, "dur": 297.689, + "args": { + "External id": 3290061,"Sequence number": 32987284, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 1336756, "tid": 1381189, "ts": 1587595903546.449, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595903562.672, "dur": 9.830, + "args": { + "External id": 3290062,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903568.836, "dur": 3.239, + "args": { + "External id": 3290063,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595903574.411, "dur": 3.019, + "args": { + "External id": 3290064,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903575.466, "dur": 1.766, + "args": { + "External id": 3290065,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595903579.196, "dur": 7.086, + "args": { + "External id": 3290066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903582.302, "dur": 3.824, + "args": { + "External id": 3290067,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595903616.530, "dur": 202.398, + "args": { + "External id": 3290068,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595903710.316, "dur": 3.129, + "args": { + "External id": 3290069,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595903717.513, "dur": 3.464, + "args": { + "External id": 3290070,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595903831.231, "dur": 3.578, + "args": { + "External id": 3290071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595903837.847, "dur": 0.838, + "args": { + "External id": 3290072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595903840.547, "dur": 0.774, + "args": { + "External id": 3290073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595903869.015, "dur": 298.749, + "args": { + "External id": 3290074,"Record function id": 0, "Sequence number": 32987283, "Fwd thread id": 1, "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595903891.233, "dur": 267.575, + "args": { + "External id": 3290075,"Sequence number": 32987283, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 474 + } + }, + { + "ph": "f", "id": 49, "pid": 1336756, "tid": 1381189, "ts": 1587595903891.233, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595903913.848, "dur": 53.927, + "args": { + "External id": 3290076,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903921.184, "dur": 3.889, + "args": { + "External id": 3290077,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595903926.817, "dur": 40.373, + "args": { + "External id": 3290078,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595903977.006, "dur": 39.004, + "args": { + "External id": 3290079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595903978.601, "dur": 1.931, + "args": { + "External id": 3290080,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595904178.361, "dur": 165.036, + "args": { + "External id": 3290081,"Record function id": 0, "Sequence number": 32987282, "Fwd thread id": 1, "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595904180.500, "dur": 156.044, + "args": { + "External id": 3290082,"Sequence number": 32987282, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 481 + } + }, + { + "ph": "f", "id": 50, "pid": 1336756, "tid": 1381189, "ts": 1587595904180.500, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595904194.189, "dur": 38.792, + "args": { + "External id": 3290083,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904198.939, "dur": 3.670, + "args": { + "External id": 3290084,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595904203.801, "dur": 28.628, + "args": { + "External id": 3290085,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595904240.321, "dur": 4.595, + "args": { + "External id": 3290086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904241.907, "dur": 2.685, + "args": { + "External id": 3290087,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904349.417, "dur": 16.744, + "args": { + "External id": 3290088,"Record function id": 0, "Sequence number": 32987281, "Fwd thread id": 1, "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904353.170, "dur": 10.476, + "args": { + "External id": 3290089,"Sequence number": 32987281, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 488 + } + }, + { + "ph": "f", "id": 51, "pid": 1336756, "tid": 1381189, "ts": 1587595904353.170, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904355.319, "dur": 8.064, + "args": { + "External id": 3290090,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904356.686, "dur": 6.482, + "args": { + "External id": 3290091,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904369.419, "dur": 7.493, + "args": { + "External id": 3290092,"Record function id": 0, "Sequence number": 32987280, "Fwd thread id": 1, "Ev Idx": 491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904370.233, "dur": 4.721, + "args": { + "External id": 3290093,"Sequence number": 32987280, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 492 + } + }, + { + "ph": "f", "id": 52, "pid": 1336756, "tid": 1381189, "ts": 1587595904370.233, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904372.858, "dur": 1.941, + "args": { + "External id": 3290094,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904373.693, "dur": 1.002, + "args": { + "External id": 3290095,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904380.016, "dur": 8.389, + "args": { + "External id": 3290096,"Record function id": 0, "Sequence number": 32987279, "Fwd thread id": 1, "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904381.075, "dur": 5.339, + "args": { + "External id": 3290097,"Sequence number": 32987279, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 496 + } + }, + { + "ph": "f", "id": 53, "pid": 1336756, "tid": 1381189, "ts": 1587595904381.075, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904382.223, "dur": 4.053, + "args": { + "External id": 3290098,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904385.270, "dur": 0.873, + "args": { + "External id": 3290099,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904393.603, "dur": 5.743, + "args": { + "External id": 3290100,"Record function id": 0, "Sequence number": 32987278, "Fwd thread id": 1, "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904394.696, "dur": 2.576, + "args": { + "External id": 3290101,"Sequence number": 32987278, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 500 + } + }, + { + "ph": "f", "id": 54, "pid": 1336756, "tid": 1381189, "ts": 1587595904394.696, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904395.600, "dur": 1.532, + "args": { + "External id": 3290102,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904396.076, "dur": 0.973, + "args": { + "External id": 3290103,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904402.759, "dur": 178.439, + "args": { + "External id": 3290104,"Record function id": 0, "Sequence number": 32987277, "Fwd thread id": 1, "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904403.814, "dur": 170.562, + "args": { + "External id": 3290105,"Sequence number": 32987277, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 504 + } + }, + { + "ph": "f", "id": 55, "pid": 1336756, "tid": 1381189, "ts": 1587595904403.814, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904408.145, "dur": 8.352, + "args": { + "External id": 3290106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904410.077, "dur": 5.783, + "args": { + "External id": 3290107,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904414.029, "dur": 1.572, + "args": { + "External id": 3290108,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595904417.931, "dur": 66.780, + "args": { + "External id": 3290109,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904486.236, "dur": 7.085, + "args": { + "External id": 3290110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904487.216, "dur": 5.282, + "args": { + "External id": 3290111,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904491.177, "dur": 1.128, + "args": { + "External id": 3290112,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904521.166, "dur": 5.121, + "args": { + "External id": 3290113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904522.325, "dur": 3.354, + "args": { + "External id": 3290114,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904525.302, "dur": 0.298, + "args": { + "External id": 3290115,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595904526.854, "dur": 46.731, + "args": { + "External id": 3290116,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904586.309, "dur": 6.200, + "args": { + "External id": 3290117,"Record function id": 0, "Sequence number": 32987276, "Fwd thread id": 1, "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904587.198, "dur": 3.315, + "args": { + "External id": 3290118,"Sequence number": 32987276, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 517 + } + }, + { + "ph": "f", "id": 56, "pid": 1336756, "tid": 1381189, "ts": 1587595904587.198, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904588.747, "dur": 1.603, + "args": { + "External id": 3290119,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904589.242, "dur": 1.011, + "args": { + "External id": 3290120,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904595.781, "dur": 11.239, + "args": { + "External id": 3290121,"Record function id": 0, "Sequence number": 32987275, "Fwd thread id": 1, "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904596.517, "dur": 8.383, + "args": { + "External id": 3290122,"Sequence number": 32987275, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 521 + } + }, + { + "ph": "f", "id": 57, "pid": 1336756, "tid": 1381189, "ts": 1587595904596.517, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904599.520, "dur": 5.155, + "args": { + "External id": 3290123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904600.067, "dur": 4.126, + "args": { + "External id": 3290124,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904603.737, "dur": 0.363, + "args": { + "External id": 3290125,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595904612.905, "dur": 8.009, + "args": { + "External id": 3290126,"Record function id": 0, "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595904614.278, "dur": 5.968, + "args": { + "External id": 3290127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595904616.557, "dur": 3.317, + "args": { + "External id": 3290128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595904617.477, "dur": 2.266, + "args": { + "External id": 3290129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904626.348, "dur": 8.138, + "args": { + "External id": 3290130,"Record function id": 0, "Sequence number": 32987274, "Fwd thread id": 1, "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904627.087, "dur": 5.142, + "args": { + "External id": 3290131,"Sequence number": 32987274, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 530 + } + }, + { + "ph": "f", "id": 58, "pid": 1336756, "tid": 1381189, "ts": 1587595904627.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904627.881, "dur": 4.171, + "args": { + "External id": 3290132,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904631.264, "dur": 0.700, + "args": { + "External id": 3290133,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904637.710, "dur": 94.431, + "args": { + "External id": 3290134,"Record function id": 0, "Sequence number": 32987273, "Fwd thread id": 1, "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904638.530, "dur": 87.511, + "args": { + "External id": 3290135,"Sequence number": 32987273, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 534 + } + }, + { + "ph": "f", "id": 59, "pid": 1336756, "tid": 1381189, "ts": 1587595904638.530, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904640.897, "dur": 4.467, + "args": { + "External id": 3290136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904641.352, "dur": 3.597, + "args": { + "External id": 3290137,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904644.221, "dur": 0.603, + "args": { + "External id": 3290138,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595904645.966, "dur": 29.827, + "args": { + "External id": 3290139,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904679.386, "dur": 2.734, + "args": { + "External id": 3290140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904680.123, "dur": 1.505, + "args": { + "External id": 3290141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904680.890, "dur": 0.626, + "args": { + "External id": 3290142,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904683.372, "dur": 5.079, + "args": { + "External id": 3290143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904684.395, "dur": 3.661, + "args": { + "External id": 3290144,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904685.195, "dur": 2.783, + "args": { + "External id": 3290145,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595904691.761, "dur": 33.529, + "args": { + "External id": 3290146,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904736.592, "dur": 32.643, + "args": { + "External id": 3290147,"Record function id": 0, "Sequence number": 32987272, "Fwd thread id": 1, "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904737.500, "dur": 4.580, + "args": { + "External id": 3290148,"Sequence number": 32987272, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 547 + } + }, + { + "ph": "f", "id": 60, "pid": 1336756, "tid": 1381189, "ts": 1587595904737.500, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904740.409, "dur": 1.525, + "args": { + "External id": 3290149,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904740.954, "dur": 0.845, + "args": { + "External id": 3290150,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595904745.242, "dur": 21.752, + "args": { + "External id": 3290151,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904772.725, "dur": 9.298, + "args": { + "External id": 3290152,"Record function id": 0, "Sequence number": 32987271, "Fwd thread id": 1, "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904773.790, "dur": 6.495, + "args": { + "External id": 3290153,"Sequence number": 32987271, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 552 + } + }, + { + "ph": "f", "id": 61, "pid": 1336756, "tid": 1381189, "ts": 1587595904773.790, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904774.747, "dur": 5.329, + "args": { + "External id": 3290154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904775.358, "dur": 4.176, + "args": { + "External id": 3290155,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904779.019, "dur": 0.428, + "args": { + "External id": 3290156,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595904785.722, "dur": 7.205, + "args": { + "External id": 3290157,"Record function id": 0, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595904789.262, "dur": 3.103, + "args": { + "External id": 3290158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595904790.382, "dur": 1.520, + "args": { + "External id": 3290159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595904790.781, "dur": 1.015, + "args": { + "External id": 3290160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904795.964, "dur": 5.590, + "args": { + "External id": 3290161,"Record function id": 0, "Sequence number": 32987270, "Fwd thread id": 1, "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904796.915, "dur": 2.620, + "args": { + "External id": 3290162,"Sequence number": 32987270, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 561 + } + }, + { + "ph": "f", "id": 62, "pid": 1336756, "tid": 1381189, "ts": 1587595904796.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904797.823, "dur": 1.566, + "args": { + "External id": 3290163,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904798.509, "dur": 0.772, + "args": { + "External id": 3290164,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904804.781, "dur": 110.284, + "args": { + "External id": 3290165,"Record function id": 0, "Sequence number": 32987269, "Fwd thread id": 1, "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904805.414, "dur": 100.880, + "args": { + "External id": 3290166,"Sequence number": 32987269, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 565 + } + }, + { + "ph": "f", "id": 63, "pid": 1336756, "tid": 1381189, "ts": 1587595904805.414, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904809.758, "dur": 2.192, + "args": { + "External id": 3290167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904810.207, "dur": 1.323, + "args": { + "External id": 3290168,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904810.986, "dur": 0.434, + "args": { + "External id": 3290169,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595904812.438, "dur": 27.992, + "args": { + "External id": 3290170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904841.591, "dur": 7.448, + "args": { + "External id": 3290171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904842.006, "dur": 6.573, + "args": { + "External id": 3290172,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904848.023, "dur": 0.452, + "args": { + "External id": 3290173,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904849.975, "dur": 2.013, + "args": { + "External id": 3290174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904850.562, "dur": 1.020, + "args": { + "External id": 3290175,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904851.136, "dur": 0.373, + "args": { + "External id": 3290176,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595904852.391, "dur": 52.599, + "args": { + "External id": 3290177,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904921.821, "dur": 29.746, + "args": { + "External id": 3290178,"Record function id": 0, "Sequence number": 32987268, "Fwd thread id": 1, "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904922.843, "dur": 6.563, + "args": { + "External id": 3290179,"Sequence number": 32987268, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 578 + } + }, + { + "ph": "f", "id": 64, "pid": 1336756, "tid": 1381189, "ts": 1587595904922.843, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595904924.421, "dur": 4.808, + "args": { + "External id": 3290180,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595904927.527, "dur": 1.614, + "args": { + "External id": 3290181,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595904931.805, "dur": 17.622, + "args": { + "External id": 3290182,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904955.087, "dur": 10.968, + "args": { + "External id": 3290183,"Record function id": 0, "Sequence number": 32987267, "Fwd thread id": 1, "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595904955.855, "dur": 8.087, + "args": { + "External id": 3290184,"Sequence number": 32987267, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 583 + } + }, + { + "ph": "f", "id": 65, "pid": 1336756, "tid": 1381189, "ts": 1587595904955.855, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595904956.590, "dur": 7.134, + "args": { + "External id": 3290185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595904959.575, "dur": 3.534, + "args": { + "External id": 3290186,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595904962.483, "dur": 0.496, + "args": { + "External id": 3290187,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595904969.927, "dur": 6.998, + "args": { + "External id": 3290188,"Record function id": 0, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595904971.152, "dur": 5.250, + "args": { + "External id": 3290189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595904972.168, "dur": 3.719, + "args": { + "External id": 3290190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595904974.633, "dur": 1.168, + "args": { + "External id": 3290191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595905014.347, "dur": 373.786, + "args": { + "External id": 3290192,"Record function id": 0, "Sequence number": 32987266, "Fwd thread id": 1, "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595905017.241, "dur": 337.344, + "args": { + "External id": 3290193,"Sequence number": 32987266, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 592 + } + }, + { + "ph": "f", "id": 66, "pid": 1336756, "tid": 1381189, "ts": 1587595905017.241, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595905050.390, "dur": 2.718, + "args": { + "External id": 3290194,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905050.895, "dur": 1.828, + "args": { + "External id": 3290195,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595905068.915, "dur": 6.628, + "args": { + "External id": 3290196,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595905085.399, "dur": 1.854, + "args": { + "External id": 3290197,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905247.692, "dur": 2.020, + "args": { + "External id": 3290198,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595905253.841, "dur": 41.524, + "args": { + "External id": 3290199,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905268.314, "dur": 1.171, + "args": { + "External id": 3290200,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595905301.154, "dur": 28.964, + "args": { + "External id": 3290201,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595905302.963, "dur": 26.962, + "args": { + "External id": 3290202,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905306.445, "dur": 4.067, + "args": { + "External id": 3290203,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595905312.184, "dur": 17.206, + "args": { + "External id": 3290204,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595905336.517, "dur": 2.333, + "args": { + "External id": 3290205,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905337.549, "dur": 1.174, + "args": { + "External id": 3290206,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595905344.561, "dur": 3.665, + "args": { + "External id": 3290207,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905346.901, "dur": 1.229, + "args": { + "External id": 3290208,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595905364.537, "dur": 18.144, + "args": { + "External id": 3290209,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595905399.888, "dur": 10.591, + "args": { + "External id": 3290210,"Record function id": 0, "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595905401.923, "dur": 7.794, + "args": { + "External id": 3290211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595905403.998, "dur": 4.757, + "args": { + "External id": 3290212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595905406.881, "dur": 1.722, + "args": { + "External id": 3290213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595905415.836, "dur": 6.101, + "args": { + "External id": 3290214,"Record function id": 0, "Sequence number": 32987265, "Fwd thread id": 1, "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595905417.963, "dur": 1.412, + "args": { + "External id": 3290215,"Sequence number": 32987265, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 614 + } + }, + { + "ph": "f", "id": 67, "pid": 1336756, "tid": 1381189, "ts": 1587595905417.963, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595905425.412, "dur": 409.418, + "args": { + "External id": 3290216,"Record function id": 0, "Sequence number": 32987264, "Fwd thread id": 1, "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595905426.696, "dur": 397.790, + "args": { + "External id": 3290217,"Sequence number": 32987264, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 616 + } + }, + { + "ph": "f", "id": 68, "pid": 1336756, "tid": 1381189, "ts": 1587595905426.696, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595905454.781, "dur": 8.774, + "args": { + "External id": 3290218,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595905459.841, "dur": 3.420, + "args": { + "External id": 3290219,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595905466.655, "dur": 7.961, + "args": { + "External id": 3290220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595905468.246, "dur": 5.744, + "args": { + "External id": 3290221,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905473.256, "dur": 0.560, + "args": { + "External id": 3290222,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1381189, + "ts": 1587595905478.550, "dur": 92.336, + "args": { + "External id": 3290223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595905479.432, "dur": 4.768, + "args": { + "External id": 3290224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595905480.229, "dur": 3.561, + "args": { + "External id": 3290225,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905480.970, "dur": 2.744, + "args": { + "External id": 3290226,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1381189, + "ts": 1587595905485.677, "dur": 84.742, + "args": { + "External id": 3290227,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595905490.198, "dur": 79.248, + "args": { + "External id": 3290228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595905574.750, "dur": 2.771, + "args": { + "External id": 3290229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905575.942, "dur": 1.461, + "args": { + "External id": 3290230,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595905609.312, "dur": 4.163, + "args": { + "External id": 3290231,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595905614.849, "dur": 2.019, + "args": { + "External id": 3290232,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595905617.805, "dur": 1.912, + "args": { + "External id": 3290233,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595905654.669, "dur": 1.819, + "args": { + "External id": 3290234,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905655.299, "dur": 1.027, + "args": { + "External id": 3290235,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336756, "tid": 1381189, + "ts": 1587595905678.632, "dur": 129.011, + "args": { + "External id": 3290236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595905683.656, "dur": 4.883, + "args": { + "External id": 3290237,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905687.056, "dur": 0.618, + "args": { + "External id": 3290238,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595905689.804, "dur": 10.519, + "args": { + "External id": 3290239,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905697.360, "dur": 2.328, + "args": { + "External id": 3290240,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595905701.770, "dur": 3.931, + "args": { + "External id": 3290241,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905704.783, "dur": 0.564, + "args": { + "External id": 3290242,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595905706.467, "dur": 4.345, + "args": { + "External id": 3290243,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905709.375, "dur": 0.759, + "args": { + "External id": 3290244,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595905716.148, "dur": 2.153, + "args": { + "External id": 3290245,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905717.366, "dur": 0.680, + "args": { + "External id": 3290246,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595905721.159, "dur": 4.851, + "args": { + "External id": 3290247,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595905724.340, "dur": 1.493, + "args": { + "External id": 3290248,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595905729.014, "dur": 1.877, + "args": { + "External id": 3290249,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905729.952, "dur": 0.672, + "args": { + "External id": 3290250,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595905731.540, "dur": 4.978, + "args": { + "External id": 3290251,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905733.693, "dur": 2.743, + "args": { + "External id": 3290252,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587595905737.849, "dur": 52.739, + "args": { + "External id": 3290253,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905794.617, "dur": 3.132, + "args": { + "External id": 3290254,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595905798.650, "dur": 5.021, + "args": { + "External id": 3290255,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905802.514, "dur": 0.667, + "args": { + "External id": 3290256,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905805.699, "dur": 0.835, + "args": { + "External id": 3290257,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595905843.709, "dur": 7.528, + "args": { + "External id": 3290258,"Record function id": 0, "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595905845.692, "dur": 4.746, + "args": { + "External id": 3290259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595905847.326, "dur": 2.436, + "args": { + "External id": 3290260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595905848.061, "dur": 1.609, + "args": { + "External id": 3290261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595905854.978, "dur": 8.608, + "args": { + "External id": 3290262,"Record function id": 0, "Sequence number": 32987263, "Fwd thread id": 1, "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595905855.815, "dur": 5.789, + "args": { + "External id": 3290263,"Sequence number": 32987263, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 662 + } + }, + { + "ph": "f", "id": 69, "pid": 1336756, "tid": 1381189, "ts": 1587595905855.815, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595905857.204, "dur": 4.240, + "args": { + "External id": 3290264,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595905860.443, "dur": 0.881, + "args": { + "External id": 3290265,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595905866.961, "dur": 195.042, + "args": { + "External id": 3290266,"Record function id": 0, "Sequence number": 32987262, "Fwd thread id": 1, "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595905867.927, "dur": 185.796, + "args": { + "External id": 3290267,"Sequence number": 32987262, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 666 + } + }, + { + "ph": "f", "id": 70, "pid": 1336756, "tid": 1381189, "ts": 1587595905867.927, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595905889.925, "dur": 7.339, + "args": { + "External id": 3290268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595905891.182, "dur": 5.341, + "args": { + "External id": 3290269,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905895.278, "dur": 0.913, + "args": { + "External id": 3290270,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595905898.492, "dur": 49.434, + "args": { + "External id": 3290271,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595905951.241, "dur": 3.978, + "args": { + "External id": 3290272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595905951.948, "dur": 2.622, + "args": { + "External id": 3290273,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905953.427, "dur": 0.999, + "args": { + "External id": 3290274,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595905956.625, "dur": 2.707, + "args": { + "External id": 3290275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595905957.683, "dur": 1.226, + "args": { + "External id": 3290276,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595905958.485, "dur": 0.344, + "args": { + "External id": 3290277,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595905962.139, "dur": 90.081, + "args": { + "External id": 3290278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906070.032, "dur": 9.582, + "args": { + "External id": 3290279,"Record function id": 0, "Sequence number": 32987261, "Fwd thread id": 1, "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906071.187, "dur": 7.300, + "args": { + "External id": 3290280,"Sequence number": 32987261, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 679 + } + }, + { + "ph": "f", "id": 71, "pid": 1336756, "tid": 1381189, "ts": 1587595906071.187, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906075.416, "dur": 2.926, + "args": { + "External id": 3290281,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906076.280, "dur": 1.947, + "args": { + "External id": 3290282,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906083.395, "dur": 6.900, + "args": { + "External id": 3290283,"Record function id": 0, "Sequence number": 32987260, "Fwd thread id": 1, "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906084.203, "dur": 4.189, + "args": { + "External id": 3290284,"Sequence number": 32987260, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 683 + } + }, + { + "ph": "f", "id": 72, "pid": 1336756, "tid": 1381189, "ts": 1587595906084.203, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906085.147, "dur": 3.023, + "args": { + "External id": 3290285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906086.155, "dur": 1.598, + "args": { + "External id": 3290286,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906086.993, "dur": 0.655, + "args": { + "External id": 3290287,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906094.479, "dur": 8.099, + "args": { + "External id": 3290288,"Record function id": 0, "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906095.642, "dur": 6.188, + "args": { + "External id": 3290289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906097.284, "dur": 4.308, + "args": { + "External id": 3290290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906099.950, "dur": 1.552, + "args": { + "External id": 3290291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906105.817, "dur": 7.103, + "args": { + "External id": 3290292,"Record function id": 0, "Sequence number": 32987259, "Fwd thread id": 1, "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906108.565, "dur": 2.801, + "args": { + "External id": 3290293,"Sequence number": 32987259, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 692 + } + }, + { + "ph": "f", "id": 73, "pid": 1336756, "tid": 1381189, "ts": 1587595906108.565, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906109.673, "dur": 1.565, + "args": { + "External id": 3290294,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906110.436, "dur": 0.682, + "args": { + "External id": 3290295,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906115.984, "dur": 122.575, + "args": { + "External id": 3290296,"Record function id": 0, "Sequence number": 32987258, "Fwd thread id": 1, "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906116.678, "dur": 113.456, + "args": { + "External id": 3290297,"Sequence number": 32987258, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 696 + } + }, + { + "ph": "f", "id": 74, "pid": 1336756, "tid": 1381189, "ts": 1587595906116.678, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906118.484, "dur": 8.075, + "args": { + "External id": 3290298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906121.070, "dur": 5.071, + "args": { + "External id": 3290299,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906125.562, "dur": 0.473, + "args": { + "External id": 3290300,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595906127.419, "dur": 38.740, + "args": { + "External id": 3290301,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906167.317, "dur": 4.906, + "args": { + "External id": 3290302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906167.992, "dur": 3.744, + "args": { + "External id": 3290303,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906169.174, "dur": 2.411, + "args": { + "External id": 3290304,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906175.757, "dur": 7.380, + "args": { + "External id": 3290305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906176.883, "dur": 5.867, + "args": { + "External id": 3290306,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906179.937, "dur": 2.740, + "args": { + "External id": 3290307,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595906184.107, "dur": 45.148, + "args": { + "External id": 3290308,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906243.073, "dur": 32.436, + "args": { + "External id": 3290309,"Record function id": 0, "Sequence number": 32987257, "Fwd thread id": 1, "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906244.328, "dur": 4.083, + "args": { + "External id": 3290310,"Sequence number": 32987257, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 709 + } + }, + { + "ph": "f", "id": 75, "pid": 1336756, "tid": 1381189, "ts": 1587595906244.328, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906245.820, "dur": 2.430, + "args": { + "External id": 3290311,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906246.672, "dur": 1.450, + "args": { + "External id": 3290312,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595906251.142, "dur": 21.580, + "args": { + "External id": 3290313,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906279.088, "dur": 11.666, + "args": { + "External id": 3290314,"Record function id": 0, "Sequence number": 32987256, "Fwd thread id": 1, "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906282.831, "dur": 6.289, + "args": { + "External id": 3290315,"Sequence number": 32987256, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 714 + } + }, + { + "ph": "f", "id": 76, "pid": 1336756, "tid": 1381189, "ts": 1587595906282.831, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906283.733, "dur": 5.174, + "args": { + "External id": 3290316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906284.397, "dur": 4.018, + "args": { + "External id": 3290317,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906287.714, "dur": 0.592, + "args": { + "External id": 3290318,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906294.465, "dur": 4.952, + "args": { + "External id": 3290319,"Record function id": 0, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906295.679, "dur": 3.241, + "args": { + "External id": 3290320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906296.878, "dur": 1.578, + "args": { + "External id": 3290321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906297.519, "dur": 0.838, + "args": { + "External id": 3290322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595906305.153, "dur": 403.380, + "args": { + "External id": 3290323,"Record function id": 0, "Sequence number": 32987255, "Fwd thread id": 1, "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595906306.487, "dur": 370.301, + "args": { + "External id": 3290324,"Sequence number": 32987255, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 723 + } + }, + { + "ph": "f", "id": 77, "pid": 1336756, "tid": 1381189, "ts": 1587595906306.487, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1381189, + "ts": 1587595906328.852, "dur": 35.378, + "args": { + "External id": 3290325,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595906330.205, "dur": 33.820, + "args": { + "External id": 3290326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595906332.795, "dur": 7.017, + "args": { + "External id": 3290327,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595906336.634, "dur": 2.668, + "args": { + "External id": 3290328,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595906341.325, "dur": 22.162, + "args": { + "External id": 3290329,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906377.059, "dur": 2.141, + "args": { + "External id": 3290330,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906377.972, "dur": 1.111, + "args": { + "External id": 3290331,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906382.925, "dur": 3.711, + "args": { + "External id": 3290332,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906383.405, "dur": 3.137, + "args": { + "External id": 3290333,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595906399.294, "dur": 2.023, + "args": { + "External id": 3290334,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595906411.600, "dur": 2.167, + "args": { + "External id": 3290335,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906568.538, "dur": 2.063, + "args": { + "External id": 3290336,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595906574.731, "dur": 31.234, + "args": { + "External id": 3290337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906583.206, "dur": 0.760, + "args": { + "External id": 3290338,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595906611.700, "dur": 31.581, + "args": { + "External id": 3290339,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595906613.657, "dur": 29.404, + "args": { + "External id": 3290340,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906619.002, "dur": 3.955, + "args": { + "External id": 3290341,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595906626.483, "dur": 15.899, + "args": { + "External id": 3290342,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595906647.347, "dur": 2.626, + "args": { + "External id": 3290343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906648.647, "dur": 1.158, + "args": { + "External id": 3290344,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906655.906, "dur": 2.171, + "args": { + "External id": 3290345,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906656.925, "dur": 1.044, + "args": { + "External id": 3290346,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906662.414, "dur": 3.952, + "args": { + "External id": 3290347,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906663.178, "dur": 3.091, + "args": { + "External id": 3290348,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595906689.883, "dur": 17.305, + "args": { + "External id": 3290349,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906717.934, "dur": 7.297, + "args": { + "External id": 3290350,"Record function id": 0, "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906720.010, "dur": 4.424, + "args": { + "External id": 3290351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906721.510, "dur": 2.180, + "args": { + "External id": 3290352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906722.376, "dur": 1.220, + "args": { + "External id": 3290353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906728.697, "dur": 6.877, + "args": { + "External id": 3290354,"Record function id": 0, "Sequence number": 32987254, "Fwd thread id": 1, "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906729.594, "dur": 3.643, + "args": { + "External id": 3290355,"Sequence number": 32987254, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 754 + } + }, + { + "ph": "f", "id": 78, "pid": 1336756, "tid": 1381189, "ts": 1587595906729.594, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906731.013, "dur": 2.043, + "args": { + "External id": 3290356,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906731.948, "dur": 0.984, + "args": { + "External id": 3290357,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906740.985, "dur": 115.068, + "args": { + "External id": 3290358,"Record function id": 0, "Sequence number": 32987253, "Fwd thread id": 1, "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906741.742, "dur": 108.771, + "args": { + "External id": 3290359,"Sequence number": 32987253, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 758 + } + }, + { + "ph": "f", "id": 79, "pid": 1336756, "tid": 1381189, "ts": 1587595906741.742, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906743.992, "dur": 5.721, + "args": { + "External id": 3290360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906745.141, "dur": 3.996, + "args": { + "External id": 3290361,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906748.371, "dur": 0.589, + "args": { + "External id": 3290362,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595906750.584, "dur": 48.081, + "args": { + "External id": 3290363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906799.779, "dur": 6.596, + "args": { + "External id": 3290364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906802.619, "dur": 3.213, + "args": { + "External id": 3290365,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906803.595, "dur": 2.071, + "args": { + "External id": 3290366,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906807.662, "dur": 2.947, + "args": { + "External id": 3290367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906808.965, "dur": 1.192, + "args": { + "External id": 3290368,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906809.750, "dur": 0.339, + "args": { + "External id": 3290369,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595906811.422, "dur": 38.291, + "args": { + "External id": 3290370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906860.691, "dur": 36.602, + "args": { + "External id": 3290371,"Record function id": 0, "Sequence number": 32987252, "Fwd thread id": 1, "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906864.599, "dur": 29.869, + "args": { + "External id": 3290372,"Sequence number": 32987252, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 771 + } + }, + { + "ph": "f", "id": 80, "pid": 1336756, "tid": 1381189, "ts": 1587595906864.599, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906868.526, "dur": 25.756, + "args": { + "External id": 3290373,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906889.538, "dur": 4.310, + "args": { + "External id": 3290374,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906904.952, "dur": 7.309, + "args": { + "External id": 3290375,"Record function id": 0, "Sequence number": 32987251, "Fwd thread id": 1, "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906905.868, "dur": 4.571, + "args": { + "External id": 3290376,"Sequence number": 32987251, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 775 + } + }, + { + "ph": "f", "id": 81, "pid": 1336756, "tid": 1381189, "ts": 1587595906905.868, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595906906.956, "dur": 3.280, + "args": { + "External id": 3290377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595906907.789, "dur": 1.957, + "args": { + "External id": 3290378,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906909.104, "dur": 0.525, + "args": { + "External id": 3290379,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906916.544, "dur": 7.817, + "args": { + "External id": 3290380,"Record function id": 0, "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595906917.808, "dur": 5.790, + "args": { + "External id": 3290381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906919.130, "dur": 4.176, + "args": { + "External id": 3290382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595906921.499, "dur": 1.722, + "args": { + "External id": 3290383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906927.276, "dur": 7.723, + "args": { + "External id": 3290384,"Record function id": 0, "Sequence number": 32987250, "Fwd thread id": 1, "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595906927.994, "dur": 5.255, + "args": { + "External id": 3290385,"Sequence number": 32987250, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 784 + } + }, + { + "ph": "f", "id": 82, "pid": 1336756, "tid": 1381189, "ts": 1587595906927.994, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595906928.827, "dur": 4.268, + "args": { + "External id": 3290386,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595906931.834, "dur": 1.123, + "args": { + "External id": 3290387,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595906939.027, "dur": 321.632, + "args": { + "External id": 3290388,"Record function id": 0, "Sequence number": 32987249, "Fwd thread id": 1, "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595906940.078, "dur": 304.366, + "args": { + "External id": 3290389,"Sequence number": 32987249, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 788 + } + }, + { + "ph": "f", "id": 83, "pid": 1336756, "tid": 1381189, "ts": 1587595906940.078, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595906953.934, "dur": 6.050, + "args": { + "External id": 3290390,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906955.747, "dur": 3.806, + "args": { + "External id": 3290391,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595906961.661, "dur": 5.083, + "args": { + "External id": 3290392,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906964.731, "dur": 1.833, + "args": { + "External id": 3290393,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595906968.010, "dur": 4.441, + "args": { + "External id": 3290394,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595906968.800, "dur": 3.488, + "args": { + "External id": 3290395,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595907034.117, "dur": 184.951, + "args": { + "External id": 3290396,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595907121.482, "dur": 4.076, + "args": { + "External id": 3290397,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595907127.756, "dur": 4.552, + "args": { + "External id": 3290398,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595907231.687, "dur": 4.099, + "args": { + "External id": 3290399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595907238.611, "dur": 0.680, + "args": { + "External id": 3290400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595907241.198, "dur": 0.698, + "args": { + "External id": 3290401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595907269.727, "dur": 221.227, + "args": { + "External id": 3290402,"Record function id": 0, "Sequence number": 32987248, "Fwd thread id": 1, "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595907271.629, "dur": 213.320, + "args": { + "External id": 3290403,"Sequence number": 32987248, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 802 + } + }, + { + "ph": "f", "id": 84, "pid": 1336756, "tid": 1381189, "ts": 1587595907271.629, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595907290.108, "dur": 45.957, + "args": { + "External id": 3290404,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907292.767, "dur": 3.040, + "args": { + "External id": 3290405,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595907297.431, "dur": 38.089, + "args": { + "External id": 3290406,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595907345.984, "dur": 6.491, + "args": { + "External id": 3290407,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907350.219, "dur": 1.931, + "args": { + "External id": 3290408,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595907497.322, "dur": 147.422, + "args": { + "External id": 3290409,"Record function id": 0, "Sequence number": 32987247, "Fwd thread id": 1, "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595907498.634, "dur": 140.638, + "args": { + "External id": 3290410,"Sequence number": 32987247, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 809 + } + }, + { + "ph": "f", "id": 85, "pid": 1336756, "tid": 1381189, "ts": 1587595907498.634, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595907509.832, "dur": 32.002, + "args": { + "External id": 3290411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907512.071, "dur": 2.669, + "args": { + "External id": 3290412,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595907515.645, "dur": 25.584, + "args": { + "External id": 3290413,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595907548.476, "dur": 7.838, + "args": { + "External id": 3290414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907553.752, "dur": 2.288, + "args": { + "External id": 3290415,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907650.972, "dur": 17.057, + "args": { + "External id": 3290416,"Record function id": 0, "Sequence number": 32987246, "Fwd thread id": 1, "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907655.306, "dur": 10.033, + "args": { + "External id": 3290417,"Sequence number": 32987246, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 816 + } + }, + { + "ph": "f", "id": 86, "pid": 1336756, "tid": 1381189, "ts": 1587595907655.306, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595907657.497, "dur": 7.603, + "args": { + "External id": 3290418,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595907659.157, "dur": 5.757, + "args": { + "External id": 3290419,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907671.828, "dur": 7.788, + "args": { + "External id": 3290420,"Record function id": 0, "Sequence number": 32987245, "Fwd thread id": 1, "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907672.967, "dur": 4.931, + "args": { + "External id": 3290421,"Sequence number": 32987245, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 820 + } + }, + { + "ph": "f", "id": 87, "pid": 1336756, "tid": 1381189, "ts": 1587595907672.967, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595907674.227, "dur": 3.545, + "args": { + "External id": 3290422,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595907676.767, "dur": 0.883, + "args": { + "External id": 3290423,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907685.555, "dur": 7.554, + "args": { + "External id": 3290424,"Record function id": 0, "Sequence number": 32987244, "Fwd thread id": 1, "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907686.616, "dur": 4.870, + "args": { + "External id": 3290425,"Sequence number": 32987244, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 824 + } + }, + { + "ph": "f", "id": 88, "pid": 1336756, "tid": 1381189, "ts": 1587595907686.616, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595907687.711, "dur": 3.637, + "args": { + "External id": 3290426,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595907690.471, "dur": 0.724, + "args": { + "External id": 3290427,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907696.381, "dur": 7.274, + "args": { + "External id": 3290428,"Record function id": 0, "Sequence number": 32987243, "Fwd thread id": 1, "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907697.089, "dur": 4.458, + "args": { + "External id": 3290429,"Sequence number": 32987243, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 828 + } + }, + { + "ph": "f", "id": 89, "pid": 1336756, "tid": 1381189, "ts": 1587595907697.089, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595907697.718, "dur": 3.702, + "args": { + "External id": 3290430,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595907700.518, "dur": 0.767, + "args": { + "External id": 3290431,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907706.964, "dur": 146.276, + "args": { + "External id": 3290432,"Record function id": 0, "Sequence number": 32987242, "Fwd thread id": 1, "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907707.658, "dur": 138.831, + "args": { + "External id": 3290433,"Sequence number": 32987242, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 832 + } + }, + { + "ph": "f", "id": 90, "pid": 1336756, "tid": 1381189, "ts": 1587595907707.658, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595907711.015, "dur": 7.553, + "args": { + "External id": 3290434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595907712.751, "dur": 5.120, + "args": { + "External id": 3290435,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907715.993, "dur": 1.621, + "args": { + "External id": 3290436,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595907719.897, "dur": 65.686, + "args": { + "External id": 3290437,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595907786.861, "dur": 4.102, + "args": { + "External id": 3290438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595907787.821, "dur": 2.537, + "args": { + "External id": 3290439,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907789.152, "dur": 1.035, + "args": { + "External id": 3290440,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595907792.599, "dur": 7.090, + "args": { + "External id": 3290441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595907793.348, "dur": 5.849, + "args": { + "External id": 3290442,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907798.599, "dur": 0.524, + "args": { + "External id": 3290443,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595907800.365, "dur": 45.400, + "args": { + "External id": 3290444,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907858.284, "dur": 6.467, + "args": { + "External id": 3290445,"Record function id": 0, "Sequence number": 32987241, "Fwd thread id": 1, "Ev Idx": 844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907859.352, "dur": 3.664, + "args": { + "External id": 3290446,"Sequence number": 32987241, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 845 + } + }, + { + "ph": "f", "id": 91, "pid": 1336756, "tid": 1381189, "ts": 1587595907859.352, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595907860.813, "dur": 2.065, + "args": { + "External id": 3290447,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595907861.603, "dur": 1.179, + "args": { + "External id": 3290448,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907890.666, "dur": 15.589, + "args": { + "External id": 3290449,"Record function id": 0, "Sequence number": 32987240, "Fwd thread id": 1, "Ev Idx": 848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907893.089, "dur": 10.051, + "args": { + "External id": 3290450,"Sequence number": 32987240, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 849 + } + }, + { + "ph": "f", "id": 92, "pid": 1336756, "tid": 1381189, "ts": 1587595907893.089, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595907894.866, "dur": 8.009, + "args": { + "External id": 3290451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595907899.045, "dur": 2.921, + "args": { + "External id": 3290452,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907900.852, "dur": 0.762, + "args": { + "External id": 3290453,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595907918.060, "dur": 12.131, + "args": { + "External id": 3290454,"Record function id": 0, "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595907920.405, "dur": 8.743, + "args": { + "External id": 3290455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595907923.653, "dur": 4.984, + "args": { + "External id": 3290456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595907925.173, "dur": 3.279, + "args": { + "External id": 3290457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907934.772, "dur": 9.005, + "args": { + "External id": 3290458,"Record function id": 0, "Sequence number": 32987239, "Fwd thread id": 1, "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907936.354, "dur": 4.605, + "args": { + "External id": 3290459,"Sequence number": 32987239, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 858 + } + }, + { + "ph": "f", "id": 93, "pid": 1336756, "tid": 1381189, "ts": 1587595907936.354, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595907938.051, "dur": 2.717, + "args": { + "External id": 3290460,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595907939.223, "dur": 1.328, + "args": { + "External id": 3290461,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907950.540, "dur": 164.672, + "args": { + "External id": 3290462,"Record function id": 0, "Sequence number": 32987238, "Fwd thread id": 1, "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595907952.197, "dur": 155.229, + "args": { + "External id": 3290463,"Sequence number": 32987238, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 862 + } + }, + { + "ph": "f", "id": 94, "pid": 1336756, "tid": 1381189, "ts": 1587595907952.197, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595907956.152, "dur": 4.025, + "args": { + "External id": 3290464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595907957.241, "dur": 2.271, + "args": { + "External id": 3290465,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595907958.588, "dur": 0.718, + "args": { + "External id": 3290466,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595907961.531, "dur": 82.638, + "args": { + "External id": 3290467,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908047.743, "dur": 9.573, + "args": { + "External id": 3290468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908050.860, "dur": 5.601, + "args": { + "External id": 3290469,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908054.944, "dur": 1.380, + "args": { + "External id": 3290470,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908058.947, "dur": 5.748, + "args": { + "External id": 3290471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908060.137, "dur": 4.132, + "args": { + "External id": 3290472,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908061.064, "dur": 3.141, + "args": { + "External id": 3290473,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595908065.480, "dur": 41.182, + "args": { + "External id": 3290474,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908122.528, "dur": 41.991, + "args": { + "External id": 3290475,"Record function id": 0, "Sequence number": 32987237, "Fwd thread id": 1, "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908126.355, "dur": 6.900, + "args": { + "External id": 3290476,"Sequence number": 32987237, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 875 + } + }, + { + "ph": "f", "id": 95, "pid": 1336756, "tid": 1381189, "ts": 1587595908126.355, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595908127.778, "dur": 5.336, + "args": { + "External id": 3290477,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908131.234, "dur": 1.747, + "args": { + "External id": 3290478,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595908136.771, "dur": 25.507, + "args": { + "External id": 3290479,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908168.650, "dur": 9.586, + "args": { + "External id": 3290480,"Record function id": 0, "Sequence number": 32987236, "Fwd thread id": 1, "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908169.683, "dur": 6.361, + "args": { + "External id": 3290481,"Sequence number": 32987236, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 880 + } + }, + { + "ph": "f", "id": 96, "pid": 1336756, "tid": 1381189, "ts": 1587595908169.683, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908170.642, "dur": 5.227, + "args": { + "External id": 3290482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908171.423, "dur": 3.913, + "args": { + "External id": 3290483,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908174.698, "dur": 0.494, + "args": { + "External id": 3290484,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595908182.545, "dur": 7.576, + "args": { + "External id": 3290485,"Record function id": 0, "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595908184.158, "dur": 5.330, + "args": { + "External id": 3290486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595908185.732, "dur": 3.382, + "args": { + "External id": 3290487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595908186.749, "dur": 2.288, + "args": { + "External id": 3290488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908193.414, "dur": 8.443, + "args": { + "External id": 3290489,"Record function id": 0, "Sequence number": 32987235, "Fwd thread id": 1, "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908194.389, "dur": 5.588, + "args": { + "External id": 3290490,"Sequence number": 32987235, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 889 + } + }, + { + "ph": "f", "id": 97, "pid": 1336756, "tid": 1381189, "ts": 1587595908194.389, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595908195.481, "dur": 4.366, + "args": { + "External id": 3290491,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908198.324, "dur": 1.370, + "args": { + "External id": 3290492,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908207.000, "dur": 99.257, + "args": { + "External id": 3290493,"Record function id": 0, "Sequence number": 32987234, "Fwd thread id": 1, "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908209.500, "dur": 88.979, + "args": { + "External id": 3290494,"Sequence number": 32987234, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 893 + } + }, + { + "ph": "f", "id": 98, "pid": 1336756, "tid": 1381189, "ts": 1587595908209.500, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908211.298, "dur": 2.878, + "args": { + "External id": 3290495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908212.073, "dur": 1.680, + "args": { + "External id": 3290496,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908212.827, "dur": 0.758, + "args": { + "External id": 3290497,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595908214.784, "dur": 30.815, + "args": { + "External id": 3290498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908246.698, "dur": 5.219, + "args": { + "External id": 3290499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908247.292, "dur": 4.111, + "args": { + "External id": 3290500,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908250.336, "dur": 0.956, + "args": { + "External id": 3290501,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908253.056, "dur": 8.426, + "args": { + "External id": 3290502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908257.568, "dur": 3.533, + "args": { + "External id": 3290503,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908260.394, "dur": 0.642, + "args": { + "External id": 3290504,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595908261.904, "dur": 35.668, + "args": { + "External id": 3290505,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908310.551, "dur": 27.265, + "args": { + "External id": 3290506,"Record function id": 0, "Sequence number": 32987233, "Fwd thread id": 1, "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908311.644, "dur": 5.773, + "args": { + "External id": 3290507,"Sequence number": 32987233, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 906 + } + }, + { + "ph": "f", "id": 99, "pid": 1336756, "tid": 1381189, "ts": 1587595908311.644, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595908315.283, "dur": 1.981, + "args": { + "External id": 3290508,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908316.080, "dur": 1.046, + "args": { + "External id": 3290509,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595908319.611, "dur": 16.435, + "args": { + "External id": 3290510,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908341.600, "dur": 9.188, + "args": { + "External id": 3290511,"Record function id": 0, "Sequence number": 32987232, "Fwd thread id": 1, "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908342.554, "dur": 6.023, + "args": { + "External id": 3290512,"Sequence number": 32987232, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 911 + } + }, + { + "ph": "f", "id": 100, "pid": 1336756, "tid": 1381189, "ts": 1587595908342.554, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908343.303, "dur": 5.100, + "args": { + "External id": 3290513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908344.146, "dur": 3.757, + "args": { + "External id": 3290514,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908347.040, "dur": 0.750, + "args": { + "External id": 3290515,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595908354.887, "dur": 7.649, + "args": { + "External id": 3290516,"Record function id": 0, "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595908356.326, "dur": 5.640, + "args": { + "External id": 3290517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595908357.469, "dur": 4.189, + "args": { + "External id": 3290518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595908360.536, "dur": 1.035, + "args": { + "External id": 3290519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595908366.680, "dur": 372.409, + "args": { + "External id": 3290520,"Record function id": 0, "Sequence number": 32987231, "Fwd thread id": 1, "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595908367.714, "dur": 342.036, + "args": { + "External id": 3290521,"Sequence number": 32987231, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 920 + } + }, + { + "ph": "f", "id": 101, "pid": 1336756, "tid": 1381189, "ts": 1587595908367.714, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595908408.921, "dur": 1.631, + "args": { + "External id": 3290522,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908409.507, "dur": 0.933, + "args": { + "External id": 3290523,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595908424.621, "dur": 5.528, + "args": { + "External id": 3290524,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595908441.935, "dur": 2.049, + "args": { + "External id": 3290525,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908601.247, "dur": 1.590, + "args": { + "External id": 3290526,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595908606.573, "dur": 34.793, + "args": { + "External id": 3290527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908616.554, "dur": 0.831, + "args": { + "External id": 3290528,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595908650.752, "dur": 33.930, + "args": { + "External id": 3290529,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595908652.758, "dur": 31.677, + "args": { + "External id": 3290530,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908658.358, "dur": 5.830, + "args": { + "External id": 3290531,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595908665.668, "dur": 18.312, + "args": { + "External id": 3290532,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595908691.456, "dur": 2.521, + "args": { + "External id": 3290533,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908692.660, "dur": 1.226, + "args": { + "External id": 3290534,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595908700.049, "dur": 2.218, + "args": { + "External id": 3290535,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908700.859, "dur": 1.280, + "args": { + "External id": 3290536,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595908719.250, "dur": 16.181, + "args": { + "External id": 3290537,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595908748.938, "dur": 11.034, + "args": { + "External id": 3290538,"Record function id": 0, "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595908752.603, "dur": 6.681, + "args": { + "External id": 3290539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595908754.209, "dur": 4.252, + "args": { + "External id": 3290540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595908756.905, "dur": 1.470, + "args": { + "External id": 3290541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908763.745, "dur": 5.593, + "args": { + "External id": 3290542,"Record function id": 0, "Sequence number": 32987230, "Fwd thread id": 1, "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595908765.279, "dur": 0.999, + "args": { + "External id": 3290543,"Sequence number": 32987230, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 942 + } + }, + { + "ph": "f", "id": 102, "pid": 1336756, "tid": 1381189, "ts": 1587595908765.279, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595908772.901, "dur": 566.447, + "args": { + "External id": 3290544,"Record function id": 0, "Sequence number": 32987229, "Fwd thread id": 1, "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595908773.818, "dur": 551.553, + "args": { + "External id": 3290545,"Sequence number": 32987229, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 944 + } + }, + { + "ph": "f", "id": 103, "pid": 1336756, "tid": 1381189, "ts": 1587595908773.818, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595908801.118, "dur": 7.724, + "args": { + "External id": 3290546,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595908805.712, "dur": 2.838, + "args": { + "External id": 3290547,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908812.079, "dur": 10.071, + "args": { + "External id": 3290548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908815.380, "dur": 6.165, + "args": { + "External id": 3290549,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908819.038, "dur": 2.306, + "args": { + "External id": 3290550,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1381189, + "ts": 1587595908825.680, "dur": 115.704, + "args": { + "External id": 3290551,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595908826.443, "dur": 2.435, + "args": { + "External id": 3290552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595908826.942, "dur": 1.540, + "args": { + "External id": 3290553,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595908827.748, "dur": 0.654, + "args": { + "External id": 3290554,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1381189, + "ts": 1587595908832.330, "dur": 108.510, + "args": { + "External id": 3290555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595908833.563, "dur": 105.877, + "args": { + "External id": 3290556,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595908947.393, "dur": 3.590, + "args": { + "External id": 3290557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595908948.733, "dur": 2.122, + "args": { + "External id": 3290558,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595909020.210, "dur": 8.868, + "args": { + "External id": 3290559,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595909033.231, "dur": 2.677, + "args": { + "External id": 3290560,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595909037.135, "dur": 2.804, + "args": { + "External id": 3290561,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909091.790, "dur": 4.419, + "args": { + "External id": 3290562,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909092.963, "dur": 2.966, + "args": { + "External id": 3290563,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336756, "tid": 1381189, + "ts": 1587595909121.106, "dur": 181.909, + "args": { + "External id": 3290564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595909127.871, "dur": 12.423, + "args": { + "External id": 3290565,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909134.440, "dur": 4.287, + "args": { + "External id": 3290566,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595909142.409, "dur": 9.221, + "args": { + "External id": 3290567,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909149.618, "dur": 1.073, + "args": { + "External id": 3290568,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595909154.098, "dur": 2.944, + "args": { + "External id": 3290569,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909155.717, "dur": 0.787, + "args": { + "External id": 3290570,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595909160.613, "dur": 3.383, + "args": { + "External id": 3290571,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909162.354, "dur": 0.876, + "args": { + "External id": 3290572,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595909172.969, "dur": 2.711, + "args": { + "External id": 3290573,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909174.479, "dur": 0.670, + "args": { + "External id": 3290574,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909177.110, "dur": 10.275, + "args": { + "External id": 3290575,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595909184.236, "dur": 2.861, + "args": { + "External id": 3290576,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595909188.250, "dur": 3.946, + "args": { + "External id": 3290577,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909191.107, "dur": 0.840, + "args": { + "External id": 3290578,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909192.616, "dur": 3.884, + "args": { + "External id": 3290579,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909193.401, "dur": 3.020, + "args": { + "External id": 3290580,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587595909198.457, "dur": 87.902, + "args": { + "External id": 3290581,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909292.098, "dur": 1.289, + "args": { + "External id": 3290582,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595909294.044, "dur": 4.273, + "args": { + "External id": 3290583,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909297.224, "dur": 0.648, + "args": { + "External id": 3290584,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909300.745, "dur": 0.979, + "args": { + "External id": 3290585,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595909354.919, "dur": 10.574, + "args": { + "External id": 3290586,"Record function id": 0, "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595909357.408, "dur": 7.387, + "args": { + "External id": 3290587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595909359.982, "dur": 3.914, + "args": { + "External id": 3290588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595909361.392, "dur": 2.387, + "args": { + "External id": 3290589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909369.597, "dur": 7.884, + "args": { + "External id": 3290590,"Record function id": 0, "Sequence number": 32987228, "Fwd thread id": 1, "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909370.603, "dur": 4.670, + "args": { + "External id": 3290591,"Sequence number": 32987228, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 990 + } + }, + { + "ph": "f", "id": 104, "pid": 1336756, "tid": 1381189, "ts": 1587595909370.603, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909372.717, "dur": 2.236, + "args": { + "External id": 3290592,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909373.768, "dur": 1.083, + "args": { + "External id": 3290593,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909383.230, "dur": 148.942, + "args": { + "External id": 3290594,"Record function id": 0, "Sequence number": 32987227, "Fwd thread id": 1, "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909384.462, "dur": 140.626, + "args": { + "External id": 3290595,"Sequence number": 32987227, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 994 + } + }, + { + "ph": "f", "id": 105, "pid": 1336756, "tid": 1381189, "ts": 1587595909384.462, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909387.846, "dur": 4.736, + "args": { + "External id": 3290596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909389.960, "dur": 2.057, + "args": { + "External id": 3290597,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909391.274, "dur": 0.591, + "args": { + "External id": 3290598,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595909393.819, "dur": 57.152, + "args": { + "External id": 3290599,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909452.227, "dur": 8.193, + "args": { + "External id": 3290600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909452.949, "dur": 6.772, + "args": { + "External id": 3290601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909458.786, "dur": 0.807, + "args": { + "External id": 3290602,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909462.152, "dur": 5.390, + "args": { + "External id": 3290603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909463.261, "dur": 3.889, + "args": { + "External id": 3290604,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909464.335, "dur": 2.730, + "args": { + "External id": 3290605,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595909468.195, "dur": 56.054, + "args": { + "External id": 3290606,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909536.935, "dur": 10.267, + "args": { + "External id": 3290607,"Record function id": 0, "Sequence number": 32987226, "Fwd thread id": 1, "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909538.008, "dur": 7.794, + "args": { + "External id": 3290608,"Sequence number": 32987226, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1007 + } + }, + { + "ph": "f", "id": 106, "pid": 1336756, "tid": 1381189, "ts": 1587595909538.008, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909539.431, "dur": 6.214, + "args": { + "External id": 3290609,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909544.435, "dur": 1.113, + "args": { + "External id": 3290610,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909550.473, "dur": 7.667, + "args": { + "External id": 3290611,"Record function id": 0, "Sequence number": 32987225, "Fwd thread id": 1, "Ev Idx": 1010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909552.017, "dur": 4.089, + "args": { + "External id": 3290612,"Sequence number": 32987225, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1011 + } + }, + { + "ph": "f", "id": 107, "pid": 1336756, "tid": 1381189, "ts": 1587595909552.017, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909553.142, "dur": 2.773, + "args": { + "External id": 3290613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909553.820, "dur": 1.606, + "args": { + "External id": 3290614,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909554.915, "dur": 0.366, + "args": { + "External id": 3290615,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595909561.943, "dur": 5.367, + "args": { + "External id": 3290616,"Record function id": 0, "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595909563.242, "dur": 3.583, + "args": { + "External id": 3290617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595909564.621, "dur": 1.935, + "args": { + "External id": 3290618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595909565.290, "dur": 1.138, + "args": { + "External id": 3290619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909572.204, "dur": 9.617, + "args": { + "External id": 3290620,"Record function id": 0, "Sequence number": 32987224, "Fwd thread id": 1, "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909573.098, "dur": 6.453, + "args": { + "External id": 3290621,"Sequence number": 32987224, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 1020 + } + }, + { + "ph": "f", "id": 108, "pid": 1336756, "tid": 1381189, "ts": 1587595909573.098, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909576.004, "dur": 3.411, + "args": { + "External id": 3290622,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909578.608, "dur": 0.684, + "args": { + "External id": 3290623,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909584.959, "dur": 138.722, + "args": { + "External id": 3290624,"Record function id": 0, "Sequence number": 32987223, "Fwd thread id": 1, "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909612.318, "dur": 104.060, + "args": { + "External id": 3290625,"Sequence number": 32987223, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1024 + } + }, + { + "ph": "f", "id": 109, "pid": 1336756, "tid": 1381189, "ts": 1587595909612.318, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909614.110, "dur": 4.637, + "args": { + "External id": 3290626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909614.621, "dur": 3.629, + "args": { + "External id": 3290627,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909617.723, "dur": 0.425, + "args": { + "External id": 3290628,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595909619.445, "dur": 35.009, + "args": { + "External id": 3290629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909655.624, "dur": 6.539, + "args": { + "External id": 3290630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909656.083, "dur": 5.473, + "args": { + "External id": 3290631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909656.967, "dur": 4.444, + "args": { + "External id": 3290632,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909663.218, "dur": 5.879, + "args": { + "External id": 3290633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909663.918, "dur": 4.789, + "args": { + "External id": 3290634,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909668.233, "dur": 0.409, + "args": { + "External id": 3290635,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595909669.514, "dur": 46.240, + "args": { + "External id": 3290636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909727.787, "dur": 31.918, + "args": { + "External id": 3290637,"Record function id": 0, "Sequence number": 32987222, "Fwd thread id": 1, "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909728.652, "dur": 2.821, + "args": { + "External id": 3290638,"Sequence number": 32987222, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1037 + } + }, + { + "ph": "f", "id": 110, "pid": 1336756, "tid": 1381189, "ts": 1587595909728.652, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909729.660, "dur": 1.661, + "args": { + "External id": 3290639,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909730.177, "dur": 1.018, + "args": { + "External id": 3290640,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595909734.423, "dur": 22.527, + "args": { + "External id": 3290641,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909763.240, "dur": 11.549, + "args": { + "External id": 3290642,"Record function id": 0, "Sequence number": 32987221, "Fwd thread id": 1, "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595909764.093, "dur": 8.504, + "args": { + "External id": 3290643,"Sequence number": 32987221, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1042 + } + }, + { + "ph": "f", "id": 111, "pid": 1336756, "tid": 1381189, "ts": 1587595909764.093, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595909767.566, "dur": 4.819, + "args": { + "External id": 3290644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595909768.194, "dur": 3.666, + "args": { + "External id": 3290645,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595909771.410, "dur": 0.332, + "args": { + "External id": 3290646,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595909778.465, "dur": 4.459, + "args": { + "External id": 3290647,"Record function id": 0, "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595909779.691, "dur": 2.747, + "args": { + "External id": 3290648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595909780.425, "dur": 1.727, + "args": { + "External id": 3290649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595909780.830, "dur": 1.248, + "args": { + "External id": 3290650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595909786.618, "dur": 484.853, + "args": { + "External id": 3290651,"Record function id": 0, "Sequence number": 32987220, "Fwd thread id": 1, "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595909787.770, "dur": 449.229, + "args": { + "External id": 3290652,"Sequence number": 32987220, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 1051 + } + }, + { + "ph": "f", "id": 112, "pid": 1336756, "tid": 1381189, "ts": 1587595909787.770, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1381189, + "ts": 1587595909812.114, "dur": 33.495, + "args": { + "External id": 3290653,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595909813.457, "dur": 31.952, + "args": { + "External id": 3290654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595909815.918, "dur": 5.236, + "args": { + "External id": 3290655,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595909818.357, "dur": 2.312, + "args": { + "External id": 3290656,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595909822.602, "dur": 22.276, + "args": { + "External id": 3290657,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909860.765, "dur": 3.739, + "args": { + "External id": 3290658,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909861.533, "dur": 2.873, + "args": { + "External id": 3290659,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595909868.273, "dur": 19.402, + "args": { + "External id": 3290660,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595909868.800, "dur": 18.451, + "args": { + "External id": 3290661,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595909903.918, "dur": 3.519, + "args": { + "External id": 3290662,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595909918.619, "dur": 1.988, + "args": { + "External id": 3290663,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595910115.507, "dur": 3.884, + "args": { + "External id": 3290664,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595910125.930, "dur": 35.097, + "args": { + "External id": 3290665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910135.726, "dur": 0.856, + "args": { + "External id": 3290666,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595910166.509, "dur": 34.528, + "args": { + "External id": 3290667,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595910168.598, "dur": 32.236, + "args": { + "External id": 3290668,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910174.221, "dur": 3.998, + "args": { + "External id": 3290669,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595910182.289, "dur": 18.031, + "args": { + "External id": 3290670,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595910205.964, "dur": 2.481, + "args": { + "External id": 3290671,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595910207.340, "dur": 0.993, + "args": { + "External id": 3290672,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595910214.980, "dur": 4.571, + "args": { + "External id": 3290673,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595910216.313, "dur": 3.127, + "args": { + "External id": 3290674,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595910221.697, "dur": 4.176, + "args": { + "External id": 3290675,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595910224.554, "dur": 1.214, + "args": { + "External id": 3290676,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595910251.818, "dur": 18.276, + "args": { + "External id": 3290677,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595910284.818, "dur": 9.145, + "args": { + "External id": 3290678,"Record function id": 0, "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595910287.110, "dur": 6.144, + "args": { + "External id": 3290679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595910289.242, "dur": 3.126, + "args": { + "External id": 3290680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595910290.432, "dur": 1.852, + "args": { + "External id": 3290681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910297.673, "dur": 7.669, + "args": { + "External id": 3290682,"Record function id": 0, "Sequence number": 32987219, "Fwd thread id": 1, "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910298.744, "dur": 3.624, + "args": { + "External id": 3290683,"Sequence number": 32987219, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1082 + } + }, + { + "ph": "f", "id": 113, "pid": 1336756, "tid": 1381189, "ts": 1587595910298.744, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595910300.342, "dur": 1.791, + "args": { + "External id": 3290684,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595910301.003, "dur": 1.012, + "args": { + "External id": 3290685,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910311.022, "dur": 126.333, + "args": { + "External id": 3290686,"Record function id": 0, "Sequence number": 32987218, "Fwd thread id": 1, "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910311.975, "dur": 118.878, + "args": { + "External id": 3290687,"Sequence number": 32987218, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1086 + } + }, + { + "ph": "f", "id": 114, "pid": 1336756, "tid": 1381189, "ts": 1587595910311.975, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595910314.651, "dur": 4.710, + "args": { + "External id": 3290688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595910315.905, "dur": 2.775, + "args": { + "External id": 3290689,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910317.288, "dur": 1.167, + "args": { + "External id": 3290690,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595910320.585, "dur": 54.349, + "args": { + "External id": 3290691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595910376.153, "dur": 9.982, + "args": { + "External id": 3290692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595910379.090, "dur": 6.465, + "args": { + "External id": 3290693,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910382.603, "dur": 2.804, + "args": { + "External id": 3290694,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595910387.619, "dur": 3.075, + "args": { + "External id": 3290695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595910388.956, "dur": 1.334, + "args": { + "External id": 3290696,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910389.814, "dur": 0.414, + "args": { + "External id": 3290697,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595910391.361, "dur": 38.779, + "args": { + "External id": 3290698,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910442.202, "dur": 13.371, + "args": { + "External id": 3290699,"Record function id": 0, "Sequence number": 32987217, "Fwd thread id": 1, "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910443.365, "dur": 10.043, + "args": { + "External id": 3290700,"Sequence number": 32987217, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1099 + } + }, + { + "ph": "f", "id": 115, "pid": 1336756, "tid": 1381189, "ts": 1587595910443.365, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595910446.824, "dur": 6.414, + "args": { + "External id": 3290701,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595910449.977, "dur": 3.118, + "args": { + "External id": 3290702,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910461.062, "dur": 7.286, + "args": { + "External id": 3290703,"Record function id": 0, "Sequence number": 32987216, "Fwd thread id": 1, "Ev Idx": 1102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910462.084, "dur": 4.616, + "args": { + "External id": 3290704,"Sequence number": 32987216, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1103 + } + }, + { + "ph": "f", "id": 116, "pid": 1336756, "tid": 1381189, "ts": 1587595910462.084, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595910463.257, "dur": 3.208, + "args": { + "External id": 3290705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595910464.094, "dur": 1.866, + "args": { + "External id": 3290706,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910465.165, "dur": 0.649, + "args": { + "External id": 3290707,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595910472.198, "dur": 8.186, + "args": { + "External id": 3290708,"Record function id": 0, "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595910473.592, "dur": 6.316, + "args": { + "External id": 3290709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595910474.840, "dur": 4.818, + "args": { + "External id": 3290710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595910478.377, "dur": 1.155, + "args": { + "External id": 3290711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910485.652, "dur": 6.075, + "args": { + "External id": 3290712,"Record function id": 0, "Sequence number": 32987215, "Fwd thread id": 1, "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595910486.688, "dur": 3.384, + "args": { + "External id": 3290713,"Sequence number": 32987215, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1112 + } + }, + { + "ph": "f", "id": 117, "pid": 1336756, "tid": 1381189, "ts": 1587595910486.688, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595910487.783, "dur": 2.129, + "args": { + "External id": 3290714,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595910488.344, "dur": 1.433, + "args": { + "External id": 3290715,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595910497.382, "dur": 283.772, + "args": { + "External id": 3290716,"Record function id": 0, "Sequence number": 32987214, "Fwd thread id": 1, "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595910498.808, "dur": 265.042, + "args": { + "External id": 3290717,"Sequence number": 32987214, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1116 + } + }, + { + "ph": "f", "id": 118, "pid": 1336756, "tid": 1381189, "ts": 1587595910498.808, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595910513.439, "dur": 5.592, + "args": { + "External id": 3290718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910515.515, "dur": 3.030, + "args": { + "External id": 3290719,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595910520.835, "dur": 6.093, + "args": { + "External id": 3290720,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910523.361, "dur": 3.353, + "args": { + "External id": 3290721,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595910536.688, "dur": 4.412, + "args": { + "External id": 3290722,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910539.467, "dur": 1.412, + "args": { + "External id": 3290723,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595910566.592, "dur": 173.326, + "args": { + "External id": 3290724,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595910646.302, "dur": 5.864, + "args": { + "External id": 3290725,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595910653.685, "dur": 3.978, + "args": { + "External id": 3290726,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595910751.744, "dur": 3.436, + "args": { + "External id": 3290727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595910758.038, "dur": 0.911, + "args": { + "External id": 3290728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595910760.783, "dur": 0.718, + "args": { + "External id": 3290729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595910788.722, "dur": 293.290, + "args": { + "External id": 3290730,"Record function id": 0, "Sequence number": 32987213, "Fwd thread id": 1, "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595910790.209, "dur": 282.964, + "args": { + "External id": 3290731,"Sequence number": 32987213, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1130 + } + }, + { + "ph": "f", "id": 119, "pid": 1336756, "tid": 1381189, "ts": 1587595910790.209, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595910809.351, "dur": 42.975, + "args": { + "External id": 3290732,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910812.038, "dur": 2.962, + "args": { + "External id": 3290733,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595910816.263, "dur": 35.527, + "args": { + "External id": 3290734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595910861.003, "dur": 6.449, + "args": { + "External id": 3290735,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595910864.474, "dur": 2.673, + "args": { + "External id": 3290736,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595911092.397, "dur": 175.517, + "args": { + "External id": 3290737,"Record function id": 0, "Sequence number": 32987212, "Fwd thread id": 1, "Ev Idx": 1136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595911094.505, "dur": 168.167, + "args": { + "External id": 3290738,"Sequence number": 32987212, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1137 + } + }, + { + "ph": "f", "id": 120, "pid": 1336756, "tid": 1381189, "ts": 1587595911094.505, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595911108.762, "dur": 51.621, + "args": { + "External id": 3290739,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911111.996, "dur": 3.926, + "args": { + "External id": 3290740,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595911121.904, "dur": 38.007, + "args": { + "External id": 3290741,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595911168.321, "dur": 6.001, + "args": { + "External id": 3290742,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911171.833, "dur": 2.164, + "args": { + "External id": 3290743,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911276.383, "dur": 17.651, + "args": { + "External id": 3290744,"Record function id": 0, "Sequence number": 32987211, "Fwd thread id": 1, "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911278.334, "dur": 12.746, + "args": { + "External id": 3290745,"Sequence number": 32987211, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1144 + } + }, + { + "ph": "f", "id": 121, "pid": 1336756, "tid": 1381189, "ts": 1587595911278.334, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911281.145, "dur": 9.661, + "args": { + "External id": 3290746,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911282.694, "dur": 7.822, + "args": { + "External id": 3290747,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911297.777, "dur": 8.205, + "args": { + "External id": 3290748,"Record function id": 0, "Sequence number": 32987210, "Fwd thread id": 1, "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911298.634, "dur": 5.299, + "args": { + "External id": 3290749,"Sequence number": 32987210, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1148 + } + }, + { + "ph": "f", "id": 122, "pid": 1336756, "tid": 1381189, "ts": 1587595911298.634, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911299.660, "dur": 4.120, + "args": { + "External id": 3290750,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911302.801, "dur": 0.878, + "args": { + "External id": 3290751,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911309.353, "dur": 7.811, + "args": { + "External id": 3290752,"Record function id": 0, "Sequence number": 32987209, "Fwd thread id": 1, "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911310.301, "dur": 5.073, + "args": { + "External id": 3290753,"Sequence number": 32987209, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1152 + } + }, + { + "ph": "f", "id": 123, "pid": 1336756, "tid": 1381189, "ts": 1587595911310.301, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911313.524, "dur": 1.713, + "args": { + "External id": 3290754,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911314.418, "dur": 0.689, + "args": { + "External id": 3290755,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911320.717, "dur": 7.148, + "args": { + "External id": 3290756,"Record function id": 0, "Sequence number": 32987208, "Fwd thread id": 1, "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911321.562, "dur": 4.469, + "args": { + "External id": 3290757,"Sequence number": 32987208, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1156 + } + }, + { + "ph": "f", "id": 124, "pid": 1336756, "tid": 1381189, "ts": 1587595911321.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911322.647, "dur": 3.245, + "args": { + "External id": 3290758,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911324.882, "dur": 0.849, + "args": { + "External id": 3290759,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911331.047, "dur": 155.022, + "args": { + "External id": 3290760,"Record function id": 0, "Sequence number": 32987207, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911332.092, "dur": 147.334, + "args": { + "External id": 3290761,"Sequence number": 32987207, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 125, "pid": 1336756, "tid": 1381189, "ts": 1587595911332.092, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911335.992, "dur": 9.177, + "args": { + "External id": 3290762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911340.075, "dur": 4.386, + "args": { + "External id": 3290763,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911342.581, "dur": 1.574, + "args": { + "External id": 3290764,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595911346.543, "dur": 70.291, + "args": { + "External id": 3290765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911417.932, "dur": 4.045, + "args": { + "External id": 3290766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911418.635, "dur": 2.577, + "args": { + "External id": 3290767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911419.939, "dur": 1.098, + "args": { + "External id": 3290768,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911423.932, "dur": 7.410, + "args": { + "External id": 3290769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911427.385, "dur": 3.505, + "args": { + "External id": 3290770,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911430.232, "dur": 0.536, + "args": { + "External id": 3290771,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595911432.125, "dur": 46.387, + "args": { + "External id": 3290772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911493.324, "dur": 6.381, + "args": { + "External id": 3290773,"Record function id": 0, "Sequence number": 32987206, "Fwd thread id": 1, "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911494.398, "dur": 3.609, + "args": { + "External id": 3290774,"Sequence number": 32987206, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1173 + } + }, + { + "ph": "f", "id": 126, "pid": 1336756, "tid": 1381189, "ts": 1587595911494.398, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911495.827, "dur": 2.022, + "args": { + "External id": 3290775,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911496.485, "dur": 1.234, + "args": { + "External id": 3290776,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911502.978, "dur": 8.886, + "args": { + "External id": 3290777,"Record function id": 0, "Sequence number": 32987205, "Fwd thread id": 1, "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911503.818, "dur": 6.158, + "args": { + "External id": 3290778,"Sequence number": 32987205, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1177 + } + }, + { + "ph": "f", "id": 127, "pid": 1336756, "tid": 1381189, "ts": 1587595911503.818, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911506.841, "dur": 2.948, + "args": { + "External id": 3290779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911507.692, "dur": 1.597, + "args": { + "External id": 3290780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911508.622, "dur": 0.478, + "args": { + "External id": 3290781,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595911519.576, "dur": 9.268, + "args": { + "External id": 3290782,"Record function id": 0, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595911521.092, "dur": 6.895, + "args": { + "External id": 3290783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595911523.614, "dur": 3.970, + "args": { + "External id": 3290784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595911524.760, "dur": 2.691, + "args": { + "External id": 3290785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911531.946, "dur": 7.740, + "args": { + "External id": 3290786,"Record function id": 0, "Sequence number": 32987204, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911532.791, "dur": 5.155, + "args": { + "External id": 3290787,"Sequence number": 32987204, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 128, "pid": 1336756, "tid": 1381189, "ts": 1587595911532.791, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911534.178, "dur": 3.622, + "args": { + "External id": 3290788,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911536.934, "dur": 0.739, + "args": { + "External id": 3290789,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911542.863, "dur": 94.291, + "args": { + "External id": 3290790,"Record function id": 0, "Sequence number": 32987203, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911543.956, "dur": 88.330, + "args": { + "External id": 3290791,"Sequence number": 32987203, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 129, "pid": 1336756, "tid": 1381189, "ts": 1587595911543.956, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911546.422, "dur": 2.461, + "args": { + "External id": 3290792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911547.127, "dur": 1.356, + "args": { + "External id": 3290793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911547.945, "dur": 0.387, + "args": { + "External id": 3290794,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595911549.446, "dur": 30.520, + "args": { + "External id": 3290795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911582.854, "dur": 5.322, + "args": { + "External id": 3290796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911583.462, "dur": 4.174, + "args": { + "External id": 3290797,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911586.739, "dur": 0.769, + "args": { + "External id": 3290798,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911589.268, "dur": 5.132, + "args": { + "External id": 3290799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911590.711, "dur": 3.288, + "args": { + "External id": 3290800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911591.535, "dur": 2.395, + "args": { + "External id": 3290801,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595911596.880, "dur": 34.791, + "args": { + "External id": 3290802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911641.466, "dur": 34.574, + "args": { + "External id": 3290803,"Record function id": 0, "Sequence number": 32987202, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911642.483, "dur": 5.234, + "args": { + "External id": 3290804,"Sequence number": 32987202, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 130, "pid": 1336756, "tid": 1381189, "ts": 1587595911642.483, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911643.543, "dur": 4.024, + "args": { + "External id": 3290805,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911646.053, "dur": 1.378, + "args": { + "External id": 3290806,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595911650.872, "dur": 23.008, + "args": { + "External id": 3290807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911681.676, "dur": 9.547, + "args": { + "External id": 3290808,"Record function id": 0, "Sequence number": 32987201, "Fwd thread id": 1, "Ev Idx": 1207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911682.543, "dur": 6.742, + "args": { + "External id": 3290809,"Sequence number": 32987201, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1208 + } + }, + { + "ph": "f", "id": 131, "pid": 1336756, "tid": 1381189, "ts": 1587595911682.543, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911683.574, "dur": 5.502, + "args": { + "External id": 3290810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911686.447, "dur": 2.112, + "args": { + "External id": 3290811,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911687.682, "dur": 0.756, + "args": { + "External id": 3290812,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595911695.013, "dur": 5.446, + "args": { + "External id": 3290813,"Record function id": 0, "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595911696.305, "dur": 3.574, + "args": { + "External id": 3290814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595911697.479, "dur": 2.012, + "args": { + "External id": 3290815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595911697.978, "dur": 1.437, + "args": { + "External id": 3290816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911703.537, "dur": 10.972, + "args": { + "External id": 3290817,"Record function id": 0, "Sequence number": 32987200, "Fwd thread id": 1, "Ev Idx": 1216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911704.400, "dur": 8.079, + "args": { + "External id": 3290818,"Sequence number": 32987200, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1217 + } + }, + { + "ph": "f", "id": 132, "pid": 1336756, "tid": 1381189, "ts": 1587595911704.400, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911705.556, "dur": 6.770, + "args": { + "External id": 3290819,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911711.417, "dur": 0.813, + "args": { + "External id": 3290820,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911720.253, "dur": 88.074, + "args": { + "External id": 3290821,"Record function id": 0, "Sequence number": 32987199, "Fwd thread id": 1, "Ev Idx": 1220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911721.141, "dur": 80.279, + "args": { + "External id": 3290822,"Sequence number": 32987199, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1221 + } + }, + { + "ph": "f", "id": 133, "pid": 1336756, "tid": 1381189, "ts": 1587595911721.141, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911722.786, "dur": 2.669, + "args": { + "External id": 3290823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911723.522, "dur": 1.524, + "args": { + "External id": 3290824,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911724.381, "dur": 0.536, + "args": { + "External id": 3290825,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595911726.205, "dur": 28.384, + "args": { + "External id": 3290826,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911755.612, "dur": 6.681, + "args": { + "External id": 3290827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911758.135, "dur": 3.499, + "args": { + "External id": 3290828,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911758.880, "dur": 2.563, + "args": { + "External id": 3290829,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911763.438, "dur": 4.850, + "args": { + "External id": 3290830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911764.192, "dur": 3.667, + "args": { + "External id": 3290831,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911766.981, "dur": 0.751, + "args": { + "External id": 3290832,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595911768.746, "dur": 32.011, + "args": { + "External id": 3290833,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911812.668, "dur": 25.336, + "args": { + "External id": 3290834,"Record function id": 0, "Sequence number": 32987198, "Fwd thread id": 1, "Ev Idx": 1233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911813.550, "dur": 5.542, + "args": { + "External id": 3290835,"Sequence number": 32987198, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1234 + } + }, + { + "ph": "f", "id": 134, "pid": 1336756, "tid": 1381189, "ts": 1587595911813.550, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911816.995, "dur": 1.948, + "args": { + "External id": 3290836,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911817.787, "dur": 1.061, + "args": { + "External id": 3290837,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595911821.373, "dur": 14.687, + "args": { + "External id": 3290838,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911841.643, "dur": 7.106, + "args": { + "External id": 3290839,"Record function id": 0, "Sequence number": 32987197, "Fwd thread id": 1, "Ev Idx": 1238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595911842.833, "dur": 3.856, + "args": { + "External id": 3290840,"Sequence number": 32987197, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1239 + } + }, + { + "ph": "f", "id": 135, "pid": 1336756, "tid": 1381189, "ts": 1587595911842.833, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595911843.677, "dur": 2.825, + "args": { + "External id": 3290841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595911844.451, "dur": 1.578, + "args": { + "External id": 3290842,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595911845.433, "dur": 0.478, + "args": { + "External id": 3290843,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595911854.331, "dur": 6.293, + "args": { + "External id": 3290844,"Record function id": 0, "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595911855.449, "dur": 4.718, + "args": { + "External id": 3290845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595911856.369, "dur": 3.514, + "args": { + "External id": 3290846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595911858.609, "dur": 1.184, + "args": { + "External id": 3290847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595911866.462, "dur": 407.310, + "args": { + "External id": 3290848,"Record function id": 0, "Sequence number": 32987196, "Fwd thread id": 1, "Ev Idx": 1247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595911867.569, "dur": 374.445, + "args": { + "External id": 3290849,"Sequence number": 32987196, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1248 + } + }, + { + "ph": "f", "id": 136, "pid": 1336756, "tid": 1381189, "ts": 1587595911867.569, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595911916.930, "dur": 2.653, + "args": { + "External id": 3290850,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595911917.611, "dur": 1.679, + "args": { + "External id": 3290851,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595911933.654, "dur": 6.434, + "args": { + "External id": 3290852,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595911949.199, "dur": 2.067, + "args": { + "External id": 3290853,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912138.796, "dur": 2.739, + "args": { + "External id": 3290854,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595912145.796, "dur": 35.803, + "args": { + "External id": 3290855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912155.401, "dur": 1.244, + "args": { + "External id": 3290856,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595912187.021, "dur": 32.346, + "args": { + "External id": 3290857,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595912188.876, "dur": 30.288, + "args": { + "External id": 3290858,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912194.922, "dur": 4.819, + "args": { + "External id": 3290859,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595912201.491, "dur": 17.166, + "args": { + "External id": 3290860,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595912224.083, "dur": 4.712, + "args": { + "External id": 3290861,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912227.714, "dur": 0.981, + "args": { + "External id": 3290862,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912234.056, "dur": 1.599, + "args": { + "External id": 3290863,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912234.771, "dur": 0.753, + "args": { + "External id": 3290864,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595912252.170, "dur": 17.856, + "args": { + "External id": 3290865,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595912286.507, "dur": 9.150, + "args": { + "External id": 3290866,"Record function id": 0, "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595912288.570, "dur": 6.409, + "args": { + "External id": 3290867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595912290.814, "dur": 3.249, + "args": { + "External id": 3290868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595912292.015, "dur": 1.925, + "args": { + "External id": 3290869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912299.495, "dur": 7.982, + "args": { + "External id": 3290870,"Record function id": 0, "Sequence number": 32987195, "Fwd thread id": 1, "Ev Idx": 1269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912303.519, "dur": 1.382, + "args": { + "External id": 3290871,"Sequence number": 32987195, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1270 + } + }, + { + "ph": "f", "id": 137, "pid": 1336756, "tid": 1381189, "ts": 1587595912303.519, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595912310.792, "dur": 402.632, + "args": { + "External id": 3290872,"Record function id": 0, "Sequence number": 32987194, "Fwd thread id": 1, "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595912313.726, "dur": 389.514, + "args": { + "External id": 3290873,"Sequence number": 32987194, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1272 + } + }, + { + "ph": "f", "id": 138, "pid": 1336756, "tid": 1381189, "ts": 1587595912313.726, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912342.170, "dur": 7.689, + "args": { + "External id": 3290874,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595912346.705, "dur": 2.831, + "args": { + "External id": 3290875,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]", "[8192, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[33554432, 8192, 1], [], []], "Input Dims": [[16, 4096, 2048], [], []], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595912353.051, "dur": 9.123, + "args": { + "External id": 3290876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595912354.734, "dur": 6.811, + "args": { + "External id": 3290877,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912358.234, "dur": 3.174, + "args": { + "External id": 3290878,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1381189, + "ts": 1587595912365.794, "dur": 86.132, + "args": { + "External id": 3290879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8192, 1], [1, 5632], []], "Input Dims": [[65536, 2048], [5632, 2048], []], "Ev Idx": 1278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595912366.658, "dur": 5.896, + "args": { + "External id": 3290880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 5632]], "Input Dims": [[5632, 2048]], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595912367.431, "dur": 4.637, + "args": { + "External id": 3290881,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 5632], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912371.534, "dur": 0.439, + "args": { + "External id": 3290882,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[5632, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 5632], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1381189, + "ts": 1587595912373.627, "dur": 77.756, + "args": { + "External id": 3290883,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595912374.938, "dur": 75.456, + "args": { + "External id": 3290884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8192, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595912458.280, "dur": 2.544, + "args": { + "External id": 3290885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [23068672, 5632, 1]], "Input Dims": [[65536, 5632], [16, 4096, 5632]], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912459.333, "dur": 1.349, + "args": { + "External id": 3290886,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595912491.817, "dur": 5.118, + "args": { + "External id": 3290887,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595912500.176, "dur": 2.046, + "args": { + "External id": 3290888,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595912502.900, "dur": 1.569, + "args": { + "External id": 3290889,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912537.197, "dur": 2.277, + "args": { + "External id": 3290890,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912537.985, "dur": 1.317, + "args": { + "External id": 3290891,"Record function id": 0, "Concrete Inputs": ["", "[-1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 1336756, "tid": 1381189, + "ts": 1587595912558.566, "dur": 125.206, + "args": { + "External id": 3290892,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[8192, 1], [5632, 1]], []], "Input Dims": [[], [[65536, 2048], [65536, 5632]], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595912563.764, "dur": 11.193, + "args": { + "External id": 3290893,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912569.698, "dur": 4.326, + "args": { + "External id": 3290894,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048, 1]", "[8192, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595912576.523, "dur": 6.550, + "args": { + "External id": 3290895,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8192, 1, 1], []], "Input Dims": [[65536, 2048, 1], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912581.629, "dur": 0.668, + "args": { + "External id": 3290896,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 65536]", "[1, 1, 8192]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[65536, 2048, 1], [], [], []], "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1381189, + "ts": 1587595912584.640, "dur": 2.038, + "args": { + "External id": 3290897,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912585.971, "dur": 0.396, + "args": { + "External id": 3290898,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595912589.026, "dur": 2.477, + "args": { + "External id": 3290899,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912590.275, "dur": 0.657, + "args": { + "External id": 3290900,"Record function id": 0, "Concrete Inputs": ["", "[1, 5632, 65536]", "[1, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1, 1], [], [], []], "Input Dims": [[65536, 5632, 1], [], [], []], "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595912596.801, "dur": 4.177, + "args": { + "External id": 3290901,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 8192], []], "Input Dims": [[2048, 1, 65536], []], "Ev Idx": 1300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912600.080, "dur": 0.599, + "args": { + "External id": 3290902,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536, 1]", "[1, 8192, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 8192], [], [], []], "Input Dims": [[2048, 1, 65536], [], [], []], "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912601.703, "dur": 6.559, + "args": { + "External id": 3290903,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 8192, 1], []], "Input Dims": [[2048, 65536, 1], []], "Ev Idx": 1302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595912606.400, "dur": 1.694, + "args": { + "External id": 3290904,"Record function id": 0, "Concrete Inputs": ["", "[1, 2048, 65536]", "[2048, 1, 8192]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 8192, 1], [], []], "Input Dims": [[2048, 65536, 1], [], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595912608.898, "dur": 3.890, + "args": { + "External id": 3290905,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 5632], []], "Input Dims": [[1, 5632, 65536], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912611.964, "dur": 0.575, + "args": { + "External id": 3290906,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632, 1]", "[5632, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 5632], [], [], []], "Input Dims": [[1, 5632, 65536], [], [], []], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912613.498, "dur": 3.992, + "args": { + "External id": 3290907,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912614.424, "dur": 2.980, + "args": { + "External id": 3290908,"Record function id": 0, "Concrete Inputs": ["", "[1, 65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 1], []], "Input Dims": [[65536, 5632, 1], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587595912618.857, "dur": 51.475, + "args": { + "External id": 3290909,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1, 8192], [369098752, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632]], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912674.210, "dur": 1.216, + "args": { + "External id": 3290910,"Record function id": 0, "Concrete Inputs": ["", "[2048, 1, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[11534336, 5632, 1], []], "Input Dims": [[1, 2048, 5632], []], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 1336756, "tid": 1381189, + "ts": 1587595912675.880, "dur": 2.419, + "args": { + "External id": 3290911,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 5632, 1], []], "Input Dims": [[2048, 1, 5632], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912676.986, "dur": 0.656, + "args": { + "External id": 3290912,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632, 1]", "[5632, 1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 5632, 1], [], [], []], "Input Dims": [[2048, 1, 5632], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912681.937, "dur": 0.711, + "args": { + "External id": 3290913,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1, 5632], []], "Input Dims": [[2048, 5632, 1], []], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595912722.461, "dur": 7.960, + "args": { + "External id": 3290914,"Record function id": 0, "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595912724.334, "dur": 5.520, + "args": { + "External id": 3290915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595912726.101, "dur": 3.039, + "args": { + "External id": 3290916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595912727.056, "dur": 1.996, + "args": { + "External id": 3290917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912734.324, "dur": 6.336, + "args": { + "External id": 3290918,"Record function id": 0, "Sequence number": 32987193, "Fwd thread id": 1, "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912735.338, "dur": 3.355, + "args": { + "External id": 3290919,"Sequence number": 32987193, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 1318 + } + }, + { + "ph": "f", "id": 139, "pid": 1336756, "tid": 1381189, "ts": 1587595912735.338, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912736.855, "dur": 1.661, + "args": { + "External id": 3290920,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912737.505, "dur": 0.877, + "args": { + "External id": 3290921,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912743.966, "dur": 118.918, + "args": { + "External id": 3290922,"Record function id": 0, "Sequence number": 32987192, "Fwd thread id": 1, "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912744.852, "dur": 112.507, + "args": { + "External id": 3290923,"Sequence number": 32987192, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1322 + } + }, + { + "ph": "f", "id": 140, "pid": 1336756, "tid": 1381189, "ts": 1587595912744.852, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595912749.818, "dur": 3.794, + "args": { + "External id": 3290924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595912750.752, "dur": 2.340, + "args": { + "External id": 3290925,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912751.869, "dur": 0.946, + "args": { + "External id": 3290926,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595912754.709, "dur": 39.080, + "args": { + "External id": 3290927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595912795.044, "dur": 8.113, + "args": { + "External id": 3290928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595912795.965, "dur": 6.420, + "args": { + "External id": 3290929,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912801.167, "dur": 1.059, + "args": { + "External id": 3290930,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595912804.493, "dur": 5.083, + "args": { + "External id": 3290931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595912805.968, "dur": 3.203, + "args": { + "External id": 3290932,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912806.753, "dur": 2.348, + "args": { + "External id": 3290933,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595912809.998, "dur": 46.579, + "args": { + "External id": 3290934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912867.437, "dur": 32.668, + "args": { + "External id": 3290935,"Record function id": 0, "Sequence number": 32987191, "Fwd thread id": 1, "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912868.421, "dur": 29.308, + "args": { + "External id": 3290936,"Sequence number": 32987191, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1335 + } + }, + { + "ph": "f", "id": 141, "pid": 1336756, "tid": 1381189, "ts": 1587595912868.421, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912892.203, "dur": 5.334, + "args": { + "External id": 3290937,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912895.147, "dur": 2.120, + "args": { + "External id": 3290938,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912905.015, "dur": 9.094, + "args": { + "External id": 3290939,"Record function id": 0, "Sequence number": 32987190, "Fwd thread id": 1, "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912906.076, "dur": 6.028, + "args": { + "External id": 3290940,"Sequence number": 32987190, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1339 + } + }, + { + "ph": "f", "id": 142, "pid": 1336756, "tid": 1381189, "ts": 1587595912906.076, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595912906.827, "dur": 5.052, + "args": { + "External id": 3290941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595912907.960, "dur": 3.432, + "args": { + "External id": 3290942,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912910.790, "dur": 0.470, + "args": { + "External id": 3290943,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595912918.020, "dur": 5.869, + "args": { + "External id": 3290944,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595912919.349, "dur": 4.035, + "args": { + "External id": 3290945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595912920.749, "dur": 2.376, + "args": { + "External id": 3290946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595912921.529, "dur": 1.502, + "args": { + "External id": 3290947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912927.098, "dur": 7.402, + "args": { + "External id": 3290948,"Record function id": 0, "Sequence number": 32987189, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912928.087, "dur": 4.457, + "args": { + "External id": 3290949,"Sequence number": 32987189, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[23068672, 5632, 1]], "Input Dims": [[16, 4096, 5632]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 143, "pid": 1336756, "tid": 1381189, "ts": 1587595912928.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595912931.053, "dur": 1.355, + "args": { + "External id": 3290950,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595912931.647, "dur": 0.627, + "args": { + "External id": 3290951,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912937.282, "dur": 166.778, + "args": { + "External id": 3290952,"Record function id": 0, "Sequence number": 32987188, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595912938.049, "dur": 156.432, + "args": { + "External id": 3290953,"Sequence number": 32987188, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 144, "pid": 1336756, "tid": 1381189, "ts": 1587595912938.049, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595912939.680, "dur": 6.889, + "args": { + "External id": 3290954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[65536, 5632]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595912940.455, "dur": 5.690, + "args": { + "External id": 3290955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[65536, 5632], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595912945.402, "dur": 0.600, + "args": { + "External id": 3290956,"Record function id": 0, "Concrete Inputs": ["", "[5632, 65536]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[65536, 5632], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595912947.247, "dur": 77.730, + "args": { + "External id": 3290957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595913027.330, "dur": 6.188, + "args": { + "External id": 3290958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595913028.570, "dur": 4.151, + "args": { + "External id": 3290959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913029.808, "dur": 2.722, + "args": { + "External id": 3290960,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595913034.901, "dur": 5.443, + "args": { + "External id": 3290961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595913036.274, "dur": 3.616, + "args": { + "External id": 3290962,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913039.140, "dur": 0.667, + "args": { + "External id": 3290963,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595913040.864, "dur": 52.968, + "args": { + "External id": 3290964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913112.248, "dur": 36.598, + "args": { + "External id": 3290965,"Record function id": 0, "Sequence number": 32987187, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913113.830, "dur": 5.941, + "args": { + "External id": 3290966,"Sequence number": 32987187, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 145, "pid": 1336756, "tid": 1381189, "ts": 1587595913113.830, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913116.971, "dur": 2.650, + "args": { + "External id": 3290967,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913117.933, "dur": 1.567, + "args": { + "External id": 3290968,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595913122.870, "dur": 23.088, + "args": { + "External id": 3290969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913152.667, "dur": 11.136, + "args": { + "External id": 3290970,"Record function id": 0, "Sequence number": 32987186, "Fwd thread id": 1, "Ev Idx": 1369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913153.987, "dur": 7.375, + "args": { + "External id": 3290971,"Sequence number": 32987186, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1370 + } + }, + { + "ph": "f", "id": 146, "pid": 1336756, "tid": 1381189, "ts": 1587595913153.987, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595913154.952, "dur": 6.201, + "args": { + "External id": 3290972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 5632]], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595913158.721, "dur": 1.895, + "args": { + "External id": 3290973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913159.937, "dur": 0.574, + "args": { + "External id": 3290974,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595913167.864, "dur": 9.280, + "args": { + "External id": 3290975,"Record function id": 0, "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595913172.485, "dur": 4.129, + "args": { + "External id": 3290976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595913173.994, "dur": 2.300, + "args": { + "External id": 3290977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595913174.779, "dur": 1.417, + "args": { + "External id": 3290978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595913181.291, "dur": 419.337, + "args": { + "External id": 3290979,"Record function id": 0, "Sequence number": 32987185, "Fwd thread id": 1, "Ev Idx": 1378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595913186.464, "dur": 376.651, + "args": { + "External id": 3290980,"Sequence number": 32987185, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 1379 + } + }, + { + "ph": "f", "id": 147, "pid": 1336756, "tid": 1381189, "ts": 1587595913186.464, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1381189, + "ts": 1587595913209.967, "dur": 34.636, + "args": { + "External id": 3290981,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595913211.439, "dur": 32.973, + "args": { + "External id": 3290982,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595913214.172, "dur": 5.628, + "args": { + "External id": 3290983,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], [], []], "Ev Idx": 1382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595913216.539, "dur": 2.808, + "args": { + "External id": 3290984,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595913221.370, "dur": 22.562, + "args": { + "External id": 3290985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913257.897, "dur": 6.422, + "args": { + "External id": 3290986,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913260.818, "dur": 3.331, + "args": { + "External id": 3290987,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913268.047, "dur": 1.629, + "args": { + "External id": 3290988,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913268.663, "dur": 0.900, + "args": { + "External id": 3290989,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595913282.716, "dur": 2.766, + "args": { + "External id": 3290990,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595913296.153, "dur": 3.192, + "args": { + "External id": 3290991,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913452.158, "dur": 3.838, + "args": { + "External id": 3290992,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595913459.951, "dur": 32.949, + "args": { + "External id": 3290993,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913470.504, "dur": 0.840, + "args": { + "External id": 3290994,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595913498.248, "dur": 29.911, + "args": { + "External id": 3290995,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595913500.213, "dur": 27.739, + "args": { + "External id": 3290996,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913504.022, "dur": 3.825, + "args": { + "External id": 3290997,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595913511.493, "dur": 15.881, + "args": { + "External id": 3290998,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595913532.177, "dur": 4.434, + "args": { + "External id": 3290999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913535.336, "dur": 1.134, + "args": { + "External id": 3291000,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913542.495, "dur": 4.114, + "args": { + "External id": 3291001,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913543.419, "dur": 3.078, + "args": { + "External id": 3291002,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913548.326, "dur": 3.849, + "args": { + "External id": 3291003,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913551.184, "dur": 0.911, + "args": { + "External id": 3291004,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595913576.335, "dur": 22.732, + "args": { + "External id": 3291005,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595913611.751, "dur": 7.118, + "args": { + "External id": 3291006,"Record function id": 0, "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595913613.531, "dur": 4.626, + "args": { + "External id": 3291007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595913614.785, "dur": 2.662, + "args": { + "External id": 3291008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595913615.571, "dur": 1.730, + "args": { + "External id": 3291009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913622.482, "dur": 8.892, + "args": { + "External id": 3291010,"Record function id": 0, "Sequence number": 32987184, "Fwd thread id": 1, "Ev Idx": 1409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913623.626, "dur": 5.585, + "args": { + "External id": 3291011,"Sequence number": 32987184, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1410 + } + }, + { + "ph": "f", "id": 148, "pid": 1336756, "tid": 1381189, "ts": 1587595913623.626, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913624.979, "dur": 4.090, + "args": { + "External id": 3291012,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913627.583, "dur": 1.321, + "args": { + "External id": 3291013,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913634.665, "dur": 129.508, + "args": { + "External id": 3291014,"Record function id": 0, "Sequence number": 32987183, "Fwd thread id": 1, "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913635.424, "dur": 123.153, + "args": { + "External id": 3291015,"Sequence number": 32987183, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1414 + } + }, + { + "ph": "f", "id": 149, "pid": 1336756, "tid": 1381189, "ts": 1587595913635.424, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595913638.071, "dur": 4.338, + "args": { + "External id": 3291016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595913639.305, "dur": 2.488, + "args": { + "External id": 3291017,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913640.696, "dur": 0.899, + "args": { + "External id": 3291018,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595913643.498, "dur": 54.525, + "args": { + "External id": 3291019,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595913701.486, "dur": 7.480, + "args": { + "External id": 3291020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595913702.162, "dur": 6.266, + "args": { + "External id": 3291021,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913705.736, "dur": 2.511, + "args": { + "External id": 3291022,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595913710.368, "dur": 2.738, + "args": { + "External id": 3291023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595913711.288, "dur": 1.381, + "args": { + "External id": 3291024,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913712.137, "dur": 0.467, + "args": { + "External id": 3291025,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595913716.206, "dur": 41.451, + "args": { + "External id": 3291026,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913769.369, "dur": 11.373, + "args": { + "External id": 3291027,"Record function id": 0, "Sequence number": 32987182, "Fwd thread id": 1, "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913770.809, "dur": 7.979, + "args": { + "External id": 3291028,"Sequence number": 32987182, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1427 + } + }, + { + "ph": "f", "id": 150, "pid": 1336756, "tid": 1381189, "ts": 1587595913770.809, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913772.371, "dur": 6.272, + "args": { + "External id": 3291029,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913775.557, "dur": 2.945, + "args": { + "External id": 3291030,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913783.854, "dur": 9.208, + "args": { + "External id": 3291031,"Record function id": 0, "Sequence number": 32987181, "Fwd thread id": 1, "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913785.236, "dur": 6.241, + "args": { + "External id": 3291032,"Sequence number": 32987181, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1431 + } + }, + { + "ph": "f", "id": 151, "pid": 1336756, "tid": 1381189, "ts": 1587595913785.236, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595913786.321, "dur": 4.960, + "args": { + "External id": 3291033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595913787.142, "dur": 3.636, + "args": { + "External id": 3291034,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913790.232, "dur": 0.457, + "args": { + "External id": 3291035,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595913796.976, "dur": 5.317, + "args": { + "External id": 3291036,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595913798.575, "dur": 3.165, + "args": { + "External id": 3291037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595913799.686, "dur": 1.699, + "args": { + "External id": 3291038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595913800.340, "dur": 0.956, + "args": { + "External id": 3291039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913805.187, "dur": 6.116, + "args": { + "External id": 3291040,"Record function id": 0, "Sequence number": 32987180, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595913805.893, "dur": 3.268, + "args": { + "External id": 3291041,"Sequence number": 32987180, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 152, "pid": 1336756, "tid": 1381189, "ts": 1587595913805.893, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595913807.180, "dur": 1.825, + "args": { + "External id": 3291042,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595913807.711, "dur": 1.163, + "args": { + "External id": 3291043,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595913815.002, "dur": 354.202, + "args": { + "External id": 3291044,"Record function id": 0, "Sequence number": 32987179, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595913818.125, "dur": 332.738, + "args": { + "External id": 3291045,"Sequence number": 32987179, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 153, "pid": 1336756, "tid": 1381189, "ts": 1587595913818.125, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595913830.878, "dur": 8.097, + "args": { + "External id": 3291046,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913835.097, "dur": 3.466, + "args": { + "External id": 3291047,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595913840.903, "dur": 5.010, + "args": { + "External id": 3291048,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913843.683, "dur": 2.000, + "args": { + "External id": 3291049,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595913847.253, "dur": 2.523, + "args": { + "External id": 3291050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595913847.849, "dur": 1.719, + "args": { + "External id": 3291051,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595913891.625, "dur": 232.306, + "args": { + "External id": 3291052,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595913976.772, "dur": 5.884, + "args": { + "External id": 3291053,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595914022.726, "dur": 6.269, + "args": { + "External id": 3291054,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595914137.325, "dur": 3.866, + "args": { + "External id": 3291055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595914144.314, "dur": 0.878, + "args": { + "External id": 3291056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1381189, + "ts": 1587595914147.280, "dur": 0.827, + "args": { + "External id": 3291057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595914179.519, "dur": 226.188, + "args": { + "External id": 3291058,"Record function id": 0, "Sequence number": 32987178, "Fwd thread id": 1, "Ev Idx": 1457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595914181.262, "dur": 218.283, + "args": { + "External id": 3291059,"Sequence number": 32987178, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1458 + } + }, + { + "ph": "f", "id": 154, "pid": 1336756, "tid": 1381189, "ts": 1587595914181.262, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595914201.555, "dur": 53.429, + "args": { + "External id": 3291060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914206.628, "dur": 3.283, + "args": { + "External id": 3291061,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595914211.582, "dur": 42.885, + "args": { + "External id": 3291062,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595914263.930, "dur": 4.712, + "args": { + "External id": 3291063,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914265.509, "dur": 2.761, + "args": { + "External id": 3291064,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595914412.664, "dur": 148.970, + "args": { + "External id": 3291065,"Record function id": 0, "Sequence number": 32987177, "Fwd thread id": 1, "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595914416.877, "dur": 139.167, + "args": { + "External id": 3291066,"Sequence number": 32987177, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1465 + } + }, + { + "ph": "f", "id": 155, "pid": 1336756, "tid": 1381189, "ts": 1587595914416.877, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1381189, + "ts": 1587595914428.534, "dur": 32.285, + "args": { + "External id": 3291067,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914430.240, "dur": 3.111, + "args": { + "External id": 3291068,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595914434.426, "dur": 25.977, + "args": { + "External id": 3291069,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], []], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1381189, + "ts": 1587595914467.235, "dur": 3.993, + "args": { + "External id": 3291070,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914468.155, "dur": 2.735, + "args": { + "External id": 3291071,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914567.988, "dur": 14.847, + "args": { + "External id": 3291072,"Record function id": 0, "Sequence number": 32987176, "Fwd thread id": 1, "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914569.238, "dur": 10.982, + "args": { + "External id": 3291073,"Sequence number": 32987176, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1472 + } + }, + { + "ph": "f", "id": 156, "pid": 1336756, "tid": 1381189, "ts": 1587595914569.238, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595914573.055, "dur": 6.879, + "args": { + "External id": 3291074,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595914574.126, "dur": 5.618, + "args": { + "External id": 3291075,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914586.146, "dur": 6.286, + "args": { + "External id": 3291076,"Record function id": 0, "Sequence number": 32987175, "Fwd thread id": 1, "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914587.419, "dur": 3.526, + "args": { + "External id": 3291077,"Sequence number": 32987175, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1476 + } + }, + { + "ph": "f", "id": 157, "pid": 1336756, "tid": 1381189, "ts": 1587595914587.419, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595914588.946, "dur": 1.861, + "args": { + "External id": 3291078,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595914589.815, "dur": 0.854, + "args": { + "External id": 3291079,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914595.581, "dur": 9.192, + "args": { + "External id": 3291080,"Record function id": 0, "Sequence number": 32987174, "Fwd thread id": 1, "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914596.477, "dur": 6.657, + "args": { + "External id": 3291081,"Sequence number": 32987174, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 1480 + } + }, + { + "ph": "f", "id": 158, "pid": 1336756, "tid": 1381189, "ts": 1587595914596.477, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595914601.004, "dur": 1.988, + "args": { + "External id": 3291082,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595914601.728, "dur": 1.130, + "args": { + "External id": 3291083,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914607.955, "dur": 5.531, + "args": { + "External id": 3291084,"Record function id": 0, "Sequence number": 32987173, "Fwd thread id": 1, "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914608.764, "dur": 2.806, + "args": { + "External id": 3291085,"Sequence number": 32987173, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1484 + } + }, + { + "ph": "f", "id": 159, "pid": 1336756, "tid": 1381189, "ts": 1587595914608.764, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595914609.804, "dur": 1.606, + "args": { + "External id": 3291086,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595914610.465, "dur": 0.809, + "args": { + "External id": 3291087,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914617.025, "dur": 147.059, + "args": { + "External id": 3291088,"Record function id": 0, "Sequence number": 32987172, "Fwd thread id": 1, "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914617.873, "dur": 139.214, + "args": { + "External id": 3291089,"Sequence number": 32987172, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1488 + } + }, + { + "ph": "f", "id": 160, "pid": 1336756, "tid": 1381189, "ts": 1587595914617.873, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595914621.253, "dur": 10.152, + "args": { + "External id": 3291090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595914625.199, "dur": 5.519, + "args": { + "External id": 3291091,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914629.043, "dur": 1.434, + "args": { + "External id": 3291092,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595914632.906, "dur": 64.661, + "args": { + "External id": 3291093,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595914698.855, "dur": 4.814, + "args": { + "External id": 3291094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595914699.696, "dur": 3.090, + "args": { + "External id": 3291095,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914701.074, "dur": 1.548, + "args": { + "External id": 3291096,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595914705.498, "dur": 5.089, + "args": { + "External id": 3291097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595914708.728, "dur": 1.219, + "args": { + "External id": 3291098,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914709.501, "dur": 0.361, + "args": { + "External id": 3291099,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595914711.169, "dur": 45.018, + "args": { + "External id": 3291100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914768.717, "dur": 9.270, + "args": { + "External id": 3291101,"Record function id": 0, "Sequence number": 32987171, "Fwd thread id": 1, "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914770.117, "dur": 5.963, + "args": { + "External id": 3291102,"Sequence number": 32987171, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1501 + } + }, + { + "ph": "f", "id": 161, "pid": 1336756, "tid": 1381189, "ts": 1587595914770.117, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595914773.850, "dur": 2.080, + "args": { + "External id": 3291103,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595914774.739, "dur": 1.096, + "args": { + "External id": 3291104,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914781.331, "dur": 8.316, + "args": { + "External id": 3291105,"Record function id": 0, "Sequence number": 32987170, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914782.166, "dur": 5.789, + "args": { + "External id": 3291106,"Sequence number": 32987170, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 162, "pid": 1336756, "tid": 1381189, "ts": 1587595914782.166, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595914785.137, "dur": 2.607, + "args": { + "External id": 3291107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595914785.708, "dur": 1.552, + "args": { + "External id": 3291108,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914786.527, "dur": 0.568, + "args": { + "External id": 3291109,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595914795.673, "dur": 8.503, + "args": { + "External id": 3291110,"Record function id": 0, "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595914797.006, "dur": 6.308, + "args": { + "External id": 3291111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595914799.313, "dur": 3.604, + "args": { + "External id": 3291112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595914800.619, "dur": 2.202, + "args": { + "External id": 3291113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914807.450, "dur": 8.721, + "args": { + "External id": 3291114,"Record function id": 0, "Sequence number": 32987169, "Fwd thread id": 1, "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914810.249, "dur": 4.321, + "args": { + "External id": 3291115,"Sequence number": 32987169, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1514 + } + }, + { + "ph": "f", "id": 163, "pid": 1336756, "tid": 1381189, "ts": 1587595914810.249, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595914811.018, "dur": 3.413, + "args": { + "External id": 3291116,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595914813.546, "dur": 0.738, + "args": { + "External id": 3291117,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914819.044, "dur": 118.734, + "args": { + "External id": 3291118,"Record function id": 0, "Sequence number": 32987168, "Fwd thread id": 1, "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914819.697, "dur": 112.060, + "args": { + "External id": 3291119,"Sequence number": 32987168, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1518 + } + }, + { + "ph": "f", "id": 164, "pid": 1336756, "tid": 1381189, "ts": 1587595914819.697, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595914821.767, "dur": 2.497, + "args": { + "External id": 3291120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595914822.481, "dur": 1.368, + "args": { + "External id": 3291121,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914823.231, "dur": 0.513, + "args": { + "External id": 3291122,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595914824.819, "dur": 27.962, + "args": { + "External id": 3291123,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595914856.166, "dur": 2.998, + "args": { + "External id": 3291124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595914856.614, "dur": 2.022, + "args": { + "External id": 3291125,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914857.354, "dur": 1.159, + "args": { + "External id": 3291126,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595914860.405, "dur": 6.976, + "args": { + "External id": 3291127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595914861.373, "dur": 5.432, + "args": { + "External id": 3291128,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595914864.348, "dur": 2.371, + "args": { + "External id": 3291129,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595914890.466, "dur": 40.246, + "args": { + "External id": 3291130,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914946.258, "dur": 36.104, + "args": { + "External id": 3291131,"Record function id": 0, "Sequence number": 32987167, "Fwd thread id": 1, "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595914947.409, "dur": 8.395, + "args": { + "External id": 3291132,"Sequence number": 32987167, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1531 + } + }, + { + "ph": "f", "id": 165, "pid": 1336756, "tid": 1381189, "ts": 1587595914947.409, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595914948.836, "dur": 6.803, + "args": { + "External id": 3291133,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595914954.020, "dur": 1.519, + "args": { + "External id": 3291134,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1381189, + "ts": 1587595914958.657, "dur": 21.317, + "args": { + "External id": 3291135,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915024.430, "dur": 38.628, + "args": { + "External id": 3291136,"Record function id": 0, "Sequence number": 32987166, "Fwd thread id": 1, "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915050.866, "dur": 9.782, + "args": { + "External id": 3291137,"Sequence number": 32987166, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1536 + } + }, + { + "ph": "f", "id": 166, "pid": 1336756, "tid": 1381189, "ts": 1587595915050.866, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595915054.155, "dur": 6.253, + "args": { + "External id": 3291138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595915055.253, "dur": 4.450, + "args": { + "External id": 3291139,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915058.765, "dur": 0.717, + "args": { + "External id": 3291140,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595915067.852, "dur": 6.301, + "args": { + "External id": 3291141,"Record function id": 0, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595915068.855, "dur": 4.678, + "args": { + "External id": 3291142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595915070.539, "dur": 2.603, + "args": { + "External id": 3291143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595915071.187, "dur": 1.814, + "args": { + "External id": 3291144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915077.114, "dur": 7.473, + "args": { + "External id": 3291145,"Record function id": 0, "Sequence number": 32987165, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915078.171, "dur": 4.598, + "args": { + "External id": 3291146,"Sequence number": 32987165, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 167, "pid": 1336756, "tid": 1381189, "ts": 1587595915078.171, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595915078.898, "dur": 3.725, + "args": { + "External id": 3291147,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595915081.335, "dur": 1.170, + "args": { + "External id": 3291148,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915087.698, "dur": 100.564, + "args": { + "External id": 3291149,"Record function id": 0, "Sequence number": 32987164, "Fwd thread id": 1, "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915088.527, "dur": 91.968, + "args": { + "External id": 3291150,"Sequence number": 32987164, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1549 + } + }, + { + "ph": "f", "id": 168, "pid": 1336756, "tid": 1381189, "ts": 1587595915088.527, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595915090.738, "dur": 4.213, + "args": { + "External id": 3291151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595915091.129, "dur": 3.404, + "args": { + "External id": 3291152,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[65536, 2048], [], []], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915093.886, "dur": 0.541, + "args": { + "External id": 3291153,"Record function id": 0, "Concrete Inputs": ["", "[2048, 65536]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595915095.916, "dur": 36.758, + "args": { + "External id": 3291154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048]], "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595915135.254, "dur": 2.555, + "args": { + "External id": 3291155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595915135.761, "dur": 1.496, + "args": { + "External id": 3291156,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915136.617, "dur": 0.531, + "args": { + "External id": 3291157,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595915138.762, "dur": 2.371, + "args": { + "External id": 3291158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595915139.767, "dur": 0.955, + "args": { + "External id": 3291159,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915140.360, "dur": 0.280, + "args": { + "External id": 3291160,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595915144.204, "dur": 35.774, + "args": { + "External id": 3291161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915192.632, "dur": 30.320, + "args": { + "External id": 3291162,"Record function id": 0, "Sequence number": 32987163, "Fwd thread id": 1, "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915193.503, "dur": 5.246, + "args": { + "External id": 3291163,"Sequence number": 32987163, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 1562 + } + }, + { + "ph": "f", "id": 169, "pid": 1336756, "tid": 1381189, "ts": 1587595915193.503, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595915196.926, "dur": 1.662, + "args": { + "External id": 3291164,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595915197.493, "dur": 0.997, + "args": { + "External id": 3291165,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595915201.746, "dur": 19.367, + "args": { + "External id": 3291166,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915226.248, "dur": 8.619, + "args": { + "External id": 3291167,"Record function id": 0, "Sequence number": 32987162, "Fwd thread id": 1, "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 1336756, "tid": 1381189, + "ts": 1587595915227.005, "dur": 5.611, + "args": { + "External id": 3291168,"Sequence number": 32987162, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1567 + } + }, + { + "ph": "f", "id": 170, "pid": 1336756, "tid": 1381189, "ts": 1587595915227.005, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1381189, + "ts": 1587595915227.591, "dur": 4.804, + "args": { + "External id": 3291169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 2048]], "Input Dims": [[2048, 2048]], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1381189, + "ts": 1587595915228.479, "dur": 3.459, + "args": { + "External id": 3291170,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 2048], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915231.454, "dur": 0.398, + "args": { + "External id": 3291171,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[2048, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 2048], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595915238.468, "dur": 5.157, + "args": { + "External id": 3291172,"Record function id": 0, "Ev Idx": 1571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595915239.751, "dur": 3.334, + "args": { + "External id": 3291173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595915240.911, "dur": 1.822, + "args": { + "External id": 3291174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595915241.324, "dur": 1.293, + "args": { + "External id": 3291175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595915247.483, "dur": 340.554, + "args": { + "External id": 3291176,"Record function id": 0, "Sequence number": 32987161, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595915250.903, "dur": 306.044, + "args": { + "External id": 3291177,"Sequence number": 32987161, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 171, "pid": 1336756, "tid": 1381189, "ts": 1587595915250.903, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595915282.633, "dur": 1.686, + "args": { + "External id": 3291178,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595915283.225, "dur": 0.876, + "args": { + "External id": 3291179,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595915299.647, "dur": 5.758, + "args": { + "External id": 3291180,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595915314.271, "dur": 2.102, + "args": { + "External id": 3291181,"Record function id": 0, "Concrete Inputs": ["[132, 2048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595915457.404, "dur": 1.707, + "args": { + "External id": 3291182,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 2048]"], "Input type": ["float", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[132, 2048], []], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1381189, + "ts": 1587595915463.066, "dur": 36.207, + "args": { + "External id": 3291183,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[270336, 2048, 1], [], [], []], "Input Dims": [[1, 132, 2048], [], [], []], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915474.632, "dur": 1.021, + "args": { + "External id": 3291184,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 2048]", "[2048, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[1, 2048], [], [], []], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595915504.275, "dur": 30.735, + "args": { + "External id": 3291185,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], []], "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595915506.066, "dur": 28.741, + "args": { + "External id": 3291186,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[1, 2048], [], [], [], [], [], []], "Ev Idx": 1585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915509.781, "dur": 3.970, + "args": { + "External id": 3291187,"Record function id": 0, "Concrete Inputs": ["[1, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595915517.208, "dur": 16.958, + "args": { + "External id": 3291188,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[1, 2048], [1, 2048], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1381189, + "ts": 1587595915539.153, "dur": 4.580, + "args": { + "External id": 3291189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1]], "Input Dims": [[1, 2048], [2048]], "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595915542.768, "dur": 0.825, + "args": { + "External id": 3291190,"Record function id": 0, "Concrete Inputs": ["", "[2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[1, 2048], []], "Ev Idx": 1589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1381189, + "ts": 1587595915548.889, "dur": 1.743, + "args": { + "External id": 3291191,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595915549.508, "dur": 1.028, + "args": { + "External id": 3291192,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1381189, + "ts": 1587595915569.128, "dur": 14.897, + "args": { + "External id": 3291193,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595915596.290, "dur": 8.373, + "args": { + "External id": 3291194,"Record function id": 0, "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595915597.791, "dur": 6.229, + "args": { + "External id": 3291195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595915598.976, "dur": 4.155, + "args": { + "External id": 3291196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595915601.737, "dur": 1.259, + "args": { + "External id": 3291197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595915608.237, "dur": 2818.472, + "args": { + "External id": 3291198,"Record function id": 0, "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.27)", "pid": 1336756, "tid": 1381189, + "ts": 1587595915637.947, "dur": 950.536, + "args": { + "External id": 3291199,"Record function id": 0, "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.26", "pid": 1336756, "tid": 1381189, + "ts": 1587595915663.921, "dur": 915.985, + "args": { + "External id": 3291200,"Record function id": 0, "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 1336756, "tid": 1381189, + "ts": 1587595915676.744, "dur": 887.719, + "args": { + "External id": 3291201,"Record function id": 0, "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595915747.469, "dur": 6.064, + "args": { + "External id": 3291202,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587595915767.832, "dur": 36.939, + "args": { + "External id": 3291203,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915773.765, "dur": 3.201, + "args": { + "External id": 3291204,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915778.640, "dur": 0.652, + "args": { + "External id": 3291205,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915783.299, "dur": 0.438, + "args": { + "External id": 3291206,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915784.587, "dur": 0.301, + "args": { + "External id": 3291207,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915785.514, "dur": 0.543, + "args": { + "External id": 3291208,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915791.879, "dur": 0.293, + "args": { + "External id": 3291209,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915792.870, "dur": 0.326, + "args": { + "External id": 3291210,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915794.162, "dur": 1.556, + "args": { + "External id": 3291211,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915798.205, "dur": 1.825, + "args": { + "External id": 3291212,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595915818.167, "dur": 30.750, + "args": { + "External id": 3291213,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587595915903.543, "dur": 161.462, + "args": { + "External id": 3291214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595915917.213, "dur": 7.307, + "args": { + "External id": 3291215,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587595915930.326, "dur": 9.090, + "args": { + "External id": 3291216,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587595915934.020, "dur": 4.978, + "args": { + "External id": 3291217,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915937.035, "dur": 0.510, + "args": { + "External id": 3291218,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587595915946.398, "dur": 33.808, + "args": { + "External id": 3291219,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915949.997, "dur": 0.586, + "args": { + "External id": 3291220,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915951.683, "dur": 2.376, + "args": { + "External id": 3291221,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915954.902, "dur": 0.684, + "args": { + "External id": 3291222,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915958.620, "dur": 0.324, + "args": { + "External id": 3291223,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915962.043, "dur": 0.380, + "args": { + "External id": 3291224,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915963.102, "dur": 2.553, + "args": { + "External id": 3291225,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915969.322, "dur": 0.548, + "args": { + "External id": 3291226,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915972.773, "dur": 0.342, + "args": { + "External id": 3291227,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595915975.225, "dur": 0.615, + "args": { + "External id": 3291228,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595916029.020, "dur": 27.400, + "args": { + "External id": 3291229,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587595916125.272, "dur": 344.312, + "args": { + "External id": 3291230,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587595916160.747, "dur": 303.942, + "args": { + "External id": 3291231,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1630, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587595916170.312, "dur": 288.927, + "args": { + "External id": 3291232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587595916493.546, "dur": 2.323, + "args": { + "External id": 3291233,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1632, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595916596.110, "dur": 1804.986, + "args": { + "External id": 3291234,"Sequence number": 32987160, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1633 + } + }, + { + "ph": "f", "id": 172, "pid": 1336756, "tid": 1381189, "ts": 1587595916596.110, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595916719.365, "dur": 106.656, + "args": { + "External id": 3291235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587595916892.175, "dur": 42.546, + "args": { + "External id": 3291236,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587595916956.059, "dur": 97.836, + "args": { + "External id": 3291237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917068.749, "dur": 35.568, + "args": { + "External id": 3291238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917111.088, "dur": 48.104, + "args": { + "External id": 3291239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917167.849, "dur": 28.412, + "args": { + "External id": 3291240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917203.986, "dur": 42.719, + "args": { + "External id": 3291241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587595917277.689, "dur": 28.501, + "args": { + "External id": 3291242,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587595917326.129, "dur": 33.923, + "args": { + "External id": 3291243,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587595917386.187, "dur": 23.358, + "args": { + "External id": 3291244,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587595917428.122, "dur": 19.843, + "args": { + "External id": 3291245,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917456.732, "dur": 29.731, + "args": { + "External id": 3291246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917489.586, "dur": 32.580, + "args": { + "External id": 3291247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587595917551.530, "dur": 174.393, + "args": { + "External id": 3291248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595917632.725, "dur": 7.397, + "args": { + "External id": 3291249,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595917642.020, "dur": 2.394, + "args": { + "External id": 3291250,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587595917760.417, "dur": 25.874, + "args": { + "External id": 3291251,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587595917803.433, "dur": 16.357, + "args": { + "External id": 3291252,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917827.799, "dur": 32.856, + "args": { + "External id": 3291253,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917866.144, "dur": 57.941, + "args": { + "External id": 3291254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917934.899, "dur": 23.309, + "args": { + "External id": 3291255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595917964.921, "dur": 65.000, + "args": { + "External id": 3291256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595918039.993, "dur": 24.922, + "args": { + "External id": 3291257,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587595918071.940, "dur": 30.688, + "args": { + "External id": 3291258,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587595918126.238, "dur": 28.958, + "args": { + "External id": 3291259,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587595918175.007, "dur": 28.028, + "args": { + "External id": 3291260,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587595918227.677, "dur": 24.957, + "args": { + "External id": 3291261,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587595918271.374, "dur": 36.920, + "args": { + "External id": 3291262,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587595918343.301, "dur": 25.723, + "args": { + "External id": 3291263,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918449.146, "dur": 14.648, + "args": { + "External id": 3291264,"Record function id": 0, "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918452.571, "dur": 10.289, + "args": { + "External id": 3291265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918456.572, "dur": 5.338, + "args": { + "External id": 3291266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918458.020, "dur": 3.805, + "args": { + "External id": 3291267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918467.558, "dur": 7.983, + "args": { + "External id": 3291268,"Record function id": 0, "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918471.561, "dur": 3.498, + "args": { + "External id": 3291269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918472.453, "dur": 1.800, + "args": { + "External id": 3291270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918473.377, "dur": 0.804, + "args": { + "External id": 3291271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918478.791, "dur": 4.716, + "args": { + "External id": 3291272,"Record function id": 0, "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918480.284, "dur": 2.766, + "args": { + "External id": 3291273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918480.876, "dur": 1.723, + "args": { + "External id": 3291274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918481.409, "dur": 0.924, + "args": { + "External id": 3291275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918486.726, "dur": 5.870, + "args": { + "External id": 3291276,"Record function id": 0, "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918487.663, "dur": 4.543, + "args": { + "External id": 3291277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918488.674, "dur": 3.090, + "args": { + "External id": 3291278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918488.965, "dur": 2.710, + "args": { + "External id": 3291279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918495.741, "dur": 4.300, + "args": { + "External id": 3291280,"Record function id": 0, "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918497.102, "dur": 2.501, + "args": { + "External id": 3291281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918497.731, "dur": 1.420, + "args": { + "External id": 3291282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918498.202, "dur": 0.857, + "args": { + "External id": 3291283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918503.118, "dur": 3.570, + "args": { + "External id": 3291284,"Record function id": 0, "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918504.209, "dur": 2.071, + "args": { + "External id": 3291285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918504.685, "dur": 1.004, + "args": { + "External id": 3291286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918505.107, "dur": 0.517, + "args": { + "External id": 3291287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918509.830, "dur": 3.721, + "args": { + "External id": 3291288,"Record function id": 0, "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918511.114, "dur": 2.035, + "args": { + "External id": 3291289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918511.589, "dur": 1.075, + "args": { + "External id": 3291290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918512.032, "dur": 0.555, + "args": { + "External id": 3291291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918516.662, "dur": 5.953, + "args": { + "External id": 3291292,"Record function id": 0, "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918517.633, "dur": 4.547, + "args": { + "External id": 3291293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918518.362, "dur": 3.280, + "args": { + "External id": 3291294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918520.785, "dur": 0.756, + "args": { + "External id": 3291295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918525.699, "dur": 3.811, + "args": { + "External id": 3291296,"Record function id": 0, "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587595918526.809, "dur": 2.316, + "args": { + "External id": 3291297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918527.264, "dur": 1.256, + "args": { + "External id": 3291298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587595918527.815, "dur": 0.640, + "args": { + "External id": 3291299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595918534.612, "dur": 164954.070, + "args": { + "External id": 3291300,"Record function id": 0, "Sequence number": 32987159, "Fwd thread id": 1, "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587595918536.325, "dur": 164944.459, + "args": { + "External id": 3291301,"Sequence number": 32987159, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1700 + } + }, + { + "ph": "f", "id": 173, "pid": 1336756, "tid": 1381189, "ts": 1587595918536.325, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.27)", "pid": 1336756, "tid": 1381189, + "ts": 1587595918567.506, "dur": 40.499, + "args": { + "External id": 3291302,"Record function id": 0, "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.27)", "pid": 1336756, "tid": 1381189, + "ts": 1587595918616.275, "dur": 74.910, + "args": { + "External id": 3291303,"Record function id": 0, "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.27)", "pid": 1336756, "tid": 1381189, + "ts": 1587595918698.156, "dur": 164774.497, + "args": { + "External id": 3291304,"Record function id": 0, "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595918755.899, "dur": 7.630, + "args": { + "External id": 3291305,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587595918776.082, "dur": 4.616, + "args": { + "External id": 3291306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587595918798.535, "dur": 163843.583, + "args": { + "External id": 3291307,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587595918813.759, "dur": 163816.015, + "args": { + "External id": 3291308,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587595918902.526, "dur": 6.923, + "args": { + "External id": 3291309,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587595918922.757, "dur": 163655.551, + "args": { + "External id": 3291310,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587595918925.420, "dur": 163651.315, + "args": { + "External id": 3291311,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587595918928.762, "dur": 7.645, + "args": { + "External id": 3291312,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587595918938.122, "dur": 163632.463, + "args": { + "External id": 3291313,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596082752.256, "dur": 13.085, + "args": { + "External id": 3291314,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596082755.907, "dur": 9.028, + "args": { + "External id": 3291315,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596082806.127, "dur": 344.686, + "args": { + "External id": 3291316,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596082843.574, "dur": 302.208, + "args": { + "External id": 3291317,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1716, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596082857.031, "dur": 282.285, + "args": { + "External id": 3291318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596083177.673, "dur": 2.227, + "args": { + "External id": 3291319,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1718, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083243.225, "dur": 7.476, + "args": { + "External id": 3291320,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083305.677, "dur": 1.229, + "args": { + "External id": 3291321,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083324.294, "dur": 1.660, + "args": { + "External id": 3291322,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083341.056, "dur": 0.881, + "args": { + "External id": 3291323,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083352.922, "dur": 0.698, + "args": { + "External id": 3291324,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083363.780, "dur": 0.876, + "args": { + "External id": 3291325,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083376.942, "dur": 0.903, + "args": { + "External id": 3291326,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083391.882, "dur": 2.352, + "args": { + "External id": 3291327,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083405.593, "dur": 0.708, + "args": { + "External id": 3291328,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596083502.915, "dur": 2799.585, + "args": { + "External id": 3291329,"Record function id": 0, "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.26)", "pid": 1336756, "tid": 1381189, + "ts": 1587596083522.615, "dur": 1046.928, + "args": { + "External id": 3291330,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 1336756, "tid": 1381189, + "ts": 1587596083539.129, "dur": 314.695, + "args": { + "External id": 3291331,"Record function id": 0, "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083616.648, "dur": 3.724, + "args": { + "External id": 3291332,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083623.473, "dur": 0.836, + "args": { + "External id": 3291333,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083626.000, "dur": 0.793, + "args": { + "External id": 3291334,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083628.639, "dur": 3.019, + "args": { + "External id": 3291335,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083634.792, "dur": 0.740, + "args": { + "External id": 3291336,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083637.277, "dur": 0.730, + "args": { + "External id": 3291337,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083639.536, "dur": 1.808, + "args": { + "External id": 3291338,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083642.840, "dur": 0.891, + "args": { + "External id": 3291339,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083648.783, "dur": 1.214, + "args": { + "External id": 3291340,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596083651.252, "dur": 0.857, + "args": { + "External id": 3291341,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596083670.322, "dur": 154.494, + "args": { + "External id": 3291342,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596083688.001, "dur": 132.462, + "args": { + "External id": 3291343,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596083710.604, "dur": 12.492, + "args": { + "External id": 3291344,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596083726.159, "dur": 66.457, + "args": { + "External id": 3291345,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596083728.533, "dur": 63.752, + "args": { + "External id": 3291346,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596083731.757, "dur": 8.167, + "args": { + "External id": 3291347,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596083743.546, "dur": 48.183, + "args": { + "External id": 3291348,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 1336756, "tid": 1381189, + "ts": 1587596083949.189, "dur": 611.907, + "args": { + "External id": 3291349,"Record function id": 0, "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 1336756, "tid": 1381189, + "ts": 1587596083967.284, "dur": 580.796, + "args": { + "External id": 3291350,"Record function id": 0, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596084059.105, "dur": 6.152, + "args": { + "External id": 3291351,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596084080.322, "dur": 38.496, + "args": { + "External id": 3291352,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084087.259, "dur": 2.727, + "args": { + "External id": 3291353,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084091.601, "dur": 0.673, + "args": { + "External id": 3291354,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084093.355, "dur": 0.546, + "args": { + "External id": 3291355,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084098.534, "dur": 0.396, + "args": { + "External id": 3291356,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084099.514, "dur": 0.482, + "args": { + "External id": 3291357,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084100.654, "dur": 0.555, + "args": { + "External id": 3291358,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084106.036, "dur": 2.011, + "args": { + "External id": 3291359,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084108.701, "dur": 0.251, + "args": { + "External id": 3291360,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084111.283, "dur": 2.007, + "args": { + "External id": 3291361,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596084141.979, "dur": 36.029, + "args": { + "External id": 3291362,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596084208.591, "dur": 109.263, + "args": { + "External id": 3291363,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596084218.829, "dur": 5.082, + "args": { + "External id": 3291364,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596084229.329, "dur": 11.379, + "args": { + "External id": 3291365,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596084233.238, "dur": 7.028, + "args": { + "External id": 3291366,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084238.477, "dur": 0.622, + "args": { + "External id": 3291367,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596084246.961, "dur": 30.094, + "args": { + "External id": 3291368,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084248.415, "dur": 0.586, + "args": { + "External id": 3291369,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084252.004, "dur": 1.939, + "args": { + "External id": 3291370,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084254.477, "dur": 0.560, + "args": { + "External id": 3291371,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084255.901, "dur": 2.455, + "args": { + "External id": 3291372,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084263.639, "dur": 0.253, + "args": { + "External id": 3291373,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084264.678, "dur": 0.523, + "args": { + "External id": 3291374,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084267.545, "dur": 0.619, + "args": { + "External id": 3291375,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084271.018, "dur": 0.380, + "args": { + "External id": 3291376,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596084271.892, "dur": 0.557, + "args": { + "External id": 3291377,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596084289.616, "dur": 20.225, + "args": { + "External id": 3291378,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596084360.080, "dur": 119.050, + "args": { + "External id": 3291379,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596084390.122, "dur": 85.861, + "args": { + "External id": 3291380,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1779, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596084401.691, "dur": 70.058, + "args": { + "External id": 3291381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596084497.118, "dur": 1.824, + "args": { + "External id": 3291382,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1781, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596084576.400, "dur": 1702.629, + "args": { + "External id": 3291383,"Sequence number": 32987158, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1782 + } + }, + { + "ph": "f", "id": 174, "pid": 1336756, "tid": 1381189, "ts": 1587596084576.400, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596084682.038, "dur": 100.632, + "args": { + "External id": 3291384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596084825.189, "dur": 39.213, + "args": { + "External id": 3291385,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596084901.848, "dur": 58.126, + "args": { + "External id": 3291386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596084970.326, "dur": 71.846, + "args": { + "External id": 3291387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085054.425, "dur": 48.508, + "args": { + "External id": 3291388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085110.186, "dur": 27.444, + "args": { + "External id": 3291389,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085144.550, "dur": 40.642, + "args": { + "External id": 3291390,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596085213.839, "dur": 26.711, + "args": { + "External id": 3291391,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596085263.453, "dur": 33.316, + "args": { + "External id": 3291392,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596085318.473, "dur": 20.614, + "args": { + "External id": 3291393,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596085357.682, "dur": 15.959, + "args": { + "External id": 3291394,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085381.505, "dur": 30.046, + "args": { + "External id": 3291395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085414.943, "dur": 32.692, + "args": { + "External id": 3291396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596085477.679, "dur": 163.943, + "args": { + "External id": 3291397,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596085553.412, "dur": 5.350, + "args": { + "External id": 3291398,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596085560.354, "dur": 2.278, + "args": { + "External id": 3291399,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596085679.153, "dur": 26.224, + "args": { + "External id": 3291400,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596085718.891, "dur": 16.293, + "args": { + "External id": 3291401,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085742.708, "dur": 35.888, + "args": { + "External id": 3291402,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085783.978, "dur": 34.674, + "args": { + "External id": 3291403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085829.126, "dur": 21.664, + "args": { + "External id": 3291404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085855.099, "dur": 47.767, + "args": { + "External id": 3291405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085911.438, "dur": 23.820, + "args": { + "External id": 3291406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596085941.676, "dur": 89.861, + "args": { + "External id": 3291407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596086065.366, "dur": 28.084, + "args": { + "External id": 3291408,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596086117.177, "dur": 25.447, + "args": { + "External id": 3291409,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596086160.240, "dur": 22.531, + "args": { + "External id": 3291410,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596086203.347, "dur": 14.245, + "args": { + "External id": 3291411,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596086232.086, "dur": 17.263, + "args": { + "External id": 3291412,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086324.391, "dur": 14.143, + "args": { + "External id": 3291413,"Record function id": 0, "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086327.417, "dur": 10.213, + "args": { + "External id": 3291414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086331.285, "dur": 5.641, + "args": { + "External id": 3291415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086333.066, "dur": 3.749, + "args": { + "External id": 3291416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086342.290, "dur": 4.947, + "args": { + "External id": 3291417,"Record function id": 0, "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086343.637, "dur": 3.176, + "args": { + "External id": 3291418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086344.553, "dur": 1.747, + "args": { + "External id": 3291419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086345.104, "dur": 1.099, + "args": { + "External id": 3291420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086350.397, "dur": 9.118, + "args": { + "External id": 3291421,"Record function id": 0, "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086354.480, "dur": 4.609, + "args": { + "External id": 3291422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086355.273, "dur": 3.392, + "args": { + "External id": 3291423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086355.766, "dur": 2.796, + "args": { + "External id": 3291424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086362.739, "dur": 4.421, + "args": { + "External id": 3291425,"Record function id": 0, "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086363.980, "dur": 2.780, + "args": { + "External id": 3291426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086364.913, "dur": 1.427, + "args": { + "External id": 3291427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086365.421, "dur": 0.842, + "args": { + "External id": 3291428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086370.506, "dur": 3.985, + "args": { + "External id": 3291429,"Record function id": 0, "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086371.900, "dur": 2.177, + "args": { + "External id": 3291430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086372.397, "dur": 1.195, + "args": { + "External id": 3291431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086372.678, "dur": 0.839, + "args": { + "External id": 3291432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086377.555, "dur": 5.316, + "args": { + "External id": 3291433,"Record function id": 0, "Ev Idx": 1832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086380.307, "dur": 2.111, + "args": { + "External id": 3291434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086380.777, "dur": 1.177, + "args": { + "External id": 3291435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086381.164, "dur": 0.715, + "args": { + "External id": 3291436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086386.056, "dur": 3.352, + "args": { + "External id": 3291437,"Record function id": 0, "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086387.019, "dur": 1.907, + "args": { + "External id": 3291438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086387.459, "dur": 1.029, + "args": { + "External id": 3291439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086387.744, "dur": 0.672, + "args": { + "External id": 3291440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086392.413, "dur": 5.649, + "args": { + "External id": 3291441,"Record function id": 0, "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086393.425, "dur": 4.192, + "args": { + "External id": 3291442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086394.060, "dur": 3.142, + "args": { + "External id": 3291443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086396.582, "dur": 0.542, + "args": { + "External id": 3291444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086401.074, "dur": 3.088, + "args": { + "External id": 3291445,"Record function id": 0, "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596086401.995, "dur": 1.753, + "args": { + "External id": 3291446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086402.424, "dur": 0.891, + "args": { + "External id": 3291447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596086402.726, "dur": 0.513, + "args": { + "External id": 3291448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596086407.757, "dur": 47412.530, + "args": { + "External id": 3291449,"Record function id": 0, "Sequence number": 32987157, "Fwd thread id": 1, "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596086409.368, "dur": 47402.562, + "args": { + "External id": 3291450,"Sequence number": 32987157, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1849 + } + }, + { + "ph": "f", "id": 175, "pid": 1336756, "tid": 1381189, "ts": 1587596086409.368, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.26)", "pid": 1336756, "tid": 1381189, + "ts": 1587596086437.484, "dur": 38.114, + "args": { + "External id": 3291451,"Record function id": 0, "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.26)", "pid": 1336756, "tid": 1381189, + "ts": 1587596086483.540, "dur": 58.588, + "args": { + "External id": 3291452,"Record function id": 0, "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.26)", "pid": 1336756, "tid": 1381189, + "ts": 1587596086549.363, "dur": 47254.099, + "args": { + "External id": 3291453,"Record function id": 0, "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596086638.963, "dur": 5.964, + "args": { + "External id": 3291454,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596086653.824, "dur": 5.951, + "args": { + "External id": 3291455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596086676.316, "dur": 46267.259, + "args": { + "External id": 3291456,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596086689.244, "dur": 46243.316, + "args": { + "External id": 3291457,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596086764.969, "dur": 14.153, + "args": { + "External id": 3291458,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596086789.099, "dur": 46104.025, + "args": { + "External id": 3291459,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596086791.527, "dur": 46100.825, + "args": { + "External id": 3291460,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596086796.247, "dur": 4.656, + "args": { + "External id": 3291461,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596086802.537, "dur": 46085.015, + "args": { + "External id": 3291462,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596133069.704, "dur": 12.348, + "args": { + "External id": 3291463,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596133073.364, "dur": 7.888, + "args": { + "External id": 3291464,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596133112.605, "dur": 400.892, + "args": { + "External id": 3291465,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596133146.728, "dur": 361.744, + "args": { + "External id": 3291466,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1865, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596133161.706, "dur": 341.430, + "args": { + "External id": 3291467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596133535.608, "dur": 2.601, + "args": { + "External id": 3291468,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1867, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133595.696, "dur": 6.499, + "args": { + "External id": 3291469,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133647.346, "dur": 1.361, + "args": { + "External id": 3291470,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133664.353, "dur": 3.398, + "args": { + "External id": 3291471,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133681.531, "dur": 0.880, + "args": { + "External id": 3291472,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133692.475, "dur": 0.636, + "args": { + "External id": 3291473,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133704.783, "dur": 0.788, + "args": { + "External id": 3291474,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133715.844, "dur": 2.774, + "args": { + "External id": 3291475,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133729.388, "dur": 1.976, + "args": { + "External id": 3291476,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596133744.118, "dur": 0.992, + "args": { + "External id": 3291477,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596133834.341, "dur": 2768.949, + "args": { + "External id": 3291478,"Record function id": 0, "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 1336756, "tid": 1381189, + "ts": 1587596133853.574, "dur": 1056.135, + "args": { + "External id": 3291479,"Record function id": 0, "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 1336756, "tid": 1381189, + "ts": 1587596133867.656, "dur": 387.324, + "args": { + "External id": 3291480,"Record function id": 0, "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596133973.628, "dur": 4.670, + "args": { + "External id": 3291481,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134018.031, "dur": 1.616, + "args": { + "External id": 3291482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134022.804, "dur": 2.683, + "args": { + "External id": 3291483,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134027.274, "dur": 0.907, + "args": { + "External id": 3291484,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134031.732, "dur": 0.904, + "args": { + "External id": 3291485,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134033.762, "dur": 1.301, + "args": { + "External id": 3291486,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134038.541, "dur": 2.106, + "args": { + "External id": 3291487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134041.989, "dur": 0.897, + "args": { + "External id": 3291488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134045.960, "dur": 0.782, + "args": { + "External id": 3291489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596134047.811, "dur": 0.994, + "args": { + "External id": 3291490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596134067.516, "dur": 157.224, + "args": { + "External id": 3291491,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596134086.423, "dur": 133.975, + "args": { + "External id": 3291492,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596134106.540, "dur": 15.348, + "args": { + "External id": 3291493,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596134124.979, "dur": 67.808, + "args": { + "External id": 3291494,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596134130.181, "dur": 62.343, + "args": { + "External id": 3291495,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134133.780, "dur": 5.612, + "args": { + "External id": 3291496,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596134141.435, "dur": 50.500, + "args": { + "External id": 3291497,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 1336756, "tid": 1381189, + "ts": 1587596134330.554, "dur": 570.554, + "args": { + "External id": 3291498,"Record function id": 0, "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 1336756, "tid": 1381189, + "ts": 1587596134348.346, "dur": 514.610, + "args": { + "External id": 3291499,"Record function id": 0, "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596134400.296, "dur": 4.712, + "args": { + "External id": 3291500,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596134419.059, "dur": 32.805, + "args": { + "External id": 3291501,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134424.829, "dur": 1.293, + "args": { + "External id": 3291502,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134429.112, "dur": 0.310, + "args": { + "External id": 3291503,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134430.220, "dur": 0.505, + "args": { + "External id": 3291504,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134433.060, "dur": 1.780, + "args": { + "External id": 3291505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134435.345, "dur": 0.339, + "args": { + "External id": 3291506,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134438.137, "dur": 2.767, + "args": { + "External id": 3291507,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134442.833, "dur": 0.280, + "args": { + "External id": 3291508,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134443.827, "dur": 0.317, + "args": { + "External id": 3291509,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134446.671, "dur": 0.575, + "args": { + "External id": 3291510,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596134461.206, "dur": 32.495, + "args": { + "External id": 3291511,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596134521.077, "dur": 112.536, + "args": { + "External id": 3291512,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596134530.503, "dur": 3.248, + "args": { + "External id": 3291513,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596134538.376, "dur": 12.260, + "args": { + "External id": 3291514,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596134542.672, "dur": 7.538, + "args": { + "External id": 3291515,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134546.961, "dur": 1.875, + "args": { + "External id": 3291516,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596134556.580, "dur": 37.518, + "args": { + "External id": 3291517,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134557.865, "dur": 0.822, + "args": { + "External id": 3291518,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134561.907, "dur": 0.431, + "args": { + "External id": 3291519,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134575.012, "dur": 2.120, + "args": { + "External id": 3291520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134580.983, "dur": 0.427, + "args": { + "External id": 3291521,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134581.938, "dur": 0.324, + "args": { + "External id": 3291522,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134582.731, "dur": 1.383, + "args": { + "External id": 3291523,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134586.125, "dur": 0.286, + "args": { + "External id": 3291524,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134586.858, "dur": 0.299, + "args": { + "External id": 3291525,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596134588.776, "dur": 0.335, + "args": { + "External id": 3291526,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596134604.465, "dur": 21.942, + "args": { + "External id": 3291527,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596134676.664, "dur": 118.998, + "args": { + "External id": 3291528,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596134706.346, "dur": 86.157, + "args": { + "External id": 3291529,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1928, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596134714.745, "dur": 73.756, + "args": { + "External id": 3291530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596134811.669, "dur": 2.179, + "args": { + "External id": 3291531,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1930, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596134917.076, "dur": 1666.678, + "args": { + "External id": 3291532,"Sequence number": 32987156, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1931 + } + }, + { + "ph": "f", "id": 176, "pid": 1336756, "tid": 1381189, "ts": 1587596134917.076, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135078.290, "dur": 106.244, + "args": { + "External id": 3291533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596135227.237, "dur": 37.866, + "args": { + "External id": 3291534,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135280.491, "dur": 50.763, + "args": { + "External id": 3291535,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135342.736, "dur": 33.007, + "args": { + "External id": 3291536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135381.992, "dur": 44.301, + "args": { + "External id": 3291537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135432.921, "dur": 27.504, + "args": { + "External id": 3291538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135475.980, "dur": 43.081, + "args": { + "External id": 3291539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596135548.139, "dur": 24.498, + "args": { + "External id": 3291540,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596135590.973, "dur": 30.388, + "args": { + "External id": 3291541,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596135641.160, "dur": 18.912, + "args": { + "External id": 3291542,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596135677.916, "dur": 14.175, + "args": { + "External id": 3291543,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135700.041, "dur": 29.596, + "args": { + "External id": 3291544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596135732.855, "dur": 33.784, + "args": { + "External id": 3291545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596135794.970, "dur": 228.999, + "args": { + "External id": 3291546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596135890.587, "dur": 9.282, + "args": { + "External id": 3291547,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596135902.234, "dur": 2.082, + "args": { + "External id": 3291548,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596136060.986, "dur": 28.130, + "args": { + "External id": 3291549,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596136102.921, "dur": 15.949, + "args": { + "External id": 3291550,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596136127.982, "dur": 43.282, + "args": { + "External id": 3291551,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596136176.687, "dur": 35.464, + "args": { + "External id": 3291552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596136221.105, "dur": 21.719, + "args": { + "External id": 3291553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596136246.811, "dur": 31.788, + "args": { + "External id": 3291554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596136284.071, "dur": 32.566, + "args": { + "External id": 3291555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596136328.740, "dur": 36.742, + "args": { + "External id": 3291556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596136388.689, "dur": 25.041, + "args": { + "External id": 3291557,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596136435.796, "dur": 23.884, + "args": { + "External id": 3291558,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596136473.980, "dur": 22.229, + "args": { + "External id": 3291559,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596136512.736, "dur": 14.304, + "args": { + "External id": 3291560,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596136540.313, "dur": 16.928, + "args": { + "External id": 3291561,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136625.131, "dur": 14.220, + "args": { + "External id": 3291562,"Record function id": 0, "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136628.002, "dur": 10.524, + "args": { + "External id": 3291563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136632.177, "dur": 5.398, + "args": { + "External id": 3291564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136633.793, "dur": 3.694, + "args": { + "External id": 3291565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136643.140, "dur": 8.907, + "args": { + "External id": 3291566,"Record function id": 0, "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136646.485, "dur": 5.061, + "args": { + "External id": 3291567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136647.180, "dur": 3.896, + "args": { + "External id": 3291568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136647.881, "dur": 3.107, + "args": { + "External id": 3291569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136655.286, "dur": 4.324, + "args": { + "External id": 3291570,"Record function id": 0, "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136656.549, "dur": 2.646, + "args": { + "External id": 3291571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136657.070, "dur": 1.720, + "args": { + "External id": 3291572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136657.931, "dur": 0.783, + "args": { + "External id": 3291573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136662.620, "dur": 3.726, + "args": { + "External id": 3291574,"Record function id": 0, "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136663.810, "dur": 2.146, + "args": { + "External id": 3291575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136664.515, "dur": 1.040, + "args": { + "External id": 3291576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136664.845, "dur": 0.644, + "args": { + "External id": 3291577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136669.360, "dur": 3.350, + "args": { + "External id": 3291578,"Record function id": 0, "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136670.290, "dur": 2.009, + "args": { + "External id": 3291579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136670.734, "dur": 1.054, + "args": { + "External id": 3291580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136671.061, "dur": 0.653, + "args": { + "External id": 3291581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136675.779, "dur": 3.168, + "args": { + "External id": 3291582,"Record function id": 0, "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136676.680, "dur": 1.861, + "args": { + "External id": 3291583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136677.181, "dur": 0.966, + "args": { + "External id": 3291584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136677.543, "dur": 0.527, + "args": { + "External id": 3291585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136682.118, "dur": 5.949, + "args": { + "External id": 3291586,"Record function id": 0, "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136683.167, "dur": 4.484, + "args": { + "External id": 3291587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136684.020, "dur": 3.217, + "args": { + "External id": 3291588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136686.593, "dur": 0.571, + "args": { + "External id": 3291589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136691.284, "dur": 3.766, + "args": { + "External id": 3291590,"Record function id": 0, "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136692.560, "dur": 2.094, + "args": { + "External id": 3291591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136693.074, "dur": 1.147, + "args": { + "External id": 3291592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136693.472, "dur": 0.676, + "args": { + "External id": 3291593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136698.021, "dur": 6.020, + "args": { + "External id": 3291594,"Record function id": 0, "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596136698.994, "dur": 4.620, + "args": { + "External id": 3291595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136699.715, "dur": 3.521, + "args": { + "External id": 3291596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596136700.012, "dur": 3.158, + "args": { + "External id": 3291597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596136707.314, "dur": 38588.432, + "args": { + "External id": 3291598,"Record function id": 0, "Sequence number": 32987155, "Fwd thread id": 1, "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596136708.677, "dur": 38579.145, + "args": { + "External id": 3291599,"Sequence number": 32987155, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1998 + } + }, + { + "ph": "f", "id": 177, "pid": 1336756, "tid": 1381189, "ts": 1587596136708.677, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 1336756, "tid": 1381189, + "ts": 1587596136737.692, "dur": 36.757, + "args": { + "External id": 3291600,"Record function id": 0, "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 1336756, "tid": 1381189, + "ts": 1587596136782.233, "dur": 59.223, + "args": { + "External id": 3291601,"Record function id": 0, "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 1336756, "tid": 1381189, + "ts": 1587596136850.966, "dur": 38429.349, + "args": { + "External id": 3291602,"Record function id": 0, "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596136958.320, "dur": 7.062, + "args": { + "External id": 3291603,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596136976.919, "dur": 39.497, + "args": { + "External id": 3291604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596137032.987, "dur": 37443.541, + "args": { + "External id": 3291605,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596137046.179, "dur": 37421.775, + "args": { + "External id": 3291606,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596137100.992, "dur": 14.036, + "args": { + "External id": 3291607,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596137121.832, "dur": 37308.050, + "args": { + "External id": 3291608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596137124.178, "dur": 37305.065, + "args": { + "External id": 3291609,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596137127.923, "dur": 6.863, + "args": { + "External id": 3291610,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596137136.524, "dur": 37288.839, + "args": { + "External id": 3291611,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596174560.236, "dur": 8.258, + "args": { + "External id": 3291612,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596174563.059, "dur": 5.126, + "args": { + "External id": 3291613,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596174595.903, "dur": 360.451, + "args": { + "External id": 3291614,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596174628.091, "dur": 323.463, + "args": { + "External id": 3291615,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2014, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596174639.654, "dur": 306.330, + "args": { + "External id": 3291616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596175011.659, "dur": 3.489, + "args": { + "External id": 3291617,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2016, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175077.969, "dur": 6.479, + "args": { + "External id": 3291618,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175128.338, "dur": 3.306, + "args": { + "External id": 3291619,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175146.982, "dur": 1.474, + "args": { + "External id": 3291620,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175160.524, "dur": 0.750, + "args": { + "External id": 3291621,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175171.837, "dur": 1.213, + "args": { + "External id": 3291622,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175183.281, "dur": 2.674, + "args": { + "External id": 3291623,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175199.601, "dur": 1.276, + "args": { + "External id": 3291624,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175211.090, "dur": 2.496, + "args": { + "External id": 3291625,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175222.549, "dur": 1.027, + "args": { + "External id": 3291626,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596175310.740, "dur": 2747.182, + "args": { + "External id": 3291627,"Record function id": 0, "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 1336756, "tid": 1381189, + "ts": 1587596175327.731, "dur": 1040.807, + "args": { + "External id": 3291628,"Record function id": 0, "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 1336756, "tid": 1381189, + "ts": 1587596175340.711, "dur": 310.927, + "args": { + "External id": 3291629,"Record function id": 0, "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175412.498, "dur": 6.282, + "args": { + "External id": 3291630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175422.066, "dur": 0.636, + "args": { + "External id": 3291631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175428.910, "dur": 1.110, + "args": { + "External id": 3291632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175431.543, "dur": 0.851, + "args": { + "External id": 3291633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175433.969, "dur": 1.405, + "args": { + "External id": 3291634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175436.859, "dur": 0.866, + "args": { + "External id": 3291635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175440.950, "dur": 2.538, + "args": { + "External id": 3291636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175445.156, "dur": 0.885, + "args": { + "External id": 3291637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175447.535, "dur": 2.622, + "args": { + "External id": 3291638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596175451.893, "dur": 0.703, + "args": { + "External id": 3291639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596175472.661, "dur": 148.927, + "args": { + "External id": 3291640,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596175488.536, "dur": 129.114, + "args": { + "External id": 3291641,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596175504.833, "dur": 12.014, + "args": { + "External id": 3291642,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596175520.074, "dur": 67.732, + "args": { + "External id": 3291643,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596175522.434, "dur": 65.062, + "args": { + "External id": 3291644,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175526.843, "dur": 6.719, + "args": { + "External id": 3291645,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596175535.266, "dur": 51.546, + "args": { + "External id": 3291646,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 1336756, "tid": 1381189, + "ts": 1587596175723.942, "dur": 636.946, + "args": { + "External id": 3291647,"Record function id": 0, "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 1336756, "tid": 1381189, + "ts": 1587596175738.782, "dur": 610.001, + "args": { + "External id": 3291648,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596175813.046, "dur": 5.025, + "args": { + "External id": 3291649,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596175833.657, "dur": 29.958, + "args": { + "External id": 3291650,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175838.417, "dur": 2.140, + "args": { + "External id": 3291651,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175842.296, "dur": 0.374, + "args": { + "External id": 3291652,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175843.891, "dur": 0.392, + "args": { + "External id": 3291653,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175846.764, "dur": 2.052, + "args": { + "External id": 3291654,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175849.900, "dur": 0.295, + "args": { + "External id": 3291655,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175851.490, "dur": 0.313, + "args": { + "External id": 3291656,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175854.565, "dur": 0.253, + "args": { + "External id": 3291657,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175855.988, "dur": 0.309, + "args": { + "External id": 3291658,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596175857.371, "dur": 1.631, + "args": { + "External id": 3291659,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596175892.828, "dur": 37.380, + "args": { + "External id": 3291660,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596175964.108, "dur": 149.224, + "args": { + "External id": 3291661,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596175975.016, "dur": 4.212, + "args": { + "External id": 3291662,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596176020.425, "dur": 10.976, + "args": { + "External id": 3291663,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596176024.251, "dur": 6.767, + "args": { + "External id": 3291664,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176028.373, "dur": 0.817, + "args": { + "External id": 3291665,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596176039.867, "dur": 27.487, + "args": { + "External id": 3291666,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176042.254, "dur": 2.322, + "args": { + "External id": 3291667,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176046.164, "dur": 0.355, + "args": { + "External id": 3291668,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176048.110, "dur": 1.242, + "args": { + "External id": 3291669,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176050.488, "dur": 0.295, + "args": { + "External id": 3291670,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176052.120, "dur": 0.229, + "args": { + "External id": 3291671,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176055.117, "dur": 0.313, + "args": { + "External id": 3291672,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176057.163, "dur": 0.372, + "args": { + "External id": 3291673,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176058.849, "dur": 0.321, + "args": { + "External id": 3291674,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596176061.009, "dur": 2.133, + "args": { + "External id": 3291675,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596176082.034, "dur": 22.810, + "args": { + "External id": 3291676,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596176161.553, "dur": 117.414, + "args": { + "External id": 3291677,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596176190.873, "dur": 84.683, + "args": { + "External id": 3291678,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2077, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596176200.686, "dur": 70.647, + "args": { + "External id": 3291679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596176297.217, "dur": 1.909, + "args": { + "External id": 3291680,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2079, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596176374.894, "dur": 1661.419, + "args": { + "External id": 3291681,"Sequence number": 32987154, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2080 + } + }, + { + "ph": "f", "id": 178, "pid": 1336756, "tid": 1381189, "ts": 1587596176374.894, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596176478.470, "dur": 101.581, + "args": { + "External id": 3291682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596176621.109, "dur": 37.598, + "args": { + "External id": 3291683,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596176676.318, "dur": 48.512, + "args": { + "External id": 3291684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596176733.769, "dur": 32.123, + "args": { + "External id": 3291685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596176772.070, "dur": 44.796, + "args": { + "External id": 3291686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596176822.940, "dur": 27.303, + "args": { + "External id": 3291687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596176858.885, "dur": 63.698, + "args": { + "External id": 3291688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596176951.560, "dur": 24.442, + "args": { + "External id": 3291689,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596177034.784, "dur": 33.145, + "args": { + "External id": 3291690,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596177088.327, "dur": 21.064, + "args": { + "External id": 3291691,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596177125.508, "dur": 15.603, + "args": { + "External id": 3291692,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177151.708, "dur": 37.184, + "args": { + "External id": 3291693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177192.365, "dur": 36.209, + "args": { + "External id": 3291694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596177261.228, "dur": 169.763, + "args": { + "External id": 3291695,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596177339.066, "dur": 5.707, + "args": { + "External id": 3291696,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596177346.397, "dur": 2.504, + "args": { + "External id": 3291697,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596177462.903, "dur": 25.213, + "args": { + "External id": 3291698,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596177500.528, "dur": 15.638, + "args": { + "External id": 3291699,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177523.372, "dur": 33.392, + "args": { + "External id": 3291700,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177564.695, "dur": 33.322, + "args": { + "External id": 3291701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177604.836, "dur": 21.173, + "args": { + "External id": 3291702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177630.007, "dur": 29.689, + "args": { + "External id": 3291703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177665.113, "dur": 19.622, + "args": { + "External id": 3291704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596177694.217, "dur": 29.759, + "args": { + "External id": 3291705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596177742.022, "dur": 38.982, + "args": { + "External id": 3291706,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596177807.835, "dur": 25.712, + "args": { + "External id": 3291707,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596177847.701, "dur": 17.852, + "args": { + "External id": 3291708,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596177911.222, "dur": 16.894, + "args": { + "External id": 3291709,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596177945.588, "dur": 18.602, + "args": { + "External id": 3291710,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178080.159, "dur": 16.853, + "args": { + "External id": 3291711,"Record function id": 0, "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178083.659, "dur": 12.446, + "args": { + "External id": 3291712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178088.016, "dur": 7.332, + "args": { + "External id": 3291713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178089.463, "dur": 5.731, + "args": { + "External id": 3291714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178100.701, "dur": 4.641, + "args": { + "External id": 3291715,"Record function id": 0, "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178102.124, "dur": 2.768, + "args": { + "External id": 3291716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178102.915, "dur": 1.517, + "args": { + "External id": 3291717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178103.647, "dur": 0.683, + "args": { + "External id": 3291718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178108.746, "dur": 5.595, + "args": { + "External id": 3291719,"Record function id": 0, "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178110.490, "dur": 3.438, + "args": { + "External id": 3291720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178111.554, "dur": 1.887, + "args": { + "External id": 3291721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178112.292, "dur": 1.033, + "args": { + "External id": 3291722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178117.438, "dur": 3.777, + "args": { + "External id": 3291723,"Record function id": 0, "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178118.616, "dur": 2.192, + "args": { + "External id": 3291724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178119.381, "dur": 0.952, + "args": { + "External id": 3291725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178119.713, "dur": 0.546, + "args": { + "External id": 3291726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178124.309, "dur": 3.459, + "args": { + "External id": 3291727,"Record function id": 0, "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178125.401, "dur": 1.975, + "args": { + "External id": 3291728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178125.921, "dur": 0.981, + "args": { + "External id": 3291729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178126.185, "dur": 0.642, + "args": { + "External id": 3291730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178130.872, "dur": 3.550, + "args": { + "External id": 3291731,"Record function id": 0, "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178131.992, "dur": 2.023, + "args": { + "External id": 3291732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178132.546, "dur": 1.031, + "args": { + "External id": 3291733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178132.845, "dur": 0.656, + "args": { + "External id": 3291734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178137.556, "dur": 5.848, + "args": { + "External id": 3291735,"Record function id": 0, "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178138.890, "dur": 4.123, + "args": { + "External id": 3291736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178139.399, "dur": 3.062, + "args": { + "External id": 3291737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178141.882, "dur": 0.513, + "args": { + "External id": 3291738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178146.583, "dur": 5.033, + "args": { + "External id": 3291739,"Record function id": 0, "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178147.698, "dur": 3.516, + "args": { + "External id": 3291740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178148.193, "dur": 2.619, + "args": { + "External id": 3291741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178148.469, "dur": 2.269, + "args": { + "External id": 3291742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178154.580, "dur": 3.524, + "args": { + "External id": 3291743,"Record function id": 0, "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596178155.787, "dur": 1.922, + "args": { + "External id": 3291744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178156.233, "dur": 1.055, + "args": { + "External id": 3291745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596178156.499, "dur": 0.715, + "args": { + "External id": 3291746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596178161.602, "dur": 38206.156, + "args": { + "External id": 3291747,"Record function id": 0, "Sequence number": 32987153, "Fwd thread id": 1, "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596178162.873, "dur": 38196.918, + "args": { + "External id": 3291748,"Sequence number": 32987153, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2147 + } + }, + { + "ph": "f", "id": 179, "pid": 1336756, "tid": 1381189, "ts": 1587596178162.873, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 1336756, "tid": 1381189, + "ts": 1587596178191.388, "dur": 34.921, + "args": { + "External id": 3291749,"Record function id": 0, "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 1336756, "tid": 1381189, + "ts": 1587596178233.354, "dur": 57.239, + "args": { + "External id": 3291750,"Record function id": 0, "Ev Idx": 2149 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 1336756, "tid": 1381189, + "ts": 1587596178297.980, "dur": 38053.226, + "args": { + "External id": 3291751,"Record function id": 0, "Ev Idx": 2150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596178382.603, "dur": 6.382, + "args": { + "External id": 3291752,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596178398.556, "dur": 4.779, + "args": { + "External id": 3291753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596178417.059, "dur": 37108.630, + "args": { + "External id": 3291754,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596178430.058, "dur": 37086.796, + "args": { + "External id": 3291755,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596178481.688, "dur": 13.164, + "args": { + "External id": 3291756,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596178501.074, "dur": 36976.935, + "args": { + "External id": 3291757,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596178504.869, "dur": 36972.506, + "args": { + "External id": 3291758,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596178508.782, "dur": 4.827, + "args": { + "External id": 3291759,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596178515.250, "dur": 36958.554, + "args": { + "External id": 3291760,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596215613.427, "dur": 8.311, + "args": { + "External id": 3291761,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596215616.405, "dur": 4.965, + "args": { + "External id": 3291762,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596215652.622, "dur": 390.951, + "args": { + "External id": 3291763,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596215683.063, "dur": 355.219, + "args": { + "External id": 3291764,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2163, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596215693.649, "dur": 338.281, + "args": { + "External id": 3291765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596216070.319, "dur": 2.862, + "args": { + "External id": 3291766,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2165, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216133.007, "dur": 6.678, + "args": { + "External id": 3291767,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216187.473, "dur": 1.272, + "args": { + "External id": 3291768,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216205.456, "dur": 1.384, + "args": { + "External id": 3291769,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216219.166, "dur": 1.087, + "args": { + "External id": 3291770,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216232.714, "dur": 0.701, + "args": { + "External id": 3291771,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216246.050, "dur": 0.953, + "args": { + "External id": 3291772,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216258.441, "dur": 0.833, + "args": { + "External id": 3291773,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216272.230, "dur": 2.740, + "args": { + "External id": 3291774,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216288.534, "dur": 0.837, + "args": { + "External id": 3291775,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596216382.729, "dur": 2728.848, + "args": { + "External id": 3291776,"Record function id": 0, "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 1336756, "tid": 1381189, + "ts": 1587596216401.133, "dur": 1010.027, + "args": { + "External id": 3291777,"Record function id": 0, "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 1336756, "tid": 1381189, + "ts": 1587596216416.078, "dur": 305.454, + "args": { + "External id": 3291778,"Record function id": 0, "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216492.154, "dur": 3.695, + "args": { + "External id": 3291779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216499.113, "dur": 1.041, + "args": { + "External id": 3291780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216502.027, "dur": 1.080, + "args": { + "External id": 3291781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216505.224, "dur": 1.023, + "args": { + "External id": 3291782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216509.441, "dur": 0.961, + "args": { + "External id": 3291783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216511.765, "dur": 0.922, + "args": { + "External id": 3291784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216514.258, "dur": 2.346, + "args": { + "External id": 3291785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216518.005, "dur": 3.186, + "args": { + "External id": 3291786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216524.637, "dur": 0.820, + "args": { + "External id": 3291787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596216526.977, "dur": 0.980, + "args": { + "External id": 3291788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596216545.373, "dur": 144.334, + "args": { + "External id": 3291789,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596216561.148, "dur": 124.365, + "args": { + "External id": 3291790,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596216579.210, "dur": 13.067, + "args": { + "External id": 3291791,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596216595.780, "dur": 63.401, + "args": { + "External id": 3291792,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596216598.472, "dur": 60.339, + "args": { + "External id": 3291793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216602.191, "dur": 5.017, + "args": { + "External id": 3291794,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596216610.562, "dur": 47.431, + "args": { + "External id": 3291795,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 1336756, "tid": 1381189, + "ts": 1587596216792.441, "dur": 610.518, + "args": { + "External id": 3291796,"Record function id": 0, "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 1336756, "tid": 1381189, + "ts": 1587596216809.467, "dur": 580.930, + "args": { + "External id": 3291797,"Record function id": 0, "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596216858.726, "dur": 4.366, + "args": { + "External id": 3291798,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596216897.542, "dur": 34.903, + "args": { + "External id": 3291799,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216902.881, "dur": 1.920, + "args": { + "External id": 3291800,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216907.941, "dur": 0.553, + "args": { + "External id": 3291801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216909.902, "dur": 2.566, + "args": { + "External id": 3291802,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216913.975, "dur": 1.796, + "args": { + "External id": 3291803,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216917.339, "dur": 0.425, + "args": { + "External id": 3291804,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216919.278, "dur": 0.326, + "args": { + "External id": 3291805,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216922.305, "dur": 0.321, + "args": { + "External id": 3291806,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216923.697, "dur": 0.627, + "args": { + "External id": 3291807,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596216925.718, "dur": 0.470, + "args": { + "External id": 3291808,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596216943.164, "dur": 32.324, + "args": { + "External id": 3291809,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596217044.807, "dur": 108.438, + "args": { + "External id": 3291810,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596217055.732, "dur": 4.750, + "args": { + "External id": 3291811,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596217065.540, "dur": 13.623, + "args": { + "External id": 3291812,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596217069.488, "dur": 9.217, + "args": { + "External id": 3291813,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217073.032, "dur": 3.985, + "args": { + "External id": 3291814,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596217086.049, "dur": 26.659, + "args": { + "External id": 3291815,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217088.077, "dur": 0.340, + "args": { + "External id": 3291816,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217089.675, "dur": 0.757, + "args": { + "External id": 3291817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217091.690, "dur": 0.652, + "args": { + "External id": 3291818,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217094.566, "dur": 0.243, + "args": { + "External id": 3291819,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217096.180, "dur": 0.576, + "args": { + "External id": 3291820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217098.208, "dur": 1.181, + "args": { + "External id": 3291821,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217100.757, "dur": 0.320, + "args": { + "External id": 3291822,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217102.643, "dur": 2.080, + "args": { + "External id": 3291823,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596217106.877, "dur": 0.259, + "args": { + "External id": 3291824,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596217122.876, "dur": 23.119, + "args": { + "External id": 3291825,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596217201.853, "dur": 118.376, + "args": { + "External id": 3291826,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596217229.555, "dur": 87.529, + "args": { + "External id": 3291827,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2226, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596217241.336, "dur": 71.701, + "args": { + "External id": 3291828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596217339.796, "dur": 1.763, + "args": { + "External id": 3291829,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2228, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596217418.087, "dur": 1670.070, + "args": { + "External id": 3291830,"Sequence number": 32987152, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2229 + } + }, + { + "ph": "f", "id": 180, "pid": 1336756, "tid": 1381189, "ts": 1587596217418.087, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596217520.765, "dur": 102.115, + "args": { + "External id": 3291831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596217662.042, "dur": 37.635, + "args": { + "External id": 3291832,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596217717.520, "dur": 49.486, + "args": { + "External id": 3291833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596217775.767, "dur": 31.842, + "args": { + "External id": 3291834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596217814.767, "dur": 47.711, + "args": { + "External id": 3291835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596217890.453, "dur": 34.717, + "args": { + "External id": 3291836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596217935.824, "dur": 42.116, + "args": { + "External id": 3291837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596218043.940, "dur": 28.801, + "args": { + "External id": 3291838,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596218091.372, "dur": 30.125, + "args": { + "External id": 3291839,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596218143.797, "dur": 18.989, + "args": { + "External id": 3291840,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596218176.541, "dur": 15.566, + "args": { + "External id": 3291841,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218202.678, "dur": 35.119, + "args": { + "External id": 3291842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218241.320, "dur": 35.320, + "args": { + "External id": 3291843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596218303.483, "dur": 163.784, + "args": { + "External id": 3291844,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596218376.436, "dur": 6.163, + "args": { + "External id": 3291845,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596218384.618, "dur": 2.228, + "args": { + "External id": 3291846,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596218498.744, "dur": 23.430, + "args": { + "External id": 3291847,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596218536.063, "dur": 15.548, + "args": { + "External id": 3291848,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218560.773, "dur": 33.388, + "args": { + "External id": 3291849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218599.425, "dur": 35.850, + "args": { + "External id": 3291850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218651.685, "dur": 21.343, + "args": { + "External id": 3291851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218677.695, "dur": 30.985, + "args": { + "External id": 3291852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218716.809, "dur": 20.466, + "args": { + "External id": 3291853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596218744.010, "dur": 29.207, + "args": { + "External id": 3291854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596218796.138, "dur": 22.094, + "args": { + "External id": 3291855,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596218835.116, "dur": 60.580, + "args": { + "External id": 3291856,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596218922.856, "dur": 22.510, + "args": { + "External id": 3291857,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596218964.248, "dur": 16.174, + "args": { + "External id": 3291858,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596219035.125, "dur": 21.249, + "args": { + "External id": 3291859,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219133.542, "dur": 15.928, + "args": { + "External id": 3291860,"Record function id": 0, "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219137.291, "dur": 11.349, + "args": { + "External id": 3291861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219141.614, "dur": 6.108, + "args": { + "External id": 3291862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219143.644, "dur": 3.940, + "args": { + "External id": 3291863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219153.307, "dur": 5.314, + "args": { + "External id": 3291864,"Record function id": 0, "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219154.925, "dur": 3.236, + "args": { + "External id": 3291865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219155.833, "dur": 1.783, + "args": { + "External id": 3291866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219156.751, "dur": 0.775, + "args": { + "External id": 3291867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219161.792, "dur": 4.143, + "args": { + "External id": 3291868,"Record function id": 0, "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219162.914, "dur": 2.617, + "args": { + "External id": 3291869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219163.366, "dur": 1.719, + "args": { + "External id": 3291870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219163.843, "dur": 1.184, + "args": { + "External id": 3291871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219169.038, "dur": 3.945, + "args": { + "External id": 3291872,"Record function id": 0, "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219170.369, "dur": 2.218, + "args": { + "External id": 3291873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219170.988, "dur": 1.175, + "args": { + "External id": 3291874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219171.523, "dur": 0.565, + "args": { + "External id": 3291875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219176.090, "dur": 3.718, + "args": { + "External id": 3291876,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219177.339, "dur": 2.051, + "args": { + "External id": 3291877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219177.926, "dur": 1.078, + "args": { + "External id": 3291878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219178.401, "dur": 0.528, + "args": { + "External id": 3291879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219182.846, "dur": 4.298, + "args": { + "External id": 3291880,"Record function id": 0, "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219184.047, "dur": 2.671, + "args": { + "External id": 3291881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219184.723, "dur": 1.577, + "args": { + "External id": 3291882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219185.426, "dur": 0.781, + "args": { + "External id": 3291883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219190.592, "dur": 6.176, + "args": { + "External id": 3291884,"Record function id": 0, "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219191.823, "dur": 4.499, + "args": { + "External id": 3291885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219192.271, "dur": 3.620, + "args": { + "External id": 3291886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219192.768, "dur": 3.022, + "args": { + "External id": 3291887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219199.856, "dur": 4.137, + "args": { + "External id": 3291888,"Record function id": 0, "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219201.416, "dur": 2.131, + "args": { + "External id": 3291889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219201.909, "dur": 1.184, + "args": { + "External id": 3291890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219202.209, "dur": 0.781, + "args": { + "External id": 3291891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219206.927, "dur": 4.244, + "args": { + "External id": 3291892,"Record function id": 0, "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596219208.238, "dur": 2.493, + "args": { + "External id": 3291893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219208.898, "dur": 1.416, + "args": { + "External id": 3291894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596219209.358, "dur": 0.830, + "args": { + "External id": 3291895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596219214.770, "dur": 37451.052, + "args": { + "External id": 3291896,"Record function id": 0, "Sequence number": 32987151, "Fwd thread id": 1, "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596219216.011, "dur": 37441.303, + "args": { + "External id": 3291897,"Sequence number": 32987151, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2296 + } + }, + { + "ph": "f", "id": 181, "pid": 1336756, "tid": 1381189, "ts": 1587596219216.011, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 1336756, "tid": 1381189, + "ts": 1587596219245.248, "dur": 36.493, + "args": { + "External id": 3291898,"Record function id": 0, "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 1336756, "tid": 1381189, + "ts": 1587596219289.687, "dur": 60.138, + "args": { + "External id": 3291899,"Record function id": 0, "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 1336756, "tid": 1381189, + "ts": 1587596219355.838, "dur": 37292.878, + "args": { + "External id": 3291900,"Record function id": 0, "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596219448.138, "dur": 6.009, + "args": { + "External id": 3291901,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596219463.712, "dur": 4.658, + "args": { + "External id": 3291902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596219482.383, "dur": 36270.533, + "args": { + "External id": 3291903,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596219494.923, "dur": 36247.258, + "args": { + "External id": 3291904,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596219543.764, "dur": 13.963, + "args": { + "External id": 3291905,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596219564.151, "dur": 36135.848, + "args": { + "External id": 3291906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596219566.768, "dur": 36131.993, + "args": { + "External id": 3291907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596219571.368, "dur": 4.777, + "args": { + "External id": 3291908,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596219578.232, "dur": 36116.581, + "args": { + "External id": 3291909,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596255850.799, "dur": 10.132, + "args": { + "External id": 3291910,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596255854.534, "dur": 5.968, + "args": { + "External id": 3291911,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596255906.519, "dur": 427.143, + "args": { + "External id": 3291912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596255933.778, "dur": 394.349, + "args": { + "External id": 3291913,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2312, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596255946.458, "dur": 375.213, + "args": { + "External id": 3291914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596256356.406, "dur": 2.599, + "args": { + "External id": 3291915,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2314, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256423.690, "dur": 9.750, + "args": { + "External id": 3291916,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256481.256, "dur": 1.343, + "args": { + "External id": 3291917,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256500.362, "dur": 1.089, + "args": { + "External id": 3291918,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256514.817, "dur": 0.947, + "args": { + "External id": 3291919,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256529.187, "dur": 2.906, + "args": { + "External id": 3291920,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256544.455, "dur": 0.983, + "args": { + "External id": 3291921,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256557.959, "dur": 1.305, + "args": { + "External id": 3291922,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256571.981, "dur": 2.300, + "args": { + "External id": 3291923,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256585.343, "dur": 2.799, + "args": { + "External id": 3291924,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596256680.744, "dur": 2800.862, + "args": { + "External id": 3291925,"Record function id": 0, "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 1336756, "tid": 1381189, + "ts": 1587596256699.790, "dur": 1035.820, + "args": { + "External id": 3291926,"Record function id": 0, "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 1336756, "tid": 1381189, + "ts": 1587596256714.704, "dur": 382.534, + "args": { + "External id": 3291927,"Record function id": 0, "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256790.964, "dur": 3.844, + "args": { + "External id": 3291928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256798.138, "dur": 1.574, + "args": { + "External id": 3291929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256801.659, "dur": 0.930, + "args": { + "External id": 3291930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256804.562, "dur": 1.112, + "args": { + "External id": 3291931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256807.588, "dur": 1.218, + "args": { + "External id": 3291932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256810.717, "dur": 1.466, + "args": { + "External id": 3291933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256813.858, "dur": 4.280, + "args": { + "External id": 3291934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256819.809, "dur": 1.075, + "args": { + "External id": 3291935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256822.487, "dur": 1.049, + "args": { + "External id": 3291936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596256825.147, "dur": 0.815, + "args": { + "External id": 3291937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596256843.936, "dur": 216.415, + "args": { + "External id": 3291938,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596256859.761, "dur": 195.111, + "args": { + "External id": 3291939,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596256901.458, "dur": 13.427, + "args": { + "External id": 3291940,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596256918.921, "dur": 103.453, + "args": { + "External id": 3291941,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596256921.673, "dur": 100.198, + "args": { + "External id": 3291942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596256925.892, "dur": 6.605, + "args": { + "External id": 3291943,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596256934.543, "dur": 86.232, + "args": { + "External id": 3291944,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 1336756, "tid": 1381189, + "ts": 1587596257180.000, "dur": 548.041, + "args": { + "External id": 3291945,"Record function id": 0, "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 1336756, "tid": 1381189, + "ts": 1587596257198.551, "dur": 517.072, + "args": { + "External id": 3291946,"Record function id": 0, "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596257255.948, "dur": 5.738, + "args": { + "External id": 3291947,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596257277.452, "dur": 32.647, + "args": { + "External id": 3291948,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257282.244, "dur": 1.420, + "args": { + "External id": 3291949,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257286.347, "dur": 2.375, + "args": { + "External id": 3291950,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257290.956, "dur": 0.563, + "args": { + "External id": 3291951,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257293.806, "dur": 0.726, + "args": { + "External id": 3291952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257296.083, "dur": 0.590, + "args": { + "External id": 3291953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257298.395, "dur": 0.507, + "args": { + "External id": 3291954,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257300.757, "dur": 0.411, + "args": { + "External id": 3291955,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257303.104, "dur": 0.503, + "args": { + "External id": 3291956,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257305.330, "dur": 0.369, + "args": { + "External id": 3291957,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596257325.321, "dur": 34.081, + "args": { + "External id": 3291958,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596257393.957, "dur": 102.840, + "args": { + "External id": 3291959,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596257404.495, "dur": 4.937, + "args": { + "External id": 3291960,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596257414.713, "dur": 10.152, + "args": { + "External id": 3291961,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596257419.011, "dur": 5.428, + "args": { + "External id": 3291962,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257422.644, "dur": 0.618, + "args": { + "External id": 3291963,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596257431.634, "dur": 26.329, + "args": { + "External id": 3291964,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257434.082, "dur": 0.479, + "args": { + "External id": 3291965,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257436.329, "dur": 0.414, + "args": { + "External id": 3291966,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257438.302, "dur": 0.356, + "args": { + "External id": 3291967,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257440.382, "dur": 0.375, + "args": { + "External id": 3291968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257442.495, "dur": 0.439, + "args": { + "External id": 3291969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257444.755, "dur": 0.400, + "args": { + "External id": 3291970,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257447.341, "dur": 2.547, + "args": { + "External id": 3291971,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257451.568, "dur": 0.436, + "args": { + "External id": 3291972,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596257453.820, "dur": 0.424, + "args": { + "External id": 3291973,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596257469.083, "dur": 20.360, + "args": { + "External id": 3291974,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596257541.264, "dur": 109.231, + "args": { + "External id": 3291975,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596257563.939, "dur": 83.004, + "args": { + "External id": 3291976,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2375, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596257573.137, "dur": 69.169, + "args": { + "External id": 3291977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596257666.241, "dur": 2.090, + "args": { + "External id": 3291978,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2377, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596257742.122, "dur": 1721.059, + "args": { + "External id": 3291979,"Sequence number": 32987150, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2378 + } + }, + { + "ph": "f", "id": 182, "pid": 1336756, "tid": 1381189, "ts": 1587596257742.122, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596257843.584, "dur": 123.111, + "args": { + "External id": 3291980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596258043.232, "dur": 41.990, + "args": { + "External id": 3291981,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258107.360, "dur": 60.320, + "args": { + "External id": 3291982,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258178.099, "dur": 34.794, + "args": { + "External id": 3291983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258219.728, "dur": 48.386, + "args": { + "External id": 3291984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258280.292, "dur": 29.580, + "args": { + "External id": 3291985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258317.223, "dur": 46.845, + "args": { + "External id": 3291986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596258386.605, "dur": 23.473, + "args": { + "External id": 3291987,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596258437.408, "dur": 29.996, + "args": { + "External id": 3291988,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596258486.647, "dur": 20.421, + "args": { + "External id": 3291989,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596258520.668, "dur": 15.887, + "args": { + "External id": 3291990,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258548.557, "dur": 29.431, + "args": { + "External id": 3291991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258581.356, "dur": 35.783, + "args": { + "External id": 3291992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596258643.170, "dur": 173.166, + "args": { + "External id": 3291993,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596258720.672, "dur": 5.249, + "args": { + "External id": 3291994,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596258728.166, "dur": 3.574, + "args": { + "External id": 3291995,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596258846.413, "dur": 40.293, + "args": { + "External id": 3291996,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596258900.155, "dur": 18.478, + "args": { + "External id": 3291997,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258928.177, "dur": 39.278, + "args": { + "External id": 3291998,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596258974.971, "dur": 87.875, + "args": { + "External id": 3291999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596259074.230, "dur": 25.018, + "args": { + "External id": 3292000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596259107.572, "dur": 45.860, + "args": { + "External id": 3292001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596259167.753, "dur": 27.755, + "args": { + "External id": 3292002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596259204.747, "dur": 33.855, + "args": { + "External id": 3292003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596259261.526, "dur": 24.990, + "args": { + "External id": 3292004,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596259304.056, "dur": 27.110, + "args": { + "External id": 3292005,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596259346.169, "dur": 18.862, + "args": { + "External id": 3292006,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596259379.180, "dur": 16.209, + "args": { + "External id": 3292007,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596259416.838, "dur": 17.699, + "args": { + "External id": 3292008,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259504.141, "dur": 14.551, + "args": { + "External id": 3292009,"Record function id": 0, "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259507.222, "dur": 10.550, + "args": { + "External id": 3292010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259511.396, "dur": 5.431, + "args": { + "External id": 3292011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259513.019, "dur": 3.680, + "args": { + "External id": 3292012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259522.564, "dur": 5.803, + "args": { + "External id": 3292013,"Record function id": 0, "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259524.139, "dur": 3.775, + "args": { + "External id": 3292014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259524.979, "dur": 2.416, + "args": { + "External id": 3292015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259526.392, "dur": 0.933, + "args": { + "External id": 3292016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259531.489, "dur": 4.862, + "args": { + "External id": 3292017,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259532.809, "dur": 3.120, + "args": { + "External id": 3292018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259533.913, "dur": 1.601, + "args": { + "External id": 3292019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259534.525, "dur": 0.898, + "args": { + "External id": 3292020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259539.429, "dur": 5.821, + "args": { + "External id": 3292021,"Record function id": 0, "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259540.738, "dur": 4.084, + "args": { + "External id": 3292022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259541.397, "dur": 2.872, + "args": { + "External id": 3292023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259541.757, "dur": 2.429, + "args": { + "External id": 3292024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259548.232, "dur": 3.677, + "args": { + "External id": 3292025,"Record function id": 0, "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259549.520, "dur": 1.987, + "args": { + "External id": 3292026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259549.968, "dur": 1.125, + "args": { + "External id": 3292027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259550.220, "dur": 0.801, + "args": { + "External id": 3292028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259555.017, "dur": 3.870, + "args": { + "External id": 3292029,"Record function id": 0, "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259556.252, "dur": 2.210, + "args": { + "External id": 3292030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259556.735, "dur": 1.307, + "args": { + "External id": 3292031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259557.245, "dur": 0.678, + "args": { + "External id": 3292032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259562.154, "dur": 4.064, + "args": { + "External id": 3292033,"Record function id": 0, "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259563.482, "dur": 2.313, + "args": { + "External id": 3292034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259564.233, "dur": 1.167, + "args": { + "External id": 3292035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259564.502, "dur": 0.813, + "args": { + "External id": 3292036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259569.234, "dur": 3.655, + "args": { + "External id": 3292037,"Record function id": 0, "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259570.420, "dur": 2.012, + "args": { + "External id": 3292038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259570.900, "dur": 1.141, + "args": { + "External id": 3292039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259571.355, "dur": 0.616, + "args": { + "External id": 3292040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259575.918, "dur": 3.539, + "args": { + "External id": 3292041,"Record function id": 0, "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596259576.941, "dur": 2.077, + "args": { + "External id": 3292042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259577.414, "dur": 1.181, + "args": { + "External id": 3292043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596259577.962, "dur": 0.534, + "args": { + "External id": 3292044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596259582.933, "dur": 38259.575, + "args": { + "External id": 3292045,"Record function id": 0, "Sequence number": 32987149, "Fwd thread id": 1, "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596259584.109, "dur": 38250.764, + "args": { + "External id": 3292046,"Sequence number": 32987149, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2445 + } + }, + { + "ph": "f", "id": 183, "pid": 1336756, "tid": 1381189, "ts": 1587596259584.109, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 1336756, "tid": 1381189, + "ts": 1587596259614.424, "dur": 38.148, + "args": { + "External id": 3292047,"Record function id": 0, "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 1336756, "tid": 1381189, + "ts": 1587596259660.466, "dur": 58.993, + "args": { + "External id": 3292048,"Record function id": 0, "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 1336756, "tid": 1381189, + "ts": 1587596259725.201, "dur": 38102.483, + "args": { + "External id": 3292049,"Record function id": 0, "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596259809.399, "dur": 6.707, + "args": { + "External id": 3292050,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596259826.241, "dur": 4.604, + "args": { + "External id": 3292051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596259844.309, "dur": 37226.439, + "args": { + "External id": 3292052,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596259858.094, "dur": 37203.075, + "args": { + "External id": 3292053,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596259921.585, "dur": 19.969, + "args": { + "External id": 3292054,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596259948.095, "dur": 37072.923, + "args": { + "External id": 3292055,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596259950.789, "dur": 37069.666, + "args": { + "External id": 3292056,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596259954.843, "dur": 6.302, + "args": { + "External id": 3292057,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596259963.027, "dur": 37053.634, + "args": { + "External id": 3292058,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596297157.447, "dur": 8.798, + "args": { + "External id": 3292059,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596297160.620, "dur": 5.262, + "args": { + "External id": 3292060,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596297197.277, "dur": 339.539, + "args": { + "External id": 3292061,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596297224.066, "dur": 308.412, + "args": { + "External id": 3292062,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2461, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596297235.171, "dur": 291.856, + "args": { + "External id": 3292063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596297555.122, "dur": 2.074, + "args": { + "External id": 3292064,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2463, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297611.690, "dur": 6.170, + "args": { + "External id": 3292065,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297663.872, "dur": 1.460, + "args": { + "External id": 3292066,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297682.664, "dur": 0.977, + "args": { + "External id": 3292067,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297699.015, "dur": 1.149, + "args": { + "External id": 3292068,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297712.940, "dur": 0.957, + "args": { + "External id": 3292069,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297726.882, "dur": 1.023, + "args": { + "External id": 3292070,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297740.245, "dur": 1.013, + "args": { + "External id": 3292071,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297755.783, "dur": 2.243, + "args": { + "External id": 3292072,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596297770.389, "dur": 0.812, + "args": { + "External id": 3292073,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596297855.837, "dur": 2763.385, + "args": { + "External id": 3292074,"Record function id": 0, "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 1336756, "tid": 1381189, + "ts": 1587596297894.918, "dur": 1022.685, + "args": { + "External id": 3292075,"Record function id": 0, "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 1336756, "tid": 1381189, + "ts": 1587596297910.767, "dur": 377.315, + "args": { + "External id": 3292076,"Record function id": 0, "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298035.081, "dur": 4.662, + "args": { + "External id": 3292077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298043.646, "dur": 1.110, + "args": { + "External id": 3292078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298046.633, "dur": 1.241, + "args": { + "External id": 3292079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298049.438, "dur": 2.650, + "args": { + "External id": 3292080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298053.601, "dur": 0.917, + "args": { + "External id": 3292081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298056.177, "dur": 1.123, + "args": { + "External id": 3292082,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298058.874, "dur": 1.651, + "args": { + "External id": 3292083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298062.081, "dur": 1.018, + "args": { + "External id": 3292084,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298064.888, "dur": 0.792, + "args": { + "External id": 3292085,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596298067.206, "dur": 0.820, + "args": { + "External id": 3292086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596298087.859, "dur": 165.138, + "args": { + "External id": 3292087,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596298118.316, "dur": 130.092, + "args": { + "External id": 3292088,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596298136.431, "dur": 12.245, + "args": { + "External id": 3292089,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596298151.765, "dur": 67.122, + "args": { + "External id": 3292090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596298154.449, "dur": 64.099, + "args": { + "External id": 3292091,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298158.411, "dur": 8.025, + "args": { + "External id": 3292092,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596298168.281, "dur": 49.733, + "args": { + "External id": 3292093,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 1336756, "tid": 1381189, + "ts": 1587596298367.724, "dur": 542.171, + "args": { + "External id": 3292094,"Record function id": 0, "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 1336756, "tid": 1381189, + "ts": 1587596298384.021, "dur": 511.771, + "args": { + "External id": 3292095,"Record function id": 0, "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596298439.572, "dur": 4.326, + "args": { + "External id": 3292096,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596298458.304, "dur": 31.904, + "args": { + "External id": 3292097,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298463.654, "dur": 1.977, + "args": { + "External id": 3292098,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298467.704, "dur": 0.440, + "args": { + "External id": 3292099,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298470.048, "dur": 0.764, + "args": { + "External id": 3292100,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298472.402, "dur": 0.468, + "args": { + "External id": 3292101,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298474.525, "dur": 0.643, + "args": { + "External id": 3292102,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298476.669, "dur": 0.505, + "args": { + "External id": 3292103,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298479.215, "dur": 2.194, + "args": { + "External id": 3292104,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298483.260, "dur": 0.458, + "args": { + "External id": 3292105,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298485.645, "dur": 0.563, + "args": { + "External id": 3292106,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596298499.726, "dur": 29.805, + "args": { + "External id": 3292107,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596298559.936, "dur": 96.497, + "args": { + "External id": 3292108,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596298569.370, "dur": 2.753, + "args": { + "External id": 3292109,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596298577.450, "dur": 9.981, + "args": { + "External id": 3292110,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596298581.427, "dur": 5.587, + "args": { + "External id": 3292111,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298585.306, "dur": 0.451, + "args": { + "External id": 3292112,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596298594.219, "dur": 25.572, + "args": { + "External id": 3292113,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298596.414, "dur": 0.446, + "args": { + "External id": 3292114,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298599.232, "dur": 0.489, + "args": { + "External id": 3292115,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298601.459, "dur": 0.420, + "args": { + "External id": 3292116,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298603.836, "dur": 2.015, + "args": { + "External id": 3292117,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298607.488, "dur": 0.341, + "args": { + "External id": 3292118,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298609.564, "dur": 0.411, + "args": { + "External id": 3292119,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298611.837, "dur": 0.363, + "args": { + "External id": 3292120,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298613.937, "dur": 0.349, + "args": { + "External id": 3292121,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596298615.898, "dur": 0.515, + "args": { + "External id": 3292122,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596298629.138, "dur": 20.600, + "args": { + "External id": 3292123,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596298699.734, "dur": 113.251, + "args": { + "External id": 3292124,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596298724.341, "dur": 84.904, + "args": { + "External id": 3292125,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2524, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596298733.561, "dur": 71.924, + "args": { + "External id": 3292126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596298827.536, "dur": 1.963, + "args": { + "External id": 3292127,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2526, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596298925.423, "dur": 1674.673, + "args": { + "External id": 3292128,"Sequence number": 32987148, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2527 + } + }, + { + "ph": "f", "id": 184, "pid": 1336756, "tid": 1381189, "ts": 1587596298925.423, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299068.537, "dur": 106.355, + "args": { + "External id": 3292129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596299215.061, "dur": 38.544, + "args": { + "External id": 3292130,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299270.495, "dur": 51.113, + "args": { + "External id": 3292131,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299331.051, "dur": 33.369, + "args": { + "External id": 3292132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299373.723, "dur": 48.300, + "args": { + "External id": 3292133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299430.494, "dur": 29.780, + "args": { + "External id": 3292134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299467.952, "dur": 44.995, + "args": { + "External id": 3292135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596299534.583, "dur": 27.104, + "args": { + "External id": 3292136,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596299580.837, "dur": 30.367, + "args": { + "External id": 3292137,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596299629.539, "dur": 20.087, + "args": { + "External id": 3292138,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596299663.160, "dur": 15.072, + "args": { + "External id": 3292139,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299688.791, "dur": 30.140, + "args": { + "External id": 3292140,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596299722.518, "dur": 33.917, + "args": { + "External id": 3292141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596299785.051, "dur": 194.428, + "args": { + "External id": 3292142,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596299861.783, "dur": 22.877, + "args": { + "External id": 3292143,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596299888.338, "dur": 3.324, + "args": { + "External id": 3292144,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596300070.759, "dur": 27.858, + "args": { + "External id": 3292145,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596300111.775, "dur": 15.918, + "args": { + "External id": 3292146,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596300138.870, "dur": 44.115, + "args": { + "External id": 3292147,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596300189.834, "dur": 37.369, + "args": { + "External id": 3292148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596300236.771, "dur": 23.360, + "args": { + "External id": 3292149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596300265.459, "dur": 30.846, + "args": { + "External id": 3292150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596300303.216, "dur": 21.559, + "args": { + "External id": 3292151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596300332.255, "dur": 47.282, + "args": { + "External id": 3292152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596300404.511, "dur": 28.657, + "args": { + "External id": 3292153,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596300449.028, "dur": 24.273, + "args": { + "External id": 3292154,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596300490.544, "dur": 17.954, + "args": { + "External id": 3292155,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596300525.155, "dur": 15.726, + "args": { + "External id": 3292156,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596300555.513, "dur": 16.798, + "args": { + "External id": 3292157,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300640.949, "dur": 41.231, + "args": { + "External id": 3292158,"Record function id": 0, "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300645.083, "dur": 36.035, + "args": { + "External id": 3292159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300648.982, "dur": 31.166, + "args": { + "External id": 3292160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300675.908, "dur": 4.062, + "args": { + "External id": 3292161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300686.454, "dur": 5.300, + "args": { + "External id": 3292162,"Record function id": 0, "Ev Idx": 2561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300688.037, "dur": 3.236, + "args": { + "External id": 3292163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300688.644, "dur": 2.136, + "args": { + "External id": 3292164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300689.440, "dur": 1.207, + "args": { + "External id": 3292165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300694.902, "dur": 5.679, + "args": { + "External id": 3292166,"Record function id": 0, "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300696.128, "dur": 4.049, + "args": { + "External id": 3292167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300696.793, "dur": 2.944, + "args": { + "External id": 3292168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300697.123, "dur": 2.530, + "args": { + "External id": 3292169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300703.663, "dur": 3.689, + "args": { + "External id": 3292170,"Record function id": 0, "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300704.871, "dur": 2.056, + "args": { + "External id": 3292171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300705.487, "dur": 1.038, + "args": { + "External id": 3292172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300705.787, "dur": 0.667, + "args": { + "External id": 3292173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300710.334, "dur": 3.924, + "args": { + "External id": 3292174,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300711.513, "dur": 2.353, + "args": { + "External id": 3292175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300712.121, "dur": 1.340, + "args": { + "External id": 3292176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300712.851, "dur": 0.544, + "args": { + "External id": 3292177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300717.601, "dur": 3.667, + "args": { + "External id": 3292178,"Record function id": 0, "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300718.753, "dur": 2.111, + "args": { + "External id": 3292179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300719.330, "dur": 1.118, + "args": { + "External id": 3292180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300719.784, "dur": 0.571, + "args": { + "External id": 3292181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300724.654, "dur": 3.490, + "args": { + "External id": 3292182,"Record function id": 0, "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300725.813, "dur": 1.935, + "args": { + "External id": 3292183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300726.321, "dur": 1.022, + "args": { + "External id": 3292184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300726.632, "dur": 0.640, + "args": { + "External id": 3292185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300731.587, "dur": 5.861, + "args": { + "External id": 3292186,"Record function id": 0, "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300732.722, "dur": 4.322, + "args": { + "External id": 3292187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300733.290, "dur": 3.352, + "args": { + "External id": 3292188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300735.822, "dur": 0.694, + "args": { + "External id": 3292189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300740.701, "dur": 3.657, + "args": { + "External id": 3292190,"Record function id": 0, "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596300741.886, "dur": 2.073, + "args": { + "External id": 3292191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300742.358, "dur": 1.196, + "args": { + "External id": 3292192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596300742.858, "dur": 0.606, + "args": { + "External id": 3292193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596300748.036, "dur": 37637.610, + "args": { + "External id": 3292194,"Record function id": 0, "Sequence number": 32987147, "Fwd thread id": 1, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596300749.255, "dur": 37628.614, + "args": { + "External id": 3292195,"Sequence number": 32987147, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2594 + } + }, + { + "ph": "f", "id": 185, "pid": 1336756, "tid": 1381189, "ts": 1587596300749.255, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 1336756, "tid": 1381189, + "ts": 1587596300778.639, "dur": 36.230, + "args": { + "External id": 3292196,"Record function id": 0, "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 1336756, "tid": 1381189, + "ts": 1587596300822.221, "dur": 82.244, + "args": { + "External id": 3292197,"Record function id": 0, "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 1336756, "tid": 1381189, + "ts": 1587596300911.884, "dur": 37457.334, + "args": { + "External id": 3292198,"Record function id": 0, "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596301038.073, "dur": 7.868, + "args": { + "External id": 3292199,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596301057.188, "dur": 6.887, + "args": { + "External id": 3292200,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596301083.764, "dur": 36408.225, + "args": { + "External id": 3292201,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596301097.788, "dur": 36384.004, + "args": { + "External id": 3292202,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596301138.322, "dur": 13.672, + "args": { + "External id": 3292203,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596301158.449, "dur": 36280.036, + "args": { + "External id": 3292204,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596301161.138, "dur": 36276.252, + "args": { + "External id": 3292205,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596301165.417, "dur": 5.023, + "args": { + "External id": 3292206,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596301172.247, "dur": 36261.380, + "args": { + "External id": 3292207,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596337589.491, "dur": 10.568, + "args": { + "External id": 3292208,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596337593.070, "dur": 6.622, + "args": { + "External id": 3292209,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596337632.216, "dur": 426.624, + "args": { + "External id": 3292210,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596337658.740, "dur": 394.403, + "args": { + "External id": 3292211,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2610, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596337670.103, "dur": 376.687, + "args": { + "External id": 3292212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596338081.853, "dur": 2.377, + "args": { + "External id": 3292213,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2612, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338147.915, "dur": 6.855, + "args": { + "External id": 3292214,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338203.123, "dur": 1.355, + "args": { + "External id": 3292215,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338221.920, "dur": 2.886, + "args": { + "External id": 3292216,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338237.867, "dur": 0.917, + "args": { + "External id": 3292217,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338251.635, "dur": 0.901, + "args": { + "External id": 3292218,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338263.940, "dur": 0.813, + "args": { + "External id": 3292219,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338277.292, "dur": 2.594, + "args": { + "External id": 3292220,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338292.945, "dur": 1.800, + "args": { + "External id": 3292221,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338306.028, "dur": 0.980, + "args": { + "External id": 3292222,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596338399.591, "dur": 2748.888, + "args": { + "External id": 3292223,"Record function id": 0, "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 1336756, "tid": 1381189, + "ts": 1587596338418.992, "dur": 1004.906, + "args": { + "External id": 3292224,"Record function id": 0, "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 1336756, "tid": 1381189, + "ts": 1587596338435.100, "dur": 314.032, + "args": { + "External id": 3292225,"Record function id": 0, "Ev Idx": 2624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338512.954, "dur": 3.810, + "args": { + "External id": 3292226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338520.061, "dur": 0.949, + "args": { + "External id": 3292227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338523.305, "dur": 2.548, + "args": { + "External id": 3292228,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338527.840, "dur": 0.858, + "args": { + "External id": 3292229,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338530.476, "dur": 0.787, + "args": { + "External id": 3292230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338532.812, "dur": 0.685, + "args": { + "External id": 3292231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338535.228, "dur": 1.803, + "args": { + "External id": 3292232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338538.774, "dur": 1.031, + "args": { + "External id": 3292233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338541.374, "dur": 0.879, + "args": { + "External id": 3292234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596338543.462, "dur": 0.672, + "args": { + "External id": 3292235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596338565.357, "dur": 153.402, + "args": { + "External id": 3292236,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596338581.325, "dur": 133.256, + "args": { + "External id": 3292237,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596338602.703, "dur": 13.863, + "args": { + "External id": 3292238,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596338620.160, "dur": 66.210, + "args": { + "External id": 3292239,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596338622.857, "dur": 63.213, + "args": { + "External id": 3292240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338626.993, "dur": 5.673, + "args": { + "External id": 3292241,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596338634.687, "dur": 50.398, + "args": { + "External id": 3292242,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 1336756, "tid": 1381189, + "ts": 1587596338824.244, "dur": 591.944, + "args": { + "External id": 3292243,"Record function id": 0, "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 1336756, "tid": 1381189, + "ts": 1587596338839.929, "dur": 563.590, + "args": { + "External id": 3292244,"Record function id": 0, "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596338914.317, "dur": 6.317, + "args": { + "External id": 3292245,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596338936.030, "dur": 31.164, + "args": { + "External id": 3292246,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338941.188, "dur": 1.329, + "args": { + "External id": 3292247,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338944.850, "dur": 0.477, + "args": { + "External id": 3292248,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338946.969, "dur": 0.812, + "args": { + "External id": 3292249,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338949.581, "dur": 0.486, + "args": { + "External id": 3292250,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338951.956, "dur": 0.480, + "args": { + "External id": 3292251,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338954.220, "dur": 2.296, + "args": { + "External id": 3292252,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338958.208, "dur": 0.659, + "args": { + "External id": 3292253,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338960.508, "dur": 0.358, + "args": { + "External id": 3292254,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596338962.483, "dur": 0.401, + "args": { + "External id": 3292255,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596338976.953, "dur": 68.327, + "args": { + "External id": 3292256,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596339081.060, "dur": 105.021, + "args": { + "External id": 3292257,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596339092.270, "dur": 4.173, + "args": { + "External id": 3292258,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596339101.515, "dur": 10.770, + "args": { + "External id": 3292259,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596339105.532, "dur": 6.357, + "args": { + "External id": 3292260,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339109.878, "dur": 0.441, + "args": { + "External id": 3292261,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596339119.750, "dur": 26.494, + "args": { + "External id": 3292262,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339122.433, "dur": 0.563, + "args": { + "External id": 3292263,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339124.961, "dur": 0.445, + "args": { + "External id": 3292264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339126.972, "dur": 2.501, + "args": { + "External id": 3292265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339131.172, "dur": 0.344, + "args": { + "External id": 3292266,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339133.230, "dur": 0.498, + "args": { + "External id": 3292267,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339135.524, "dur": 0.349, + "args": { + "External id": 3292268,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339137.574, "dur": 0.424, + "args": { + "External id": 3292269,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339139.898, "dur": 0.579, + "args": { + "External id": 3292270,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596339142.376, "dur": 0.395, + "args": { + "External id": 3292271,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596339158.664, "dur": 19.948, + "args": { + "External id": 3292272,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596339233.650, "dur": 107.976, + "args": { + "External id": 3292273,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596339255.230, "dur": 82.496, + "args": { + "External id": 3292274,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2673, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596339265.119, "dur": 68.229, + "args": { + "External id": 3292275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596339355.462, "dur": 2.064, + "args": { + "External id": 3292276,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2675, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596339431.758, "dur": 1697.610, + "args": { + "External id": 3292277,"Sequence number": 32987146, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2676 + } + }, + { + "ph": "f", "id": 186, "pid": 1336756, "tid": 1381189, "ts": 1587596339431.758, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596339535.812, "dur": 103.588, + "args": { + "External id": 3292278,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596339679.588, "dur": 38.578, + "args": { + "External id": 3292279,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596339735.357, "dur": 52.113, + "args": { + "External id": 3292280,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596339799.605, "dur": 33.655, + "args": { + "External id": 3292281,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596339840.627, "dur": 63.772, + "args": { + "External id": 3292282,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596339916.711, "dur": 33.450, + "args": { + "External id": 3292283,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596339958.176, "dur": 79.348, + "args": { + "External id": 3292284,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596340070.389, "dur": 26.245, + "args": { + "External id": 3292285,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596340118.409, "dur": 31.167, + "args": { + "External id": 3292286,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596340169.581, "dur": 20.595, + "args": { + "External id": 3292287,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596340203.981, "dur": 16.053, + "args": { + "External id": 3292288,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340230.385, "dur": 35.405, + "args": { + "External id": 3292289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340269.548, "dur": 37.390, + "args": { + "External id": 3292290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596340335.802, "dur": 180.702, + "args": { + "External id": 3292291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596340418.199, "dur": 5.685, + "args": { + "External id": 3292292,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596340426.070, "dur": 5.644, + "args": { + "External id": 3292293,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596340547.246, "dur": 23.650, + "args": { + "External id": 3292294,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596340582.676, "dur": 15.435, + "args": { + "External id": 3292295,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340606.911, "dur": 34.896, + "args": { + "External id": 3292296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340648.939, "dur": 37.341, + "args": { + "External id": 3292297,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340696.181, "dur": 22.692, + "args": { + "External id": 3292298,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340724.415, "dur": 32.343, + "args": { + "External id": 3292299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340763.428, "dur": 22.962, + "args": { + "External id": 3292300,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596340793.901, "dur": 49.400, + "args": { + "External id": 3292301,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596340883.926, "dur": 31.881, + "args": { + "External id": 3292302,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596340935.418, "dur": 23.924, + "args": { + "External id": 3292303,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596340977.369, "dur": 53.463, + "args": { + "External id": 3292304,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596341051.054, "dur": 17.157, + "args": { + "External id": 3292305,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596341082.137, "dur": 17.718, + "args": { + "External id": 3292306,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341170.842, "dur": 15.969, + "args": { + "External id": 3292307,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341174.255, "dur": 11.623, + "args": { + "External id": 3292308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341178.592, "dur": 6.263, + "args": { + "External id": 3292309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341180.360, "dur": 4.365, + "args": { + "External id": 3292310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341190.839, "dur": 7.667, + "args": { + "External id": 3292311,"Record function id": 0, "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341192.163, "dur": 5.854, + "args": { + "External id": 3292312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341192.971, "dur": 4.544, + "args": { + "External id": 3292313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341193.945, "dur": 3.432, + "args": { + "External id": 3292314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341201.755, "dur": 4.405, + "args": { + "External id": 3292315,"Record function id": 0, "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341203.036, "dur": 2.695, + "args": { + "External id": 3292316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341203.522, "dur": 1.727, + "args": { + "External id": 3292317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341204.030, "dur": 1.129, + "args": { + "External id": 3292318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341209.461, "dur": 3.778, + "args": { + "External id": 3292319,"Record function id": 0, "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341210.623, "dur": 2.162, + "args": { + "External id": 3292320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341211.275, "dur": 1.087, + "args": { + "External id": 3292321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341211.588, "dur": 0.703, + "args": { + "External id": 3292322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341216.418, "dur": 3.958, + "args": { + "External id": 3292323,"Record function id": 0, "Ev Idx": 2722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341217.632, "dur": 2.336, + "args": { + "External id": 3292324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341218.069, "dur": 1.494, + "args": { + "External id": 3292325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341218.739, "dur": 0.757, + "args": { + "External id": 3292326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341223.542, "dur": 4.190, + "args": { + "External id": 3292327,"Record function id": 0, "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341224.644, "dur": 2.657, + "args": { + "External id": 3292328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341225.450, "dur": 1.391, + "args": { + "External id": 3292329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341225.975, "dur": 0.745, + "args": { + "External id": 3292330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341230.894, "dur": 3.453, + "args": { + "External id": 3292331,"Record function id": 0, "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341232.092, "dur": 1.805, + "args": { + "External id": 3292332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341232.527, "dur": 0.912, + "args": { + "External id": 3292333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341232.820, "dur": 0.536, + "args": { + "External id": 3292334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341237.563, "dur": 3.843, + "args": { + "External id": 3292335,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341238.662, "dur": 2.335, + "args": { + "External id": 3292336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341239.307, "dur": 1.281, + "args": { + "External id": 3292337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341239.770, "dur": 0.747, + "args": { + "External id": 3292338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341247.771, "dur": 5.665, + "args": { + "External id": 3292339,"Record function id": 0, "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596341249.067, "dur": 3.964, + "args": { + "External id": 3292340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341249.522, "dur": 3.094, + "args": { + "External id": 3292341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596341250.010, "dur": 2.514, + "args": { + "External id": 3292342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596341257.196, "dur": 38344.005, + "args": { + "External id": 3292343,"Record function id": 0, "Sequence number": 32987145, "Fwd thread id": 1, "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596341258.678, "dur": 38334.274, + "args": { + "External id": 3292344,"Sequence number": 32987145, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2743 + } + }, + { + "ph": "f", "id": 187, "pid": 1336756, "tid": 1381189, "ts": 1587596341258.678, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 1336756, "tid": 1381189, + "ts": 1587596341286.658, "dur": 39.981, + "args": { + "External id": 3292345,"Record function id": 0, "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 1336756, "tid": 1381189, + "ts": 1587596341334.475, "dur": 59.906, + "args": { + "External id": 3292346,"Record function id": 0, "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 1336756, "tid": 1381189, + "ts": 1587596341399.895, "dur": 38185.195, + "args": { + "External id": 3292347,"Record function id": 0, "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596341483.757, "dur": 9.332, + "args": { + "External id": 3292348,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596341502.557, "dur": 4.672, + "args": { + "External id": 3292349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596341520.904, "dur": 37233.849, + "args": { + "External id": 3292350,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596341534.339, "dur": 37211.687, + "args": { + "External id": 3292351,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596341610.612, "dur": 13.866, + "args": { + "External id": 3292352,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596341631.412, "dur": 37074.579, + "args": { + "External id": 3292353,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596341634.205, "dur": 37071.052, + "args": { + "External id": 3292354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596341638.278, "dur": 5.491, + "args": { + "External id": 3292355,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596341645.766, "dur": 37055.710, + "args": { + "External id": 3292356,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596378843.561, "dur": 8.564, + "args": { + "External id": 3292357,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596378846.688, "dur": 5.097, + "args": { + "External id": 3292358,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596378898.492, "dur": 379.861, + "args": { + "External id": 3292359,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596378930.114, "dur": 343.119, + "args": { + "External id": 3292360,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2759, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596378941.966, "dur": 325.153, + "args": { + "External id": 3292361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596379299.540, "dur": 2.197, + "args": { + "External id": 3292362,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2761, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379363.273, "dur": 6.584, + "args": { + "External id": 3292363,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379416.872, "dur": 3.245, + "args": { + "External id": 3292364,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379437.596, "dur": 0.957, + "args": { + "External id": 3292365,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379452.058, "dur": 0.823, + "args": { + "External id": 3292366,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379466.647, "dur": 1.146, + "args": { + "External id": 3292367,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379480.157, "dur": 3.095, + "args": { + "External id": 3292368,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379496.294, "dur": 1.209, + "args": { + "External id": 3292369,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379510.549, "dur": 1.808, + "args": { + "External id": 3292370,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379524.390, "dur": 0.823, + "args": { + "External id": 3292371,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596379615.824, "dur": 2810.312, + "args": { + "External id": 3292372,"Record function id": 0, "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 1336756, "tid": 1381189, + "ts": 1587596379637.380, "dur": 1019.913, + "args": { + "External id": 3292373,"Record function id": 0, "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 1336756, "tid": 1381189, + "ts": 1587596379651.487, "dur": 381.191, + "args": { + "External id": 3292374,"Record function id": 0, "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379732.919, "dur": 5.402, + "args": { + "External id": 3292375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379741.809, "dur": 0.849, + "args": { + "External id": 3292376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379744.583, "dur": 1.135, + "args": { + "External id": 3292377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379747.269, "dur": 1.049, + "args": { + "External id": 3292378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379749.931, "dur": 0.599, + "args": { + "External id": 3292379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379757.934, "dur": 0.713, + "args": { + "External id": 3292380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379760.595, "dur": 1.685, + "args": { + "External id": 3292381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379763.810, "dur": 1.011, + "args": { + "External id": 3292382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379766.243, "dur": 2.887, + "args": { + "External id": 3292383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596379770.876, "dur": 0.756, + "args": { + "External id": 3292384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596379789.118, "dur": 168.700, + "args": { + "External id": 3292385,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596379804.367, "dur": 148.692, + "args": { + "External id": 3292386,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596379821.274, "dur": 11.567, + "args": { + "External id": 3292387,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596379836.183, "dur": 86.486, + "args": { + "External id": 3292388,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596379838.857, "dur": 83.451, + "args": { + "External id": 3292389,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596379842.857, "dur": 5.725, + "args": { + "External id": 3292390,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596379850.315, "dur": 71.005, + "args": { + "External id": 3292391,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 1336756, "tid": 1381189, + "ts": 1587596380118.710, "dur": 530.085, + "args": { + "External id": 3292392,"Record function id": 0, "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 1336756, "tid": 1381189, + "ts": 1587596380137.580, "dur": 498.741, + "args": { + "External id": 3292393,"Record function id": 0, "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596380194.143, "dur": 5.413, + "args": { + "External id": 3292394,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596380214.657, "dur": 31.058, + "args": { + "External id": 3292395,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380219.700, "dur": 1.455, + "args": { + "External id": 3292396,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380223.373, "dur": 0.699, + "args": { + "External id": 3292397,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380225.621, "dur": 0.381, + "args": { + "External id": 3292398,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380227.806, "dur": 2.397, + "args": { + "External id": 3292399,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380232.017, "dur": 0.349, + "args": { + "External id": 3292400,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380233.837, "dur": 0.329, + "args": { + "External id": 3292401,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380236.079, "dur": 0.379, + "args": { + "External id": 3292402,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380238.481, "dur": 0.451, + "args": { + "External id": 3292403,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380240.785, "dur": 0.459, + "args": { + "External id": 3292404,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596380255.839, "dur": 33.797, + "args": { + "External id": 3292405,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596380321.465, "dur": 101.996, + "args": { + "External id": 3292406,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596380331.179, "dur": 3.533, + "args": { + "External id": 3292407,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596380339.909, "dur": 10.014, + "args": { + "External id": 3292408,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596380343.869, "dur": 5.658, + "args": { + "External id": 3292409,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380347.812, "dur": 0.635, + "args": { + "External id": 3292410,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596380357.075, "dur": 27.583, + "args": { + "External id": 3292411,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380359.144, "dur": 2.243, + "args": { + "External id": 3292412,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380363.487, "dur": 0.555, + "args": { + "External id": 3292413,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380365.927, "dur": 0.558, + "args": { + "External id": 3292414,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380368.365, "dur": 0.356, + "args": { + "External id": 3292415,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380370.273, "dur": 0.333, + "args": { + "External id": 3292416,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380372.421, "dur": 0.465, + "args": { + "External id": 3292417,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380374.303, "dur": 0.491, + "args": { + "External id": 3292418,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380376.353, "dur": 0.435, + "args": { + "External id": 3292419,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596380378.465, "dur": 1.975, + "args": { + "External id": 3292420,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596380394.107, "dur": 21.953, + "args": { + "External id": 3292421,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596380468.237, "dur": 106.281, + "args": { + "External id": 3292422,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596380490.802, "dur": 80.329, + "args": { + "External id": 3292423,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2822, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596380500.343, "dur": 66.091, + "args": { + "External id": 3292424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596380588.839, "dur": 1.888, + "args": { + "External id": 3292425,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2824, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596380663.909, "dur": 1742.259, + "args": { + "External id": 3292426,"Sequence number": 32987144, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2825 + } + }, + { + "ph": "f", "id": 188, "pid": 1336756, "tid": 1381189, "ts": 1587596380663.909, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596380765.676, "dur": 121.797, + "args": { + "External id": 3292427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596380929.908, "dur": 40.782, + "args": { + "External id": 3292428,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381030.665, "dur": 63.940, + "args": { + "External id": 3292429,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381106.824, "dur": 38.983, + "args": { + "External id": 3292430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381153.977, "dur": 56.573, + "args": { + "External id": 3292431,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381218.352, "dur": 28.888, + "args": { + "External id": 3292432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381260.143, "dur": 44.812, + "args": { + "External id": 3292433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596381332.516, "dur": 24.783, + "args": { + "External id": 3292434,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596381378.322, "dur": 30.592, + "args": { + "External id": 3292435,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596381428.693, "dur": 22.188, + "args": { + "External id": 3292436,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596381464.097, "dur": 17.976, + "args": { + "External id": 3292437,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381494.068, "dur": 30.677, + "args": { + "External id": 3292438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381528.128, "dur": 34.522, + "args": { + "External id": 3292439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596381588.431, "dur": 181.379, + "args": { + "External id": 3292440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596381669.467, "dur": 6.085, + "args": { + "External id": 3292441,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596381677.538, "dur": 2.509, + "args": { + "External id": 3292442,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596381800.496, "dur": 23.784, + "args": { + "External id": 3292443,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596381836.354, "dur": 16.123, + "args": { + "External id": 3292444,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381861.219, "dur": 54.161, + "args": { + "External id": 3292445,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381926.125, "dur": 38.680, + "args": { + "External id": 3292446,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596381972.886, "dur": 61.153, + "args": { + "External id": 3292447,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596382043.035, "dur": 50.082, + "args": { + "External id": 3292448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596382106.372, "dur": 28.745, + "args": { + "External id": 3292449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596382145.439, "dur": 33.748, + "args": { + "External id": 3292450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596382202.965, "dur": 25.511, + "args": { + "External id": 3292451,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596382245.510, "dur": 28.142, + "args": { + "External id": 3292452,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596382289.401, "dur": 17.975, + "args": { + "External id": 3292453,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596382326.475, "dur": 16.574, + "args": { + "External id": 3292454,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596382356.291, "dur": 20.915, + "args": { + "External id": 3292455,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382448.490, "dur": 17.396, + "args": { + "External id": 3292456,"Record function id": 0, "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382451.904, "dur": 13.042, + "args": { + "External id": 3292457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382455.902, "dur": 8.006, + "args": { + "External id": 3292458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382457.576, "dur": 6.159, + "args": { + "External id": 3292459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382469.839, "dur": 5.353, + "args": { + "External id": 3292460,"Record function id": 0, "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382471.625, "dur": 3.117, + "args": { + "External id": 3292461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382472.355, "dur": 1.884, + "args": { + "External id": 3292462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382473.128, "dur": 0.973, + "args": { + "External id": 3292463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382478.484, "dur": 4.929, + "args": { + "External id": 3292464,"Record function id": 0, "Ev Idx": 2863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382479.819, "dur": 3.203, + "args": { + "External id": 3292465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382480.885, "dur": 1.711, + "args": { + "External id": 3292466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382481.573, "dur": 0.931, + "args": { + "External id": 3292467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382486.827, "dur": 4.458, + "args": { + "External id": 3292468,"Record function id": 0, "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382488.400, "dur": 2.500, + "args": { + "External id": 3292469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382488.955, "dur": 1.497, + "args": { + "External id": 3292470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382489.605, "dur": 0.765, + "args": { + "External id": 3292471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382494.528, "dur": 3.779, + "args": { + "External id": 3292472,"Record function id": 0, "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382495.751, "dur": 2.140, + "args": { + "External id": 3292473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382496.193, "dur": 1.282, + "args": { + "External id": 3292474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382496.703, "dur": 0.697, + "args": { + "External id": 3292475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382501.761, "dur": 3.687, + "args": { + "External id": 3292476,"Record function id": 0, "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382502.954, "dur": 2.083, + "args": { + "External id": 3292477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382503.372, "dur": 1.255, + "args": { + "External id": 3292478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382503.829, "dur": 0.700, + "args": { + "External id": 3292479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382508.636, "dur": 3.514, + "args": { + "External id": 3292480,"Record function id": 0, "Ev Idx": 2879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382509.818, "dur": 1.914, + "args": { + "External id": 3292481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382510.285, "dur": 1.006, + "args": { + "External id": 3292482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382510.562, "dur": 0.651, + "args": { + "External id": 3292483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382515.311, "dur": 5.805, + "args": { + "External id": 3292484,"Record function id": 0, "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382516.509, "dur": 4.202, + "args": { + "External id": 3292485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382516.951, "dur": 3.378, + "args": { + "External id": 3292486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382517.208, "dur": 3.037, + "args": { + "External id": 3292487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382524.360, "dur": 4.013, + "args": { + "External id": 3292488,"Record function id": 0, "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596382525.944, "dur": 2.027, + "args": { + "External id": 3292489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382526.400, "dur": 1.160, + "args": { + "External id": 3292490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596382526.816, "dur": 0.654, + "args": { + "External id": 3292491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596382532.168, "dur": 36935.625, + "args": { + "External id": 3292492,"Record function id": 0, "Sequence number": 32987143, "Fwd thread id": 1, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596382533.507, "dur": 36926.454, + "args": { + "External id": 3292493,"Sequence number": 32987143, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2892 + } + }, + { + "ph": "f", "id": 189, "pid": 1336756, "tid": 1381189, "ts": 1587596382533.507, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 1336756, "tid": 1381189, + "ts": 1587596382561.724, "dur": 36.859, + "args": { + "External id": 3292494,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 1336756, "tid": 1381189, + "ts": 1587596382606.645, "dur": 59.771, + "args": { + "External id": 3292495,"Record function id": 0, "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 1336756, "tid": 1381189, + "ts": 1587596382671.909, "dur": 36779.953, + "args": { + "External id": 3292496,"Record function id": 0, "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596382759.957, "dur": 5.749, + "args": { + "External id": 3292497,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596382775.320, "dur": 4.636, + "args": { + "External id": 3292498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596382794.636, "dur": 35819.518, + "args": { + "External id": 3292499,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596382807.995, "dur": 35795.044, + "args": { + "External id": 3292500,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596382905.351, "dur": 15.028, + "args": { + "External id": 3292501,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596382926.822, "dur": 35630.684, + "args": { + "External id": 3292502,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596382929.500, "dur": 35626.722, + "args": { + "External id": 3292503,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596382933.596, "dur": 6.514, + "args": { + "External id": 3292504,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596382941.939, "dur": 35610.004, + "args": { + "External id": 3292505,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596418712.850, "dur": 12.018, + "args": { + "External id": 3292506,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596418716.875, "dur": 7.631, + "args": { + "External id": 3292507,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596418755.299, "dur": 391.285, + "args": { + "External id": 3292508,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596418781.961, "dur": 359.377, + "args": { + "External id": 3292509,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2908, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596418793.439, "dur": 341.858, + "args": { + "External id": 3292510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596419167.985, "dur": 2.494, + "args": { + "External id": 3292511,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2910, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419230.153, "dur": 6.880, + "args": { + "External id": 3292512,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419286.838, "dur": 1.747, + "args": { + "External id": 3292513,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419305.739, "dur": 0.765, + "args": { + "External id": 3292514,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419319.670, "dur": 1.186, + "args": { + "External id": 3292515,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419333.514, "dur": 0.679, + "args": { + "External id": 3292516,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419347.834, "dur": 0.808, + "args": { + "External id": 3292517,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419361.506, "dur": 1.053, + "args": { + "External id": 3292518,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419375.067, "dur": 2.536, + "args": { + "External id": 3292519,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419389.023, "dur": 0.924, + "args": { + "External id": 3292520,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596419482.316, "dur": 2809.310, + "args": { + "External id": 3292521,"Record function id": 0, "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 1336756, "tid": 1381189, + "ts": 1587596419500.142, "dur": 1044.979, + "args": { + "External id": 3292522,"Record function id": 0, "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 1336756, "tid": 1381189, + "ts": 1587596419514.435, "dur": 315.136, + "args": { + "External id": 3292523,"Record function id": 0, "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419588.071, "dur": 3.947, + "args": { + "External id": 3292524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419595.168, "dur": 1.228, + "args": { + "External id": 3292525,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419598.297, "dur": 0.966, + "args": { + "External id": 3292526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419601.053, "dur": 1.036, + "args": { + "External id": 3292527,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419603.737, "dur": 0.659, + "args": { + "External id": 3292528,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419606.112, "dur": 0.785, + "args": { + "External id": 3292529,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419608.683, "dur": 1.855, + "args": { + "External id": 3292530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419611.954, "dur": 2.555, + "args": { + "External id": 3292531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419616.413, "dur": 0.860, + "args": { + "External id": 3292532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596419618.969, "dur": 0.728, + "args": { + "External id": 3292533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596419637.838, "dur": 161.075, + "args": { + "External id": 3292534,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596419657.009, "dur": 137.706, + "args": { + "External id": 3292535,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596419680.870, "dur": 12.365, + "args": { + "External id": 3292536,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596419696.906, "dur": 67.716, + "args": { + "External id": 3292537,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596419703.438, "dur": 60.876, + "args": { + "External id": 3292538,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596419707.711, "dur": 4.868, + "args": { + "External id": 3292539,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596419714.556, "dur": 48.931, + "args": { + "External id": 3292540,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 1336756, "tid": 1381189, + "ts": 1587596419928.907, "dur": 607.801, + "args": { + "External id": 3292541,"Record function id": 0, "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 1336756, "tid": 1381189, + "ts": 1587596419945.364, "dur": 578.286, + "args": { + "External id": 3292542,"Record function id": 0, "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596420039.622, "dur": 6.735, + "args": { + "External id": 3292543,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596420062.413, "dur": 39.898, + "args": { + "External id": 3292544,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420067.726, "dur": 1.530, + "args": { + "External id": 3292545,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420071.894, "dur": 0.347, + "args": { + "External id": 3292546,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420073.977, "dur": 2.522, + "args": { + "External id": 3292547,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420078.338, "dur": 0.607, + "args": { + "External id": 3292548,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420080.767, "dur": 0.366, + "args": { + "External id": 3292549,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420082.714, "dur": 0.409, + "args": { + "External id": 3292550,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420089.973, "dur": 0.604, + "args": { + "External id": 3292551,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420092.450, "dur": 0.608, + "args": { + "External id": 3292552,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420097.801, "dur": 0.370, + "args": { + "External id": 3292553,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596420112.051, "dur": 36.881, + "args": { + "External id": 3292554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596420187.551, "dur": 111.943, + "args": { + "External id": 3292555,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596420197.526, "dur": 3.641, + "args": { + "External id": 3292556,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596420206.102, "dur": 15.494, + "args": { + "External id": 3292557,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596420210.166, "dur": 11.013, + "args": { + "External id": 3292558,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420217.527, "dur": 2.427, + "args": { + "External id": 3292559,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596420228.925, "dur": 26.683, + "args": { + "External id": 3292560,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420231.455, "dur": 0.578, + "args": { + "External id": 3292561,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420233.822, "dur": 0.377, + "args": { + "External id": 3292562,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420235.972, "dur": 0.434, + "args": { + "External id": 3292563,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420238.301, "dur": 0.386, + "args": { + "External id": 3292564,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420240.775, "dur": 0.432, + "args": { + "External id": 3292565,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420242.675, "dur": 0.422, + "args": { + "External id": 3292566,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420245.017, "dur": 0.325, + "args": { + "External id": 3292567,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420246.986, "dur": 2.109, + "args": { + "External id": 3292568,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596420250.762, "dur": 0.447, + "args": { + "External id": 3292569,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596420270.784, "dur": 20.800, + "args": { + "External id": 3292570,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596420346.269, "dur": 111.125, + "args": { + "External id": 3292571,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596420371.411, "dur": 82.589, + "args": { + "External id": 3292572,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2971, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596420380.850, "dur": 69.030, + "args": { + "External id": 3292573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596420473.015, "dur": 1.878, + "args": { + "External id": 3292574,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2973, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596420551.598, "dur": 1721.791, + "args": { + "External id": 3292575,"Sequence number": 32987142, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2974 + } + }, + { + "ph": "f", "id": 190, "pid": 1336756, "tid": 1381189, "ts": 1587596420551.598, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596420656.638, "dur": 107.493, + "args": { + "External id": 3292576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596420803.847, "dur": 37.257, + "args": { + "External id": 3292577,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596420860.264, "dur": 78.873, + "args": { + "External id": 3292578,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596420953.035, "dur": 73.132, + "args": { + "External id": 3292579,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421037.058, "dur": 53.108, + "args": { + "External id": 3292580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421098.907, "dur": 37.355, + "args": { + "External id": 3292581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421146.017, "dur": 44.210, + "args": { + "External id": 3292582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596421216.071, "dur": 26.164, + "args": { + "External id": 3292583,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596421260.249, "dur": 30.369, + "args": { + "External id": 3292584,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596421311.271, "dur": 20.802, + "args": { + "External id": 3292585,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596421344.770, "dur": 20.159, + "args": { + "External id": 3292586,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421376.578, "dur": 29.773, + "args": { + "External id": 3292587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421409.466, "dur": 35.184, + "args": { + "External id": 3292588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596421472.102, "dur": 168.950, + "args": { + "External id": 3292589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596421547.410, "dur": 5.929, + "args": { + "External id": 3292590,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596421555.414, "dur": 2.151, + "args": { + "External id": 3292591,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596421671.048, "dur": 23.561, + "args": { + "External id": 3292592,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596421706.489, "dur": 15.299, + "args": { + "External id": 3292593,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421732.501, "dur": 37.254, + "args": { + "External id": 3292594,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421776.634, "dur": 38.524, + "args": { + "External id": 3292595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421822.412, "dur": 22.666, + "args": { + "External id": 3292596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421850.944, "dur": 55.343, + "args": { + "External id": 3292597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421918.512, "dur": 44.662, + "args": { + "External id": 3292598,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596421976.544, "dur": 75.703, + "args": { + "External id": 3292599,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596422078.462, "dur": 25.410, + "args": { + "External id": 3292600,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596422121.253, "dur": 26.606, + "args": { + "External id": 3292601,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596422162.493, "dur": 18.818, + "args": { + "External id": 3292602,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596422197.255, "dur": 16.662, + "args": { + "External id": 3292603,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596422226.632, "dur": 17.109, + "args": { + "External id": 3292604,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422314.494, "dur": 19.006, + "args": { + "External id": 3292605,"Record function id": 0, "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422317.767, "dur": 14.675, + "args": { + "External id": 3292606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422321.866, "dur": 9.697, + "args": { + "External id": 3292607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422323.989, "dur": 7.453, + "args": { + "External id": 3292608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422337.412, "dur": 5.674, + "args": { + "External id": 3292609,"Record function id": 0, "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422339.099, "dur": 3.478, + "args": { + "External id": 3292610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422339.858, "dur": 2.191, + "args": { + "External id": 3292611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422340.882, "dur": 1.024, + "args": { + "External id": 3292612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422346.329, "dur": 4.135, + "args": { + "External id": 3292613,"Record function id": 0, "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422347.561, "dur": 2.497, + "args": { + "External id": 3292614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422348.281, "dur": 1.334, + "args": { + "External id": 3292615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422348.594, "dur": 0.933, + "args": { + "External id": 3292616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422353.684, "dur": 3.866, + "args": { + "External id": 3292617,"Record function id": 0, "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422354.999, "dur": 2.123, + "args": { + "External id": 3292618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422355.513, "dur": 1.201, + "args": { + "External id": 3292619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422355.933, "dur": 0.704, + "args": { + "External id": 3292620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422360.665, "dur": 3.914, + "args": { + "External id": 3292621,"Record function id": 0, "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422361.876, "dur": 2.293, + "args": { + "External id": 3292622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422362.356, "dur": 1.367, + "args": { + "External id": 3292623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422362.729, "dur": 0.921, + "args": { + "External id": 3292624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422367.795, "dur": 4.134, + "args": { + "External id": 3292625,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422369.273, "dur": 2.257, + "args": { + "External id": 3292626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422369.963, "dur": 1.166, + "args": { + "External id": 3292627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422370.386, "dur": 0.628, + "args": { + "External id": 3292628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422376.139, "dur": 9.011, + "args": { + "External id": 3292629,"Record function id": 0, "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422378.355, "dur": 5.949, + "args": { + "External id": 3292630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422379.422, "dur": 4.306, + "args": { + "External id": 3292631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422380.106, "dur": 3.378, + "args": { + "External id": 3292632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422391.884, "dur": 4.062, + "args": { + "External id": 3292633,"Record function id": 0, "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422393.068, "dur": 2.460, + "args": { + "External id": 3292634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422393.603, "dur": 1.523, + "args": { + "External id": 3292635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422393.913, "dur": 1.122, + "args": { + "External id": 3292636,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422398.817, "dur": 4.553, + "args": { + "External id": 3292637,"Record function id": 0, "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596422399.937, "dur": 3.024, + "args": { + "External id": 3292638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422400.630, "dur": 1.916, + "args": { + "External id": 3292639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596422401.439, "dur": 1.013, + "args": { + "External id": 3292640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596422406.619, "dur": 38486.836, + "args": { + "External id": 3292641,"Record function id": 0, "Sequence number": 32987141, "Fwd thread id": 1, "Ev Idx": 3040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596422408.127, "dur": 38456.848, + "args": { + "External id": 3292642,"Sequence number": 32987141, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3041 + } + }, + { + "ph": "f", "id": 191, "pid": 1336756, "tid": 1381189, "ts": 1587596422408.127, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 1336756, "tid": 1381189, + "ts": 1587596422437.041, "dur": 37.878, + "args": { + "External id": 3292643,"Record function id": 0, "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 1336756, "tid": 1381189, + "ts": 1587596422482.504, "dur": 63.146, + "args": { + "External id": 3292644,"Record function id": 0, "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 1336756, "tid": 1381189, + "ts": 1587596422551.679, "dur": 38305.410, + "args": { + "External id": 3292645,"Record function id": 0, "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596422640.797, "dur": 5.576, + "args": { + "External id": 3292646,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596422655.732, "dur": 4.573, + "args": { + "External id": 3292647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596422673.605, "dur": 37362.397, + "args": { + "External id": 3292648,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596422720.603, "dur": 37304.690, + "args": { + "External id": 3292649,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596422760.881, "dur": 13.733, + "args": { + "External id": 3292650,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596422780.914, "dur": 37163.309, + "args": { + "External id": 3292651,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596422783.846, "dur": 37159.376, + "args": { + "External id": 3292652,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596422788.054, "dur": 4.895, + "args": { + "External id": 3292653,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596422794.745, "dur": 37144.239, + "args": { + "External id": 3292654,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596460133.634, "dur": 10.572, + "args": { + "External id": 3292655,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596460137.351, "dur": 6.481, + "args": { + "External id": 3292656,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596460176.480, "dur": 377.081, + "args": { + "External id": 3292657,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596460204.120, "dur": 345.015, + "args": { + "External id": 3292658,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3057, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596460215.488, "dur": 328.458, + "args": { + "External id": 3292659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596460572.580, "dur": 2.275, + "args": { + "External id": 3292660,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3059, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460631.049, "dur": 8.594, + "args": { + "External id": 3292661,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460687.083, "dur": 1.392, + "args": { + "External id": 3292662,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460707.308, "dur": 1.012, + "args": { + "External id": 3292663,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460721.576, "dur": 0.873, + "args": { + "External id": 3292664,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460735.058, "dur": 2.579, + "args": { + "External id": 3292665,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460749.716, "dur": 0.821, + "args": { + "External id": 3292666,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460765.294, "dur": 0.951, + "args": { + "External id": 3292667,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460778.590, "dur": 1.884, + "args": { + "External id": 3292668,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596460791.582, "dur": 2.611, + "args": { + "External id": 3292669,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596460910.224, "dur": 2829.394, + "args": { + "External id": 3292670,"Record function id": 0, "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 1336756, "tid": 1381189, + "ts": 1587596460931.075, "dur": 1111.714, + "args": { + "External id": 3292671,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 1336756, "tid": 1381189, + "ts": 1587596460947.594, "dur": 391.538, + "args": { + "External id": 3292672,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461095.002, "dur": 5.251, + "args": { + "External id": 3292673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461104.223, "dur": 1.031, + "args": { + "External id": 3292674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461107.112, "dur": 0.984, + "args": { + "External id": 3292675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461109.701, "dur": 0.733, + "args": { + "External id": 3292676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461111.990, "dur": 0.931, + "args": { + "External id": 3292677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461116.374, "dur": 0.725, + "args": { + "External id": 3292678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461118.672, "dur": 3.403, + "args": { + "External id": 3292679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461123.411, "dur": 0.880, + "args": { + "External id": 3292680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461125.622, "dur": 0.692, + "args": { + "External id": 3292681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596461127.635, "dur": 0.808, + "args": { + "External id": 3292682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596461148.609, "dur": 152.076, + "args": { + "External id": 3292683,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596461164.800, "dur": 131.510, + "args": { + "External id": 3292684,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596461180.102, "dur": 12.014, + "args": { + "External id": 3292685,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596461195.510, "dur": 69.619, + "args": { + "External id": 3292686,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596461198.088, "dur": 66.562, + "args": { + "External id": 3292687,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461202.378, "dur": 5.156, + "args": { + "External id": 3292688,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596461209.537, "dur": 54.627, + "args": { + "External id": 3292689,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 1336756, "tid": 1381189, + "ts": 1587596461423.603, "dur": 610.342, + "args": { + "External id": 3292690,"Record function id": 0, "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 1336756, "tid": 1381189, + "ts": 1587596461440.414, "dur": 539.008, + "args": { + "External id": 3292691,"Record function id": 0, "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596461495.062, "dur": 4.171, + "args": { + "External id": 3292692,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596461513.804, "dur": 31.211, + "args": { + "External id": 3292693,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461518.873, "dur": 1.341, + "args": { + "External id": 3292694,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461522.801, "dur": 2.806, + "args": { + "External id": 3292695,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461527.185, "dur": 0.549, + "args": { + "External id": 3292696,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461529.926, "dur": 0.318, + "args": { + "External id": 3292697,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461532.366, "dur": 0.334, + "args": { + "External id": 3292698,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461534.315, "dur": 0.617, + "args": { + "External id": 3292699,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461536.218, "dur": 0.339, + "args": { + "External id": 3292700,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461538.153, "dur": 0.661, + "args": { + "External id": 3292701,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461540.611, "dur": 0.369, + "args": { + "External id": 3292702,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596461554.764, "dur": 29.621, + "args": { + "External id": 3292703,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596461617.070, "dur": 108.675, + "args": { + "External id": 3292704,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596461626.389, "dur": 4.456, + "args": { + "External id": 3292705,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596461635.855, "dur": 10.321, + "args": { + "External id": 3292706,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596461639.990, "dur": 5.752, + "args": { + "External id": 3292707,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461643.962, "dur": 0.653, + "args": { + "External id": 3292708,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596461652.859, "dur": 26.560, + "args": { + "External id": 3292709,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461655.072, "dur": 0.762, + "args": { + "External id": 3292710,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461658.419, "dur": 0.362, + "args": { + "External id": 3292711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461660.386, "dur": 0.453, + "args": { + "External id": 3292712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461662.405, "dur": 0.443, + "args": { + "External id": 3292713,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461664.320, "dur": 0.396, + "args": { + "External id": 3292714,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461666.663, "dur": 0.373, + "args": { + "External id": 3292715,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461668.613, "dur": 2.960, + "args": { + "External id": 3292716,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461672.958, "dur": 0.341, + "args": { + "External id": 3292717,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596461675.032, "dur": 0.435, + "args": { + "External id": 3292718,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596461693.937, "dur": 21.196, + "args": { + "External id": 3292719,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596461768.553, "dur": 134.849, + "args": { + "External id": 3292720,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596461796.402, "dur": 102.925, + "args": { + "External id": 3292721,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3120, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596461805.493, "dur": 88.618, + "args": { + "External id": 3292722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596461922.109, "dur": 2.079, + "args": { + "External id": 3292723,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3122, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596462050.777, "dur": 1670.404, + "args": { + "External id": 3292724,"Sequence number": 32987140, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3123 + } + }, + { + "ph": "f", "id": 192, "pid": 1336756, "tid": 1381189, "ts": 1587596462050.777, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462158.891, "dur": 109.240, + "args": { + "External id": 3292725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596462310.284, "dur": 38.959, + "args": { + "External id": 3292726,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462368.906, "dur": 56.547, + "args": { + "External id": 3292727,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462435.256, "dur": 37.534, + "args": { + "External id": 3292728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462480.432, "dur": 47.127, + "args": { + "External id": 3292729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462536.728, "dur": 32.098, + "args": { + "External id": 3292730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462576.354, "dur": 46.418, + "args": { + "External id": 3292731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596462644.698, "dur": 24.179, + "args": { + "External id": 3292732,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596462686.273, "dur": 30.073, + "args": { + "External id": 3292733,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596462735.447, "dur": 19.712, + "args": { + "External id": 3292734,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596462767.194, "dur": 16.939, + "args": { + "External id": 3292735,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462794.390, "dur": 30.999, + "args": { + "External id": 3292736,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596462828.786, "dur": 34.955, + "args": { + "External id": 3292737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596462908.515, "dur": 226.958, + "args": { + "External id": 3292738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596463023.112, "dur": 6.586, + "args": { + "External id": 3292739,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596463031.953, "dur": 7.198, + "args": { + "External id": 3292740,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596463167.509, "dur": 26.071, + "args": { + "External id": 3292741,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596463220.995, "dur": 16.144, + "args": { + "External id": 3292742,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596463246.769, "dur": 44.464, + "args": { + "External id": 3292743,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596463298.273, "dur": 51.498, + "args": { + "External id": 3292744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596463366.058, "dur": 29.593, + "args": { + "External id": 3292745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596463403.457, "dur": 36.949, + "args": { + "External id": 3292746,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596463447.245, "dur": 21.147, + "args": { + "External id": 3292747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596463479.538, "dur": 31.507, + "args": { + "External id": 3292748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596463532.448, "dur": 24.326, + "args": { + "External id": 3292749,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596463572.458, "dur": 23.989, + "args": { + "External id": 3292750,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596463609.837, "dur": 17.735, + "args": { + "External id": 3292751,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596463644.060, "dur": 16.841, + "args": { + "External id": 3292752,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596463674.445, "dur": 20.475, + "args": { + "External id": 3292753,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463762.016, "dur": 14.751, + "args": { + "External id": 3292754,"Record function id": 0, "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463765.458, "dur": 10.410, + "args": { + "External id": 3292755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463769.392, "dur": 5.623, + "args": { + "External id": 3292756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463771.185, "dur": 3.704, + "args": { + "External id": 3292757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463780.822, "dur": 5.409, + "args": { + "External id": 3292758,"Record function id": 0, "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463782.200, "dur": 3.460, + "args": { + "External id": 3292759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463783.084, "dur": 2.164, + "args": { + "External id": 3292760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463783.879, "dur": 1.218, + "args": { + "External id": 3292761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463789.550, "dur": 4.767, + "args": { + "External id": 3292762,"Record function id": 0, "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463791.189, "dur": 2.719, + "args": { + "External id": 3292763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463791.902, "dur": 1.593, + "args": { + "External id": 3292764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463792.438, "dur": 0.968, + "args": { + "External id": 3292765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463797.586, "dur": 7.342, + "args": { + "External id": 3292766,"Record function id": 0, "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463799.274, "dur": 5.256, + "args": { + "External id": 3292767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463800.385, "dur": 3.758, + "args": { + "External id": 3292768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463801.242, "dur": 2.843, + "args": { + "External id": 3292769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463808.058, "dur": 4.580, + "args": { + "External id": 3292770,"Record function id": 0, "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463809.646, "dur": 2.577, + "args": { + "External id": 3292771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463810.261, "dur": 1.585, + "args": { + "External id": 3292772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463810.956, "dur": 0.821, + "args": { + "External id": 3292773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463815.847, "dur": 3.647, + "args": { + "External id": 3292774,"Record function id": 0, "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463817.151, "dur": 1.908, + "args": { + "External id": 3292775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463817.585, "dur": 1.089, + "args": { + "External id": 3292776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463818.029, "dur": 0.543, + "args": { + "External id": 3292777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463822.573, "dur": 3.677, + "args": { + "External id": 3292778,"Record function id": 0, "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463823.941, "dur": 1.911, + "args": { + "External id": 3292779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463824.405, "dur": 1.063, + "args": { + "External id": 3292780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463824.685, "dur": 0.704, + "args": { + "External id": 3292781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463829.283, "dur": 3.853, + "args": { + "External id": 3292782,"Record function id": 0, "Ev Idx": 3181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463830.652, "dur": 2.073, + "args": { + "External id": 3292783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463831.103, "dur": 1.222, + "args": { + "External id": 3292784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463831.552, "dur": 0.699, + "args": { + "External id": 3292785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463836.236, "dur": 3.879, + "args": { + "External id": 3292786,"Record function id": 0, "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596463837.618, "dur": 2.087, + "args": { + "External id": 3292787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463838.088, "dur": 1.235, + "args": { + "External id": 3292788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596463838.670, "dur": 0.565, + "args": { + "External id": 3292789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596463843.684, "dur": 36180.376, + "args": { + "External id": 3292790,"Record function id": 0, "Sequence number": 32987139, "Fwd thread id": 1, "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596463844.849, "dur": 36169.396, + "args": { + "External id": 3292791,"Sequence number": 32987139, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3190 + } + }, + { + "ph": "f", "id": 193, "pid": 1336756, "tid": 1381189, "ts": 1587596463844.849, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 1336756, "tid": 1381189, + "ts": 1587596463890.004, "dur": 41.916, + "args": { + "External id": 3292792,"Record function id": 0, "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 1336756, "tid": 1381189, + "ts": 1587596463940.071, "dur": 107.216, + "args": { + "External id": 3292793,"Record function id": 0, "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 1336756, "tid": 1381189, + "ts": 1587596464055.679, "dur": 35918.341, + "args": { + "External id": 3292794,"Record function id": 0, "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596464144.197, "dur": 6.387, + "args": { + "External id": 3292795,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596464161.088, "dur": 5.061, + "args": { + "External id": 3292796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596464181.128, "dur": 34983.915, + "args": { + "External id": 3292797,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596464194.077, "dur": 34960.762, + "args": { + "External id": 3292798,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596464238.386, "dur": 15.075, + "args": { + "External id": 3292799,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596464262.838, "dur": 34850.438, + "args": { + "External id": 3292800,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596464265.628, "dur": 34846.484, + "args": { + "External id": 3292801,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596464269.848, "dur": 4.848, + "args": { + "External id": 3292802,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596464276.378, "dur": 34831.082, + "args": { + "External id": 3292803,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596499263.807, "dur": 11.566, + "args": { + "External id": 3292804,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596499268.096, "dur": 6.827, + "args": { + "External id": 3292805,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596499306.953, "dur": 347.915, + "args": { + "External id": 3292806,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596499333.809, "dur": 316.425, + "args": { + "External id": 3292807,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3206, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596499345.034, "dur": 299.708, + "args": { + "External id": 3292808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596499674.398, "dur": 2.250, + "args": { + "External id": 3292809,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3208, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499730.425, "dur": 6.209, + "args": { + "External id": 3292810,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499782.797, "dur": 1.615, + "args": { + "External id": 3292811,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499801.552, "dur": 1.014, + "args": { + "External id": 3292812,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499817.825, "dur": 0.967, + "args": { + "External id": 3292813,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499830.485, "dur": 0.938, + "args": { + "External id": 3292814,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499843.381, "dur": 0.862, + "args": { + "External id": 3292815,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499856.198, "dur": 1.025, + "args": { + "External id": 3292816,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499893.362, "dur": 2.343, + "args": { + "External id": 3292817,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596499910.057, "dur": 0.799, + "args": { + "External id": 3292818,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596500040.986, "dur": 2743.406, + "args": { + "External id": 3292819,"Record function id": 0, "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 1336756, "tid": 1381189, + "ts": 1587596500062.967, "dur": 1040.269, + "args": { + "External id": 3292820,"Record function id": 0, "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 1336756, "tid": 1381189, + "ts": 1587596500083.084, "dur": 331.612, + "args": { + "External id": 3292821,"Record function id": 0, "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500168.319, "dur": 4.536, + "args": { + "External id": 3292822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500176.151, "dur": 1.183, + "args": { + "External id": 3292823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500179.245, "dur": 1.009, + "args": { + "External id": 3292824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500182.182, "dur": 2.941, + "args": { + "External id": 3292825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500186.552, "dur": 0.702, + "args": { + "External id": 3292826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500188.962, "dur": 0.768, + "args": { + "External id": 3292827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500191.518, "dur": 1.735, + "args": { + "External id": 3292828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500197.798, "dur": 0.949, + "args": { + "External id": 3292829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500200.307, "dur": 0.795, + "args": { + "External id": 3292830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596500202.672, "dur": 0.773, + "args": { + "External id": 3292831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596500222.080, "dur": 160.465, + "args": { + "External id": 3292832,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596500238.126, "dur": 139.913, + "args": { + "External id": 3292833,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596500263.883, "dur": 13.817, + "args": { + "External id": 3292834,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596500281.034, "dur": 67.636, + "args": { + "External id": 3292835,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596500283.873, "dur": 64.502, + "args": { + "External id": 3292836,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500287.808, "dur": 6.939, + "args": { + "External id": 3292837,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596500296.653, "dur": 51.264, + "args": { + "External id": 3292838,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 1336756, "tid": 1381189, + "ts": 1587596500498.415, "dur": 597.262, + "args": { + "External id": 3292839,"Record function id": 0, "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 1336756, "tid": 1381189, + "ts": 1587596500513.501, "dur": 568.950, + "args": { + "External id": 3292840,"Record function id": 0, "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596500568.839, "dur": 4.166, + "args": { + "External id": 3292841,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596500587.566, "dur": 33.720, + "args": { + "External id": 3292842,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500592.606, "dur": 1.385, + "args": { + "External id": 3292843,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500596.402, "dur": 0.583, + "args": { + "External id": 3292844,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500598.712, "dur": 0.373, + "args": { + "External id": 3292845,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500600.585, "dur": 0.622, + "args": { + "External id": 3292846,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500602.942, "dur": 0.404, + "args": { + "External id": 3292847,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500605.177, "dur": 0.412, + "args": { + "External id": 3292848,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500610.567, "dur": 2.578, + "args": { + "External id": 3292849,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500614.844, "dur": 0.365, + "args": { + "External id": 3292850,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500617.123, "dur": 0.433, + "args": { + "External id": 3292851,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596500630.789, "dur": 30.800, + "args": { + "External id": 3292852,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596500695.850, "dur": 98.934, + "args": { + "External id": 3292853,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596500705.300, "dur": 2.963, + "args": { + "External id": 3292854,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596500713.686, "dur": 9.888, + "args": { + "External id": 3292855,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596500717.843, "dur": 5.320, + "args": { + "External id": 3292856,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500721.313, "dur": 0.668, + "args": { + "External id": 3292857,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596500729.729, "dur": 27.009, + "args": { + "External id": 3292858,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500732.039, "dur": 0.595, + "args": { + "External id": 3292859,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500735.080, "dur": 0.382, + "args": { + "External id": 3292860,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500737.089, "dur": 0.375, + "args": { + "External id": 3292861,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500739.373, "dur": 2.439, + "args": { + "External id": 3292862,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500743.725, "dur": 0.431, + "args": { + "External id": 3292863,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500746.108, "dur": 0.346, + "args": { + "External id": 3292864,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500748.550, "dur": 0.593, + "args": { + "External id": 3292865,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500750.830, "dur": 0.361, + "args": { + "External id": 3292866,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596500752.744, "dur": 0.379, + "args": { + "External id": 3292867,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596500767.876, "dur": 20.043, + "args": { + "External id": 3292868,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596500839.338, "dur": 135.498, + "args": { + "External id": 3292869,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596500861.092, "dur": 110.027, + "args": { + "External id": 3292870,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3269, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596500892.385, "dur": 74.449, + "args": { + "External id": 3292871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596501025.559, "dur": 3.082, + "args": { + "External id": 3292872,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3271, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596501110.910, "dur": 1655.501, + "args": { + "External id": 3292873,"Sequence number": 32987138, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3272 + } + }, + { + "ph": "f", "id": 194, "pid": 1336756, "tid": 1381189, "ts": 1587596501110.910, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501224.406, "dur": 104.663, + "args": { + "External id": 3292874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596501366.861, "dur": 38.676, + "args": { + "External id": 3292875,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501423.977, "dur": 51.645, + "args": { + "External id": 3292876,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501484.919, "dur": 33.602, + "args": { + "External id": 3292877,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501527.884, "dur": 46.891, + "args": { + "External id": 3292878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501600.193, "dur": 30.539, + "args": { + "External id": 3292879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501639.436, "dur": 44.007, + "args": { + "External id": 3292880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596501705.015, "dur": 24.099, + "args": { + "External id": 3292881,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596501750.583, "dur": 33.549, + "args": { + "External id": 3292882,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596501804.098, "dur": 20.756, + "args": { + "External id": 3292883,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596501840.197, "dur": 17.376, + "args": { + "External id": 3292884,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501883.284, "dur": 36.383, + "args": { + "External id": 3292885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596501924.195, "dur": 33.880, + "args": { + "External id": 3292886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596502020.110, "dur": 183.021, + "args": { + "External id": 3292887,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596502100.470, "dur": 9.583, + "args": { + "External id": 3292888,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596502112.183, "dur": 2.207, + "args": { + "External id": 3292889,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596502234.742, "dur": 26.864, + "args": { + "External id": 3292890,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596502273.673, "dur": 16.205, + "args": { + "External id": 3292891,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596502300.978, "dur": 40.582, + "args": { + "External id": 3292892,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596502347.333, "dur": 41.208, + "args": { + "External id": 3292893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596502397.562, "dur": 22.449, + "args": { + "External id": 3292894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596502424.926, "dur": 29.976, + "args": { + "External id": 3292895,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596502470.533, "dur": 30.874, + "args": { + "External id": 3292896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596502514.673, "dur": 33.288, + "args": { + "External id": 3292897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596502569.993, "dur": 23.891, + "args": { + "External id": 3292898,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596502610.709, "dur": 26.912, + "args": { + "External id": 3292899,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596502652.437, "dur": 17.712, + "args": { + "External id": 3292900,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596502692.523, "dur": 15.313, + "args": { + "External id": 3292901,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596502721.199, "dur": 17.983, + "args": { + "External id": 3292902,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502806.194, "dur": 14.982, + "args": { + "External id": 3292903,"Record function id": 0, "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502809.397, "dur": 10.844, + "args": { + "External id": 3292904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502813.464, "dur": 5.848, + "args": { + "External id": 3292905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502815.493, "dur": 3.643, + "args": { + "External id": 3292906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502825.211, "dur": 4.770, + "args": { + "External id": 3292907,"Record function id": 0, "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502826.728, "dur": 2.797, + "args": { + "External id": 3292908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502827.541, "dur": 1.461, + "args": { + "External id": 3292909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502828.020, "dur": 0.882, + "args": { + "External id": 3292910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502833.279, "dur": 6.328, + "args": { + "External id": 3292911,"Record function id": 0, "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502834.672, "dur": 4.489, + "args": { + "External id": 3292912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502835.530, "dur": 3.180, + "args": { + "External id": 3292913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502835.888, "dur": 2.746, + "args": { + "External id": 3292914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502842.710, "dur": 4.363, + "args": { + "External id": 3292915,"Record function id": 0, "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502844.092, "dur": 2.574, + "args": { + "External id": 3292916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502844.727, "dur": 1.467, + "args": { + "External id": 3292917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502845.027, "dur": 1.077, + "args": { + "External id": 3292918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502850.380, "dur": 4.597, + "args": { + "External id": 3292919,"Record function id": 0, "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502852.032, "dur": 2.528, + "args": { + "External id": 3292920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502852.683, "dur": 1.430, + "args": { + "External id": 3292921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502853.278, "dur": 0.719, + "args": { + "External id": 3292922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502858.291, "dur": 3.911, + "args": { + "External id": 3292923,"Record function id": 0, "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502859.448, "dur": 2.349, + "args": { + "External id": 3292924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502859.948, "dur": 1.429, + "args": { + "External id": 3292925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502860.699, "dur": 0.606, + "args": { + "External id": 3292926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502865.545, "dur": 25.191, + "args": { + "External id": 3292927,"Record function id": 0, "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502884.733, "dur": 4.877, + "args": { + "External id": 3292928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502886.140, "dur": 2.637, + "args": { + "External id": 3292929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502887.154, "dur": 1.355, + "args": { + "External id": 3292930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502895.940, "dur": 4.450, + "args": { + "External id": 3292931,"Record function id": 0, "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502897.463, "dur": 2.489, + "args": { + "External id": 3292932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502897.961, "dur": 1.540, + "args": { + "External id": 3292933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502898.742, "dur": 0.633, + "args": { + "External id": 3292934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502903.701, "dur": 4.302, + "args": { + "External id": 3292935,"Record function id": 0, "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596502904.974, "dur": 2.630, + "args": { + "External id": 3292936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502905.759, "dur": 1.416, + "args": { + "External id": 3292937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596502906.428, "dur": 0.676, + "args": { + "External id": 3292938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596502912.213, "dur": 36860.398, + "args": { + "External id": 3292939,"Record function id": 0, "Sequence number": 32987137, "Fwd thread id": 1, "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596502913.634, "dur": 36851.051, + "args": { + "External id": 3292940,"Sequence number": 32987137, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3339 + } + }, + { + "ph": "f", "id": 195, "pid": 1336756, "tid": 1381189, "ts": 1587596502913.634, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 1336756, "tid": 1381189, + "ts": 1587596502942.244, "dur": 38.018, + "args": { + "External id": 3292941,"Record function id": 0, "Ev Idx": 3340 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 1336756, "tid": 1381189, + "ts": 1587596503025.270, "dur": 66.082, + "args": { + "External id": 3292942,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 1336756, "tid": 1381189, + "ts": 1587596503097.480, "dur": 36659.783, + "args": { + "External id": 3292943,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596503184.973, "dur": 7.414, + "args": { + "External id": 3292944,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596503202.415, "dur": 7.217, + "args": { + "External id": 3292945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596503223.932, "dur": 35783.307, + "args": { + "External id": 3292946,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596503237.268, "dur": 35736.816, + "args": { + "External id": 3292947,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596503281.151, "dur": 13.569, + "args": { + "External id": 3292948,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596503301.027, "dur": 35630.422, + "args": { + "External id": 3292949,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596503303.933, "dur": 35626.295, + "args": { + "External id": 3292950,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596503308.096, "dur": 5.314, + "args": { + "External id": 3292951,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596503315.354, "dur": 35610.661, + "args": { + "External id": 3292952,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596539107.147, "dur": 10.822, + "args": { + "External id": 3292953,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596539110.586, "dur": 6.965, + "args": { + "External id": 3292954,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596539147.988, "dur": 293.548, + "args": { + "External id": 3292955,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596539177.582, "dur": 259.772, + "args": { + "External id": 3292956,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3355, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596539188.500, "dur": 243.386, + "args": { + "External id": 3292957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596539469.610, "dur": 2.078, + "args": { + "External id": 3292958,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3357, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539528.528, "dur": 6.328, + "args": { + "External id": 3292959,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539583.861, "dur": 1.260, + "args": { + "External id": 3292960,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539603.493, "dur": 3.889, + "args": { + "External id": 3292961,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539620.652, "dur": 0.936, + "args": { + "External id": 3292962,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539635.009, "dur": 1.090, + "args": { + "External id": 3292963,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539648.166, "dur": 1.043, + "args": { + "External id": 3292964,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539660.605, "dur": 2.626, + "args": { + "External id": 3292965,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539680.184, "dur": 2.023, + "args": { + "External id": 3292966,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596539694.241, "dur": 0.941, + "args": { + "External id": 3292967,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596539790.625, "dur": 2763.658, + "args": { + "External id": 3292968,"Record function id": 0, "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 1336756, "tid": 1381189, + "ts": 1587596539808.486, "dur": 1009.556, + "args": { + "External id": 3292969,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 1336756, "tid": 1381189, + "ts": 1587596539821.803, "dur": 381.437, + "args": { + "External id": 3292970,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539923.981, "dur": 4.513, + "args": { + "External id": 3292971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539932.392, "dur": 0.832, + "args": { + "External id": 3292972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539935.475, "dur": 2.833, + "args": { + "External id": 3292973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539940.093, "dur": 0.724, + "args": { + "External id": 3292974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539942.503, "dur": 0.871, + "args": { + "External id": 3292975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539945.462, "dur": 0.879, + "args": { + "External id": 3292976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539948.104, "dur": 1.645, + "args": { + "External id": 3292977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539951.537, "dur": 0.842, + "args": { + "External id": 3292978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539954.544, "dur": 0.580, + "args": { + "External id": 3292979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596539956.630, "dur": 0.635, + "args": { + "External id": 3292980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596539979.448, "dur": 188.542, + "args": { + "External id": 3292981,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596540030.456, "dur": 132.821, + "args": { + "External id": 3292982,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596540051.373, "dur": 13.121, + "args": { + "External id": 3292983,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596540067.974, "dur": 66.223, + "args": { + "External id": 3292984,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596540070.615, "dur": 63.266, + "args": { + "External id": 3292985,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540074.910, "dur": 6.261, + "args": { + "External id": 3292986,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596540083.137, "dur": 50.202, + "args": { + "External id": 3292987,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 1336756, "tid": 1381189, + "ts": 1587596540283.253, "dur": 526.632, + "args": { + "External id": 3292988,"Record function id": 0, "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 1336756, "tid": 1381189, + "ts": 1587596540300.514, "dur": 496.747, + "args": { + "External id": 3292989,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596540356.523, "dur": 4.924, + "args": { + "External id": 3292990,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596540376.206, "dur": 30.986, + "args": { + "External id": 3292991,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540381.076, "dur": 1.304, + "args": { + "External id": 3292992,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540385.162, "dur": 0.601, + "args": { + "External id": 3292993,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540387.603, "dur": 0.491, + "args": { + "External id": 3292994,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540389.791, "dur": 0.402, + "args": { + "External id": 3292995,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540392.149, "dur": 0.458, + "args": { + "External id": 3292996,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540394.571, "dur": 2.085, + "args": { + "External id": 3292997,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540398.298, "dur": 0.553, + "args": { + "External id": 3292998,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540400.463, "dur": 0.370, + "args": { + "External id": 3292999,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540402.427, "dur": 0.485, + "args": { + "External id": 3293000,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596540417.133, "dur": 32.030, + "args": { + "External id": 3293001,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596540483.539, "dur": 100.168, + "args": { + "External id": 3293002,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596540493.511, "dur": 2.960, + "args": { + "External id": 3293003,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596540501.432, "dur": 9.913, + "args": { + "External id": 3293004,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596540505.670, "dur": 5.242, + "args": { + "External id": 3293005,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540509.225, "dur": 0.424, + "args": { + "External id": 3293006,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596540517.785, "dur": 28.562, + "args": { + "External id": 3293007,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540520.327, "dur": 0.709, + "args": { + "External id": 3293008,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540525.776, "dur": 0.367, + "args": { + "External id": 3293009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540527.747, "dur": 2.263, + "args": { + "External id": 3293010,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540531.771, "dur": 0.414, + "args": { + "External id": 3293011,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540534.072, "dur": 0.342, + "args": { + "External id": 3293012,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540535.855, "dur": 0.340, + "args": { + "External id": 3293013,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540537.890, "dur": 0.406, + "args": { + "External id": 3293014,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540540.102, "dur": 0.411, + "args": { + "External id": 3293015,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596540542.033, "dur": 0.399, + "args": { + "External id": 3293016,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596540556.145, "dur": 20.793, + "args": { + "External id": 3293017,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596540627.269, "dur": 107.920, + "args": { + "External id": 3293018,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596540648.465, "dur": 83.289, + "args": { + "External id": 3293019,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3418, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596540658.134, "dur": 69.600, + "args": { + "External id": 3293020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596540748.705, "dur": 2.054, + "args": { + "External id": 3293021,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3420, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596540824.934, "dur": 1712.400, + "args": { + "External id": 3293022,"Sequence number": 32987136, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3421 + } + }, + { + "ph": "f", "id": 196, "pid": 1336756, "tid": 1381189, "ts": 1587596540824.934, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596540955.610, "dur": 151.932, + "args": { + "External id": 3293023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596541149.593, "dur": 40.892, + "args": { + "External id": 3293024,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596541209.499, "dur": 53.886, + "args": { + "External id": 3293025,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596541275.772, "dur": 33.906, + "args": { + "External id": 3293026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596541317.215, "dur": 46.209, + "args": { + "External id": 3293027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596541371.480, "dur": 29.541, + "args": { + "External id": 3293028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596541407.846, "dur": 41.163, + "args": { + "External id": 3293029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596541476.314, "dur": 22.497, + "args": { + "External id": 3293030,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596541520.277, "dur": 29.869, + "args": { + "External id": 3293031,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596541572.161, "dur": 19.504, + "args": { + "External id": 3293032,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596541604.981, "dur": 15.976, + "args": { + "External id": 3293033,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596541628.488, "dur": 27.149, + "args": { + "External id": 3293034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596541659.080, "dur": 32.231, + "args": { + "External id": 3293035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596541732.479, "dur": 192.732, + "args": { + "External id": 3293036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596541811.761, "dur": 5.446, + "args": { + "External id": 3293037,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596541819.308, "dur": 2.012, + "args": { + "External id": 3293038,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596541958.984, "dur": 61.500, + "args": { + "External id": 3293039,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596542034.344, "dur": 18.295, + "args": { + "External id": 3293040,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596542063.924, "dur": 47.405, + "args": { + "External id": 3293041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596542118.761, "dur": 37.947, + "args": { + "External id": 3293042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596542168.922, "dur": 22.776, + "args": { + "External id": 3293043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596542197.336, "dur": 31.007, + "args": { + "External id": 3293044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596542249.023, "dur": 32.027, + "args": { + "External id": 3293045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596542290.693, "dur": 33.854, + "args": { + "External id": 3293046,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596542345.182, "dur": 23.697, + "args": { + "External id": 3293047,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596542385.882, "dur": 27.154, + "args": { + "External id": 3293048,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596542427.580, "dur": 18.566, + "args": { + "External id": 3293049,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596542463.580, "dur": 15.684, + "args": { + "External id": 3293050,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596542491.487, "dur": 17.853, + "args": { + "External id": 3293051,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542576.056, "dur": 15.017, + "args": { + "External id": 3293052,"Record function id": 0, "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542579.469, "dur": 10.796, + "args": { + "External id": 3293053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542583.526, "dur": 6.002, + "args": { + "External id": 3293054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542585.514, "dur": 3.868, + "args": { + "External id": 3293055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542594.732, "dur": 7.242, + "args": { + "External id": 3293056,"Record function id": 0, "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542596.181, "dur": 5.339, + "args": { + "External id": 3293057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542596.828, "dur": 4.054, + "args": { + "External id": 3293058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542598.020, "dur": 2.762, + "args": { + "External id": 3293059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542605.214, "dur": 5.575, + "args": { + "External id": 3293060,"Record function id": 0, "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542606.790, "dur": 3.554, + "args": { + "External id": 3293061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542607.901, "dur": 1.943, + "args": { + "External id": 3293062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542608.606, "dur": 1.154, + "args": { + "External id": 3293063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542613.822, "dur": 4.451, + "args": { + "External id": 3293064,"Record function id": 0, "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542615.211, "dur": 2.656, + "args": { + "External id": 3293065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542615.952, "dur": 1.483, + "args": { + "External id": 3293066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542616.586, "dur": 0.758, + "args": { + "External id": 3293067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542621.443, "dur": 4.128, + "args": { + "External id": 3293068,"Record function id": 0, "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542622.937, "dur": 2.248, + "args": { + "External id": 3293069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542623.435, "dur": 1.324, + "args": { + "External id": 3293070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542624.019, "dur": 0.666, + "args": { + "External id": 3293071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542628.659, "dur": 3.995, + "args": { + "External id": 3293072,"Record function id": 0, "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542629.833, "dur": 2.402, + "args": { + "External id": 3293073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542630.597, "dur": 1.212, + "args": { + "External id": 3293074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542631.035, "dur": 0.666, + "args": { + "External id": 3293075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542635.724, "dur": 3.678, + "args": { + "External id": 3293076,"Record function id": 0, "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542636.911, "dur": 2.081, + "args": { + "External id": 3293077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542637.378, "dur": 1.113, + "args": { + "External id": 3293078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542637.867, "dur": 0.550, + "args": { + "External id": 3293079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542642.384, "dur": 3.697, + "args": { + "External id": 3293080,"Record function id": 0, "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542643.673, "dur": 2.011, + "args": { + "External id": 3293081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542644.120, "dur": 1.145, + "args": { + "External id": 3293082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542644.578, "dur": 0.579, + "args": { + "External id": 3293083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542649.190, "dur": 5.827, + "args": { + "External id": 3293084,"Record function id": 0, "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596542650.376, "dur": 4.250, + "args": { + "External id": 3293085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542650.824, "dur": 3.386, + "args": { + "External id": 3293086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596542651.446, "dur": 2.650, + "args": { + "External id": 3293087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596542658.922, "dur": 36923.182, + "args": { + "External id": 3293088,"Record function id": 0, "Sequence number": 32987135, "Fwd thread id": 1, "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596542660.229, "dur": 36914.198, + "args": { + "External id": 3293089,"Sequence number": 32987135, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3488 + } + }, + { + "ph": "f", "id": 197, "pid": 1336756, "tid": 1381189, "ts": 1587596542660.229, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 1336756, "tid": 1381189, + "ts": 1587596542686.386, "dur": 34.910, + "args": { + "External id": 3293090,"Record function id": 0, "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 1336756, "tid": 1381189, + "ts": 1587596542728.469, "dur": 59.796, + "args": { + "External id": 3293091,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 1336756, "tid": 1381189, + "ts": 1587596542793.995, "dur": 36772.801, + "args": { + "External id": 3293092,"Record function id": 0, "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596542895.959, "dur": 7.237, + "args": { + "External id": 3293093,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596542914.596, "dur": 4.973, + "args": { + "External id": 3293094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596542935.951, "dur": 35714.272, + "args": { + "External id": 3293095,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596542949.360, "dur": 35690.935, + "args": { + "External id": 3293096,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596543034.779, "dur": 14.708, + "args": { + "External id": 3293097,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596543056.103, "dur": 35543.266, + "args": { + "External id": 3293098,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596543058.797, "dur": 35539.825, + "args": { + "External id": 3293099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596543063.349, "dur": 6.531, + "args": { + "External id": 3293100,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596543071.870, "dur": 35522.963, + "args": { + "External id": 3293101,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596578740.000, "dur": 9.287, + "args": { + "External id": 3293102,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596578743.336, "dur": 5.639, + "args": { + "External id": 3293103,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596578780.732, "dur": 464.464, + "args": { + "External id": 3293104,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596578808.454, "dur": 431.324, + "args": { + "External id": 3293105,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3504, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596578819.595, "dur": 413.857, + "args": { + "External id": 3293106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596579271.512, "dur": 2.567, + "args": { + "External id": 3293107,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3506, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579340.313, "dur": 6.744, + "args": { + "External id": 3293108,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579398.523, "dur": 3.301, + "args": { + "External id": 3293109,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579419.424, "dur": 0.994, + "args": { + "External id": 3293110,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579434.247, "dur": 0.883, + "args": { + "External id": 3293111,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579447.980, "dur": 0.881, + "args": { + "External id": 3293112,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579460.613, "dur": 3.081, + "args": { + "External id": 3293113,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579475.867, "dur": 1.115, + "args": { + "External id": 3293114,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579490.192, "dur": 2.000, + "args": { + "External id": 3293115,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579504.559, "dur": 0.704, + "args": { + "External id": 3293116,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596579596.710, "dur": 2846.141, + "args": { + "External id": 3293117,"Record function id": 0, "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 1336756, "tid": 1381189, + "ts": 1587596579616.448, "dur": 1046.313, + "args": { + "External id": 3293118,"Record function id": 0, "Ev Idx": 3517 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 1336756, "tid": 1381189, + "ts": 1587596579632.735, "dur": 344.503, + "args": { + "External id": 3293119,"Record function id": 0, "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579708.004, "dur": 5.313, + "args": { + "External id": 3293120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579716.565, "dur": 0.971, + "args": { + "External id": 3293121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579719.554, "dur": 0.796, + "args": { + "External id": 3293122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579722.174, "dur": 0.646, + "args": { + "External id": 3293123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579724.355, "dur": 0.667, + "args": { + "External id": 3293124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579727.043, "dur": 0.932, + "args": { + "External id": 3293125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579729.525, "dur": 1.644, + "args": { + "External id": 3293126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579732.859, "dur": 0.850, + "args": { + "External id": 3293127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579735.320, "dur": 2.863, + "args": { + "External id": 3293128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596579740.077, "dur": 0.655, + "args": { + "External id": 3293129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596579759.220, "dur": 180.376, + "args": { + "External id": 3293130,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596579775.539, "dur": 159.283, + "args": { + "External id": 3293131,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596579795.792, "dur": 12.021, + "args": { + "External id": 3293132,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596579811.284, "dur": 89.010, + "args": { + "External id": 3293133,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596579813.902, "dur": 85.965, + "args": { + "External id": 3293134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596579818.347, "dur": 6.116, + "args": { + "External id": 3293135,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596579826.350, "dur": 72.543, + "args": { + "External id": 3293136,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 1336756, "tid": 1381189, + "ts": 1587596580107.120, "dur": 547.396, + "args": { + "External id": 3293137,"Record function id": 0, "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 1336756, "tid": 1381189, + "ts": 1587596580125.316, "dur": 512.856, + "args": { + "External id": 3293138,"Record function id": 0, "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596580180.472, "dur": 5.848, + "args": { + "External id": 3293139,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596580201.931, "dur": 31.586, + "args": { + "External id": 3293140,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580207.318, "dur": 1.529, + "args": { + "External id": 3293141,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580211.119, "dur": 0.434, + "args": { + "External id": 3293142,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580213.247, "dur": 0.548, + "args": { + "External id": 3293143,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580215.787, "dur": 2.041, + "args": { + "External id": 3293144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580219.575, "dur": 0.524, + "args": { + "External id": 3293145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580221.789, "dur": 0.489, + "args": { + "External id": 3293146,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580224.087, "dur": 0.379, + "args": { + "External id": 3293147,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580226.237, "dur": 0.397, + "args": { + "External id": 3293148,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580228.825, "dur": 0.366, + "args": { + "External id": 3293149,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596580243.703, "dur": 33.848, + "args": { + "External id": 3293150,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596580307.868, "dur": 115.787, + "args": { + "External id": 3293151,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596580321.019, "dur": 2.990, + "args": { + "External id": 3293152,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596580336.029, "dur": 10.355, + "args": { + "External id": 3293153,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596580340.445, "dur": 5.508, + "args": { + "External id": 3293154,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580344.129, "dur": 0.596, + "args": { + "External id": 3293155,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596580355.610, "dur": 30.013, + "args": { + "External id": 3293156,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580358.262, "dur": 2.064, + "args": { + "External id": 3293157,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580362.269, "dur": 0.488, + "args": { + "External id": 3293158,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580364.644, "dur": 0.395, + "args": { + "External id": 3293159,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580366.583, "dur": 0.467, + "args": { + "External id": 3293160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580368.693, "dur": 0.707, + "args": { + "External id": 3293161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580371.115, "dur": 0.602, + "args": { + "External id": 3293162,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580373.097, "dur": 0.333, + "args": { + "External id": 3293163,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580374.673, "dur": 0.297, + "args": { + "External id": 3293164,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596580379.758, "dur": 2.299, + "args": { + "External id": 3293165,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596580396.108, "dur": 20.477, + "args": { + "External id": 3293166,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596580467.571, "dur": 107.305, + "args": { + "External id": 3293167,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596580487.647, "dur": 83.530, + "args": { + "External id": 3293168,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3567, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596580497.285, "dur": 69.172, + "args": { + "External id": 3293169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596580589.139, "dur": 1.781, + "args": { + "External id": 3293170,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3569, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596580670.330, "dur": 1752.530, + "args": { + "External id": 3293171,"Sequence number": 32987134, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3570 + } + }, + { + "ph": "f", "id": 198, "pid": 1336756, "tid": 1381189, "ts": 1587596580670.330, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596580775.972, "dur": 127.816, + "args": { + "External id": 3293172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596580948.602, "dur": 79.652, + "args": { + "External id": 3293173,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581051.311, "dur": 62.324, + "args": { + "External id": 3293174,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581123.725, "dur": 34.818, + "args": { + "External id": 3293175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581165.651, "dur": 45.302, + "args": { + "External id": 3293176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581219.141, "dur": 28.462, + "args": { + "External id": 3293177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581257.573, "dur": 43.120, + "args": { + "External id": 3293178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596581325.162, "dur": 24.193, + "args": { + "External id": 3293179,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596581367.236, "dur": 29.663, + "args": { + "External id": 3293180,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596581415.947, "dur": 19.928, + "args": { + "External id": 3293181,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596581450.067, "dur": 17.472, + "args": { + "External id": 3293182,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581477.282, "dur": 29.713, + "args": { + "External id": 3293183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581510.566, "dur": 34.577, + "args": { + "External id": 3293184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596581596.789, "dur": 175.514, + "args": { + "External id": 3293185,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596581676.972, "dur": 5.673, + "args": { + "External id": 3293186,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596581684.618, "dur": 1.820, + "args": { + "External id": 3293187,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596581804.374, "dur": 24.178, + "args": { + "External id": 3293188,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596581839.953, "dur": 15.466, + "args": { + "External id": 3293189,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581884.017, "dur": 44.174, + "args": { + "External id": 3293190,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596581939.181, "dur": 37.991, + "args": { + "External id": 3293191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596582024.061, "dur": 28.365, + "args": { + "External id": 3293192,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596582059.641, "dur": 51.441, + "args": { + "External id": 3293193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596582124.870, "dur": 25.867, + "args": { + "External id": 3293194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596582162.068, "dur": 35.285, + "args": { + "External id": 3293195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596582217.508, "dur": 28.306, + "args": { + "External id": 3293196,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596582272.456, "dur": 25.336, + "args": { + "External id": 3293197,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596582313.572, "dur": 20.644, + "args": { + "External id": 3293198,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596582350.129, "dur": 15.311, + "args": { + "External id": 3293199,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596582376.793, "dur": 18.713, + "args": { + "External id": 3293200,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582465.373, "dur": 16.714, + "args": { + "External id": 3293201,"Record function id": 0, "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582468.351, "dur": 12.757, + "args": { + "External id": 3293202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582472.511, "dur": 7.687, + "args": { + "External id": 3293203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582474.191, "dur": 5.887, + "args": { + "External id": 3293204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582485.896, "dur": 4.871, + "args": { + "External id": 3293205,"Record function id": 0, "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582487.162, "dur": 3.156, + "args": { + "External id": 3293206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582488.153, "dur": 1.709, + "args": { + "External id": 3293207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582488.813, "dur": 0.975, + "args": { + "External id": 3293208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582493.895, "dur": 3.973, + "args": { + "External id": 3293209,"Record function id": 0, "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582494.820, "dur": 2.654, + "args": { + "External id": 3293210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582495.373, "dur": 1.644, + "args": { + "External id": 3293211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582495.969, "dur": 0.961, + "args": { + "External id": 3293212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582500.948, "dur": 4.073, + "args": { + "External id": 3293213,"Record function id": 0, "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582502.208, "dur": 2.371, + "args": { + "External id": 3293214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582502.954, "dur": 1.181, + "args": { + "External id": 3293215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582503.495, "dur": 0.577, + "args": { + "External id": 3293216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582508.035, "dur": 3.840, + "args": { + "External id": 3293217,"Record function id": 0, "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582509.184, "dur": 2.271, + "args": { + "External id": 3293218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582509.781, "dur": 1.236, + "args": { + "External id": 3293219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582510.218, "dur": 0.733, + "args": { + "External id": 3293220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582514.990, "dur": 3.624, + "args": { + "External id": 3293221,"Record function id": 0, "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582515.985, "dur": 2.194, + "args": { + "External id": 3293222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582516.461, "dur": 1.241, + "args": { + "External id": 3293223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582516.874, "dur": 0.731, + "args": { + "External id": 3293224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582521.753, "dur": 3.283, + "args": { + "External id": 3293225,"Record function id": 0, "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582522.746, "dur": 1.874, + "args": { + "External id": 3293226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582523.173, "dur": 0.997, + "args": { + "External id": 3293227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582523.469, "dur": 0.610, + "args": { + "External id": 3293228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582528.047, "dur": 4.953, + "args": { + "External id": 3293229,"Record function id": 0, "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582528.957, "dur": 3.645, + "args": { + "External id": 3293230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582529.423, "dur": 2.729, + "args": { + "External id": 3293231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582529.711, "dur": 2.357, + "args": { + "External id": 3293232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582536.015, "dur": 4.636, + "args": { + "External id": 3293233,"Record function id": 0, "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596582537.213, "dur": 3.005, + "args": { + "External id": 3293234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582537.642, "dur": 2.100, + "args": { + "External id": 3293235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596582538.437, "dur": 1.214, + "args": { + "External id": 3293236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596582544.524, "dur": 36299.582, + "args": { + "External id": 3293237,"Record function id": 0, "Sequence number": 32987133, "Fwd thread id": 1, "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596582545.793, "dur": 36290.329, + "args": { + "External id": 3293238,"Sequence number": 32987133, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3637 + } + }, + { + "ph": "f", "id": 199, "pid": 1336756, "tid": 1381189, "ts": 1587596582545.793, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 1336756, "tid": 1381189, + "ts": 1587596582576.274, "dur": 37.559, + "args": { + "External id": 3293239,"Record function id": 0, "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 1336756, "tid": 1381189, + "ts": 1587596582621.403, "dur": 61.340, + "args": { + "External id": 3293240,"Record function id": 0, "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 1336756, "tid": 1381189, + "ts": 1587596582688.923, "dur": 36139.242, + "args": { + "External id": 3293241,"Record function id": 0, "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596582773.524, "dur": 5.895, + "args": { + "External id": 3293242,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596582788.772, "dur": 4.500, + "args": { + "External id": 3293243,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596582806.487, "dur": 35003.935, + "args": { + "External id": 3293244,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596582819.387, "dur": 34980.042, + "args": { + "External id": 3293245,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596582857.951, "dur": 31.246, + "args": { + "External id": 3293246,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596582896.122, "dur": 34860.484, + "args": { + "External id": 3293247,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596582898.904, "dur": 34856.869, + "args": { + "External id": 3293248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596582903.048, "dur": 5.681, + "args": { + "External id": 3293249,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596582910.647, "dur": 34840.835, + "args": { + "External id": 3293250,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596618121.516, "dur": 13.864, + "args": { + "External id": 3293251,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596618125.492, "dur": 9.207, + "args": { + "External id": 3293252,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596618168.143, "dur": 369.159, + "args": { + "External id": 3293253,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596618196.475, "dur": 335.900, + "args": { + "External id": 3293254,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3653, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596618212.228, "dur": 314.306, + "args": { + "External id": 3293255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596618555.246, "dur": 1.993, + "args": { + "External id": 3293256,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3655, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618613.661, "dur": 6.740, + "args": { + "External id": 3293257,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618669.551, "dur": 1.254, + "args": { + "External id": 3293258,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618687.886, "dur": 0.999, + "args": { + "External id": 3293259,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618702.731, "dur": 0.807, + "args": { + "External id": 3293260,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618714.980, "dur": 1.246, + "args": { + "External id": 3293261,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618729.103, "dur": 1.108, + "args": { + "External id": 3293262,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618741.092, "dur": 1.034, + "args": { + "External id": 3293263,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618752.739, "dur": 2.419, + "args": { + "External id": 3293264,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596618764.949, "dur": 0.726, + "args": { + "External id": 3293265,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596618858.896, "dur": 2794.323, + "args": { + "External id": 3293266,"Record function id": 0, "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 1336756, "tid": 1381189, + "ts": 1587596618899.065, "dur": 1025.126, + "args": { + "External id": 3293267,"Record function id": 0, "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 1336756, "tid": 1381189, + "ts": 1587596618914.704, "dur": 370.829, + "args": { + "External id": 3293268,"Record function id": 0, "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619033.477, "dur": 4.947, + "args": { + "External id": 3293269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619042.121, "dur": 1.255, + "args": { + "External id": 3293270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619045.080, "dur": 1.526, + "args": { + "External id": 3293271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619048.041, "dur": 1.221, + "args": { + "External id": 3293272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619050.727, "dur": 0.832, + "args": { + "External id": 3293273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619052.915, "dur": 0.991, + "args": { + "External id": 3293274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619055.346, "dur": 1.920, + "args": { + "External id": 3293275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619058.656, "dur": 2.602, + "args": { + "External id": 3293276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619062.748, "dur": 0.769, + "args": { + "External id": 3293277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596619065.034, "dur": 0.810, + "args": { + "External id": 3293278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596619085.183, "dur": 164.443, + "args": { + "External id": 3293279,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596619102.431, "dur": 142.181, + "args": { + "External id": 3293280,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596619126.652, "dur": 15.636, + "args": { + "External id": 3293281,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596619145.672, "dur": 69.219, + "args": { + "External id": 3293282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596619148.249, "dur": 66.267, + "args": { + "External id": 3293283,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619152.236, "dur": 5.402, + "args": { + "External id": 3293284,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596619159.430, "dur": 54.350, + "args": { + "External id": 3293285,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 1336756, "tid": 1381189, + "ts": 1587596619369.167, "dur": 547.345, + "args": { + "External id": 3293286,"Record function id": 0, "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 1336756, "tid": 1381189, + "ts": 1587596619386.993, "dur": 515.064, + "args": { + "External id": 3293287,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596619443.525, "dur": 5.325, + "args": { + "External id": 3293288,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596619463.275, "dur": 28.639, + "args": { + "External id": 3293289,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619468.156, "dur": 1.375, + "args": { + "External id": 3293290,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619471.816, "dur": 0.699, + "args": { + "External id": 3293291,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619474.211, "dur": 2.517, + "args": { + "External id": 3293292,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619478.144, "dur": 0.398, + "args": { + "External id": 3293293,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619480.017, "dur": 0.400, + "args": { + "External id": 3293294,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619482.179, "dur": 0.411, + "args": { + "External id": 3293295,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619483.910, "dur": 0.324, + "args": { + "External id": 3293296,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619485.908, "dur": 0.420, + "args": { + "External id": 3293297,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619488.023, "dur": 0.392, + "args": { + "External id": 3293298,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596619500.570, "dur": 30.697, + "args": { + "External id": 3293299,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596619562.289, "dur": 97.038, + "args": { + "External id": 3293300,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596619572.482, "dur": 3.604, + "args": { + "External id": 3293301,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596619581.048, "dur": 11.946, + "args": { + "External id": 3293302,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596619585.111, "dur": 7.444, + "args": { + "External id": 3293303,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619588.575, "dur": 2.800, + "args": { + "External id": 3293304,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596619599.805, "dur": 22.759, + "args": { + "External id": 3293305,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619601.761, "dur": 0.509, + "args": { + "External id": 3293306,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619603.904, "dur": 0.589, + "args": { + "External id": 3293307,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619606.150, "dur": 0.565, + "args": { + "External id": 3293308,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619608.115, "dur": 0.377, + "args": { + "External id": 3293309,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619610.233, "dur": 0.448, + "args": { + "External id": 3293310,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619612.159, "dur": 0.412, + "args": { + "External id": 3293311,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619614.009, "dur": 0.292, + "args": { + "External id": 3293312,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619615.565, "dur": 1.531, + "args": { + "External id": 3293313,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596619618.413, "dur": 0.468, + "args": { + "External id": 3293314,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596619631.512, "dur": 20.944, + "args": { + "External id": 3293315,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596619704.753, "dur": 107.934, + "args": { + "External id": 3293316,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596619725.530, "dur": 83.887, + "args": { + "External id": 3293317,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3716, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596619734.812, "dur": 70.694, + "args": { + "External id": 3293318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596619826.285, "dur": 1.973, + "args": { + "External id": 3293319,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3718, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596619931.351, "dur": 1702.808, + "args": { + "External id": 3293320,"Sequence number": 32987132, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3719 + } + }, + { + "ph": "f", "id": 200, "pid": 1336756, "tid": 1381189, "ts": 1587596619931.351, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620085.203, "dur": 105.999, + "args": { + "External id": 3293321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596620231.267, "dur": 37.924, + "args": { + "External id": 3293322,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620288.417, "dur": 52.315, + "args": { + "External id": 3293323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620350.440, "dur": 33.835, + "args": { + "External id": 3293324,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620392.293, "dur": 47.901, + "args": { + "External id": 3293325,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620448.134, "dur": 28.032, + "args": { + "External id": 3293326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620485.882, "dur": 42.556, + "args": { + "External id": 3293327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596620550.870, "dur": 23.160, + "args": { + "External id": 3293328,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596620592.995, "dur": 31.861, + "args": { + "External id": 3293329,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596620644.458, "dur": 20.380, + "args": { + "External id": 3293330,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596620678.322, "dur": 17.500, + "args": { + "External id": 3293331,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620706.979, "dur": 29.592, + "args": { + "External id": 3293332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596620739.803, "dur": 34.302, + "args": { + "External id": 3293333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596620799.576, "dur": 232.539, + "args": { + "External id": 3293334,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596620895.509, "dur": 6.749, + "args": { + "External id": 3293335,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596620904.383, "dur": 2.143, + "args": { + "External id": 3293336,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596621067.340, "dur": 28.338, + "args": { + "External id": 3293337,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596621126.544, "dur": 16.428, + "args": { + "External id": 3293338,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596621155.578, "dur": 45.174, + "args": { + "External id": 3293339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596621208.609, "dur": 38.092, + "args": { + "External id": 3293340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596621254.042, "dur": 22.233, + "args": { + "External id": 3293341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596621282.228, "dur": 41.910, + "args": { + "External id": 3293342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596621340.407, "dur": 30.971, + "args": { + "External id": 3293343,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596621382.363, "dur": 35.945, + "args": { + "External id": 3293344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596621437.268, "dur": 26.681, + "args": { + "External id": 3293345,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596621481.322, "dur": 28.054, + "args": { + "External id": 3293346,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596621523.498, "dur": 18.289, + "args": { + "External id": 3293347,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596621559.008, "dur": 15.112, + "args": { + "External id": 3293348,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596621587.797, "dur": 18.021, + "args": { + "External id": 3293349,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621676.190, "dur": 14.998, + "args": { + "External id": 3293350,"Record function id": 0, "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621679.711, "dur": 10.701, + "args": { + "External id": 3293351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621683.801, "dur": 5.514, + "args": { + "External id": 3293352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621685.508, "dur": 3.682, + "args": { + "External id": 3293353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621694.929, "dur": 5.517, + "args": { + "External id": 3293354,"Record function id": 0, "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621696.512, "dur": 3.473, + "args": { + "External id": 3293355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621697.342, "dur": 2.081, + "args": { + "External id": 3293356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621698.273, "dur": 1.036, + "args": { + "External id": 3293357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621703.564, "dur": 4.340, + "args": { + "External id": 3293358,"Record function id": 0, "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621704.738, "dur": 2.736, + "args": { + "External id": 3293359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621705.434, "dur": 1.595, + "args": { + "External id": 3293360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621705.783, "dur": 1.141, + "args": { + "External id": 3293361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621710.932, "dur": 4.608, + "args": { + "External id": 3293362,"Record function id": 0, "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621712.232, "dur": 2.917, + "args": { + "External id": 3293363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621712.709, "dur": 2.032, + "args": { + "External id": 3293364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621713.519, "dur": 1.161, + "args": { + "External id": 3293365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621718.506, "dur": 4.198, + "args": { + "External id": 3293366,"Record function id": 0, "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621719.780, "dur": 2.527, + "args": { + "External id": 3293367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621720.510, "dur": 1.384, + "args": { + "External id": 3293368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621721.269, "dur": 0.551, + "args": { + "External id": 3293369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621725.765, "dur": 3.691, + "args": { + "External id": 3293370,"Record function id": 0, "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621726.841, "dur": 2.213, + "args": { + "External id": 3293371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621727.527, "dur": 1.097, + "args": { + "External id": 3293372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621727.966, "dur": 0.569, + "args": { + "External id": 3293373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621732.623, "dur": 5.722, + "args": { + "External id": 3293374,"Record function id": 0, "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621733.589, "dur": 4.340, + "args": { + "External id": 3293375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621734.202, "dur": 3.327, + "args": { + "External id": 3293376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621734.488, "dur": 2.957, + "args": { + "External id": 3293377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621741.280, "dur": 3.583, + "args": { + "External id": 3293378,"Record function id": 0, "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621742.451, "dur": 2.020, + "args": { + "External id": 3293379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621742.919, "dur": 1.139, + "args": { + "External id": 3293380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621743.176, "dur": 0.795, + "args": { + "External id": 3293381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621747.803, "dur": 3.285, + "args": { + "External id": 3293382,"Record function id": 0, "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596621748.770, "dur": 1.910, + "args": { + "External id": 3293383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621749.253, "dur": 1.024, + "args": { + "External id": 3293384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596621749.731, "dur": 0.462, + "args": { + "External id": 3293385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596621754.488, "dur": 36363.369, + "args": { + "External id": 3293386,"Record function id": 0, "Sequence number": 32987131, "Fwd thread id": 1, "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596621755.649, "dur": 36353.863, + "args": { + "External id": 3293387,"Sequence number": 32987131, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3786 + } + }, + { + "ph": "f", "id": 201, "pid": 1336756, "tid": 1381189, "ts": 1587596621755.649, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 1336756, "tid": 1381189, + "ts": 1587596621783.818, "dur": 40.199, + "args": { + "External id": 3293388,"Record function id": 0, "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 1336756, "tid": 1381189, + "ts": 1587596621831.681, "dur": 84.075, + "args": { + "External id": 3293389,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 1336756, "tid": 1381189, + "ts": 1587596621924.099, "dur": 36177.157, + "args": { + "External id": 3293390,"Record function id": 0, "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596622051.162, "dur": 7.726, + "args": { + "External id": 3293391,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596622070.132, "dur": 5.072, + "args": { + "External id": 3293392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596622090.289, "dur": 35107.542, + "args": { + "External id": 3293393,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596622103.645, "dur": 35083.958, + "args": { + "External id": 3293394,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596622142.347, "dur": 13.497, + "args": { + "External id": 3293395,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596622161.878, "dur": 34984.810, + "args": { + "External id": 3293396,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596622164.645, "dur": 34981.243, + "args": { + "External id": 3293397,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596622168.966, "dur": 4.638, + "args": { + "External id": 3293398,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596622175.453, "dur": 34966.572, + "args": { + "External id": 3293399,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596657296.921, "dur": 10.848, + "args": { + "External id": 3293400,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596657300.907, "dur": 6.469, + "args": { + "External id": 3293401,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596657341.373, "dur": 398.858, + "args": { + "External id": 3293402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596657367.931, "dur": 367.689, + "args": { + "External id": 3293403,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3802, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596657379.490, "dur": 350.772, + "args": { + "External id": 3293404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596657761.048, "dur": 2.608, + "args": { + "External id": 3293405,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3804, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596657823.214, "dur": 8.237, + "args": { + "External id": 3293406,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596657896.727, "dur": 2.116, + "args": { + "External id": 3293407,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596657917.329, "dur": 1.298, + "args": { + "External id": 3293408,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596657932.024, "dur": 0.928, + "args": { + "External id": 3293409,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596657943.359, "dur": 2.660, + "args": { + "External id": 3293410,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596657957.991, "dur": 0.948, + "args": { + "External id": 3293411,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596657970.122, "dur": 0.897, + "args": { + "External id": 3293412,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658015.427, "dur": 2.552, + "args": { + "External id": 3293413,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658033.360, "dur": 2.914, + "args": { + "External id": 3293414,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596658134.254, "dur": 2716.285, + "args": { + "External id": 3293415,"Record function id": 0, "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 1336756, "tid": 1381189, + "ts": 1587596658153.605, "dur": 1023.040, + "args": { + "External id": 3293416,"Record function id": 0, "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 1336756, "tid": 1381189, + "ts": 1587596658170.378, "dur": 327.766, + "args": { + "External id": 3293417,"Record function id": 0, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658255.660, "dur": 4.511, + "args": { + "External id": 3293418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658263.250, "dur": 1.211, + "args": { + "External id": 3293419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658266.148, "dur": 1.361, + "args": { + "External id": 3293420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658269.080, "dur": 0.936, + "args": { + "External id": 3293421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658271.529, "dur": 1.094, + "args": { + "External id": 3293422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658274.238, "dur": 4.046, + "args": { + "External id": 3293423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658279.848, "dur": 3.806, + "args": { + "External id": 3293424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658285.108, "dur": 1.014, + "args": { + "External id": 3293425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658287.809, "dur": 0.764, + "args": { + "External id": 3293426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596658289.734, "dur": 0.845, + "args": { + "External id": 3293427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596658309.242, "dur": 157.522, + "args": { + "External id": 3293428,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596658325.795, "dur": 136.695, + "args": { + "External id": 3293429,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596658348.790, "dur": 13.681, + "args": { + "External id": 3293430,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596658366.074, "dur": 66.980, + "args": { + "External id": 3293431,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596658369.136, "dur": 63.593, + "args": { + "External id": 3293432,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658373.027, "dur": 5.630, + "args": { + "External id": 3293433,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596658380.955, "dur": 51.167, + "args": { + "External id": 3293434,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3833 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 1336756, "tid": 1381189, + "ts": 1587596658580.700, "dur": 588.241, + "args": { + "External id": 3293435,"Record function id": 0, "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 1336756, "tid": 1381189, + "ts": 1587596658596.570, "dur": 559.575, + "args": { + "External id": 3293436,"Record function id": 0, "Ev Idx": 3835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596658648.553, "dur": 4.487, + "args": { + "External id": 3293437,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596658667.744, "dur": 28.627, + "args": { + "External id": 3293438,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658672.374, "dur": 1.245, + "args": { + "External id": 3293439,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658675.355, "dur": 2.708, + "args": { + "External id": 3293440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658679.592, "dur": 0.408, + "args": { + "External id": 3293441,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658681.759, "dur": 0.330, + "args": { + "External id": 3293442,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658683.790, "dur": 0.554, + "args": { + "External id": 3293443,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658685.746, "dur": 0.547, + "args": { + "External id": 3293444,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658688.064, "dur": 0.451, + "args": { + "External id": 3293445,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658690.079, "dur": 0.390, + "args": { + "External id": 3293446,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658691.873, "dur": 0.642, + "args": { + "External id": 3293447,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596658705.336, "dur": 30.972, + "args": { + "External id": 3293448,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596658766.876, "dur": 97.284, + "args": { + "External id": 3293449,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596658775.992, "dur": 5.011, + "args": { + "External id": 3293450,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596658786.281, "dur": 9.397, + "args": { + "External id": 3293451,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596658790.148, "dur": 5.134, + "args": { + "External id": 3293452,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658793.729, "dur": 0.488, + "args": { + "External id": 3293453,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596658802.445, "dur": 25.449, + "args": { + "External id": 3293454,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658804.698, "dur": 0.763, + "args": { + "External id": 3293455,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658807.405, "dur": 0.376, + "args": { + "External id": 3293456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658809.628, "dur": 0.425, + "args": { + "External id": 3293457,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658811.767, "dur": 0.427, + "args": { + "External id": 3293458,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658814.200, "dur": 0.318, + "args": { + "External id": 3293459,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658816.228, "dur": 0.431, + "args": { + "External id": 3293460,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658818.354, "dur": 2.209, + "args": { + "External id": 3293461,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658822.260, "dur": 0.413, + "args": { + "External id": 3293462,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596658824.543, "dur": 0.356, + "args": { + "External id": 3293463,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596658836.457, "dur": 20.307, + "args": { + "External id": 3293464,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596658933.198, "dur": 153.699, + "args": { + "External id": 3293465,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596658958.438, "dur": 124.081, + "args": { + "External id": 3293466,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3865, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596658968.585, "dur": 108.981, + "args": { + "External id": 3293467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596659102.390, "dur": 2.102, + "args": { + "External id": 3293468,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3867, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596659185.096, "dur": 1646.237, + "args": { + "External id": 3293469,"Sequence number": 32987130, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3868 + } + }, + { + "ph": "f", "id": 202, "pid": 1336756, "tid": 1381189, "ts": 1587596659185.096, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596659302.715, "dur": 105.687, + "args": { + "External id": 3293470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596659449.372, "dur": 40.880, + "args": { + "External id": 3293471,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596659509.047, "dur": 55.857, + "args": { + "External id": 3293472,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596659575.102, "dur": 33.570, + "args": { + "External id": 3293473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596659615.815, "dur": 46.729, + "args": { + "External id": 3293474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596659671.654, "dur": 28.532, + "args": { + "External id": 3293475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596659707.927, "dur": 42.686, + "args": { + "External id": 3293476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596659773.024, "dur": 23.034, + "args": { + "External id": 3293477,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596659812.803, "dur": 30.078, + "args": { + "External id": 3293478,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596659860.910, "dur": 39.806, + "args": { + "External id": 3293479,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596659917.981, "dur": 17.495, + "args": { + "External id": 3293480,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596659946.645, "dur": 33.919, + "args": { + "External id": 3293481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596660032.205, "dur": 42.993, + "args": { + "External id": 3293482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596660103.622, "dur": 173.829, + "args": { + "External id": 3293483,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596660180.963, "dur": 6.458, + "args": { + "External id": 3293484,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596660189.577, "dur": 3.896, + "args": { + "External id": 3293485,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596660310.631, "dur": 27.294, + "args": { + "External id": 3293486,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596660350.590, "dur": 15.657, + "args": { + "External id": 3293487,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596660375.329, "dur": 38.773, + "args": { + "External id": 3293488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596660421.030, "dur": 37.767, + "args": { + "External id": 3293489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596660465.459, "dur": 21.935, + "args": { + "External id": 3293490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596660495.326, "dur": 30.940, + "args": { + "External id": 3293491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596660532.795, "dur": 21.183, + "args": { + "External id": 3293492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596660575.395, "dur": 42.980, + "args": { + "External id": 3293493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596660637.202, "dur": 24.174, + "args": { + "External id": 3293494,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596660681.109, "dur": 24.135, + "args": { + "External id": 3293495,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596660718.641, "dur": 21.127, + "args": { + "External id": 3293496,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596660754.063, "dur": 15.243, + "args": { + "External id": 3293497,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596660782.993, "dur": 21.160, + "args": { + "External id": 3293498,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660894.388, "dur": 17.288, + "args": { + "External id": 3293499,"Record function id": 0, "Ev Idx": 3898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660898.450, "dur": 11.931, + "args": { + "External id": 3293500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660902.831, "dur": 6.118, + "args": { + "External id": 3293501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660904.363, "dur": 4.278, + "args": { + "External id": 3293502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660916.243, "dur": 5.811, + "args": { + "External id": 3293503,"Record function id": 0, "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660917.990, "dur": 3.582, + "args": { + "External id": 3293504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660918.846, "dur": 2.203, + "args": { + "External id": 3293505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660919.723, "dur": 1.233, + "args": { + "External id": 3293506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660925.610, "dur": 4.762, + "args": { + "External id": 3293507,"Record function id": 0, "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660927.019, "dur": 2.940, + "args": { + "External id": 3293508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660927.839, "dur": 1.688, + "args": { + "External id": 3293509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660928.481, "dur": 0.968, + "args": { + "External id": 3293510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660933.696, "dur": 6.546, + "args": { + "External id": 3293511,"Record function id": 0, "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660935.264, "dur": 4.567, + "args": { + "External id": 3293512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660935.914, "dur": 3.490, + "args": { + "External id": 3293513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660936.387, "dur": 2.944, + "args": { + "External id": 3293514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660943.282, "dur": 3.973, + "args": { + "External id": 3293515,"Record function id": 0, "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660944.642, "dur": 2.171, + "args": { + "External id": 3293516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660945.124, "dur": 1.270, + "args": { + "External id": 3293517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660945.505, "dur": 0.817, + "args": { + "External id": 3293518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660950.384, "dur": 4.423, + "args": { + "External id": 3293519,"Record function id": 0, "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660951.862, "dur": 2.510, + "args": { + "External id": 3293520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660952.570, "dur": 1.404, + "args": { + "External id": 3293521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660953.224, "dur": 0.632, + "args": { + "External id": 3293522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660960.889, "dur": 3.662, + "args": { + "External id": 3293523,"Record function id": 0, "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660962.072, "dur": 2.070, + "args": { + "External id": 3293524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660962.564, "dur": 1.159, + "args": { + "External id": 3293525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660962.830, "dur": 0.806, + "args": { + "External id": 3293526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660967.580, "dur": 3.694, + "args": { + "External id": 3293527,"Record function id": 0, "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660968.750, "dur": 2.091, + "args": { + "External id": 3293528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660969.371, "dur": 1.049, + "args": { + "External id": 3293529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660969.666, "dur": 0.677, + "args": { + "External id": 3293530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660974.171, "dur": 67.357, + "args": { + "External id": 3293531,"Record function id": 0, "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596660975.256, "dur": 65.012, + "args": { + "External id": 3293532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596660976.062, "dur": 62.973, + "args": { + "External id": 3293533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596661036.726, "dur": 1.915, + "args": { + "External id": 3293534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596661047.610, "dur": 35234.953, + "args": { + "External id": 3293535,"Record function id": 0, "Sequence number": 32987129, "Fwd thread id": 1, "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596661049.427, "dur": 35224.651, + "args": { + "External id": 3293536,"Sequence number": 32987129, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3935 + } + }, + { + "ph": "f", "id": 203, "pid": 1336756, "tid": 1381189, "ts": 1587596661049.427, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 1336756, "tid": 1381189, + "ts": 1587596661080.894, "dur": 37.231, + "args": { + "External id": 3293537,"Record function id": 0, "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 1336756, "tid": 1381189, + "ts": 1587596661126.020, "dur": 63.096, + "args": { + "External id": 3293538,"Record function id": 0, "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 1336756, "tid": 1381189, + "ts": 1587596661195.691, "dur": 35067.225, + "args": { + "External id": 3293539,"Record function id": 0, "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596661281.670, "dur": 7.159, + "args": { + "External id": 3293540,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596661298.677, "dur": 4.919, + "args": { + "External id": 3293541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596661319.026, "dur": 34105.709, + "args": { + "External id": 3293542,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596661331.834, "dur": 34082.419, + "args": { + "External id": 3293543,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596661369.189, "dur": 15.098, + "args": { + "External id": 3293544,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596661390.291, "dur": 33981.771, + "args": { + "External id": 3293545,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596661392.937, "dur": 33978.058, + "args": { + "External id": 3293546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596661396.721, "dur": 5.298, + "args": { + "External id": 3293547,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596661404.001, "dur": 33962.863, + "args": { + "External id": 3293548,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596695523.883, "dur": 11.030, + "args": { + "External id": 3293549,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596695527.945, "dur": 6.584, + "args": { + "External id": 3293550,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596695568.338, "dur": 355.217, + "args": { + "External id": 3293551,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596695594.321, "dur": 324.266, + "args": { + "External id": 3293552,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3951, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596695604.889, "dur": 307.833, + "args": { + "External id": 3293553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596695943.935, "dur": 2.659, + "args": { + "External id": 3293554,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3953, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696041.120, "dur": 7.255, + "args": { + "External id": 3293555,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696099.532, "dur": 1.677, + "args": { + "External id": 3293556,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696119.057, "dur": 1.139, + "args": { + "External id": 3293557,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696135.304, "dur": 0.884, + "args": { + "External id": 3293558,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696147.514, "dur": 0.938, + "args": { + "External id": 3293559,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696159.413, "dur": 1.088, + "args": { + "External id": 3293560,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696171.784, "dur": 0.855, + "args": { + "External id": 3293561,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696187.144, "dur": 1.848, + "args": { + "External id": 3293562,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696199.861, "dur": 0.800, + "args": { + "External id": 3293563,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596696298.288, "dur": 2756.642, + "args": { + "External id": 3293564,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 1336756, "tid": 1381189, + "ts": 1587596696317.331, "dur": 1011.291, + "args": { + "External id": 3293565,"Record function id": 0, "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 1336756, "tid": 1381189, + "ts": 1587596696335.464, "dur": 312.035, + "args": { + "External id": 3293566,"Record function id": 0, "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696412.500, "dur": 4.366, + "args": { + "External id": 3293567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696420.098, "dur": 0.981, + "args": { + "External id": 3293568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696422.935, "dur": 0.845, + "args": { + "External id": 3293569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696425.218, "dur": 2.401, + "args": { + "External id": 3293570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696429.386, "dur": 1.092, + "args": { + "External id": 3293571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696432.467, "dur": 0.884, + "args": { + "External id": 3293572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696434.901, "dur": 1.752, + "args": { + "External id": 3293573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696438.059, "dur": 0.899, + "args": { + "External id": 3293574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696440.372, "dur": 0.775, + "args": { + "External id": 3293575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596696442.560, "dur": 1.025, + "args": { + "External id": 3293576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596696462.007, "dur": 153.889, + "args": { + "External id": 3293577,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596696477.570, "dur": 133.904, + "args": { + "External id": 3293578,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596696495.932, "dur": 13.052, + "args": { + "External id": 3293579,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596696512.390, "dur": 67.386, + "args": { + "External id": 3293580,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596696515.136, "dur": 64.217, + "args": { + "External id": 3293581,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696518.848, "dur": 7.806, + "args": { + "External id": 3293582,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596696528.426, "dur": 50.179, + "args": { + "External id": 3293583,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 1336756, "tid": 1381189, + "ts": 1587596696720.993, "dur": 598.805, + "args": { + "External id": 3293584,"Record function id": 0, "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 1336756, "tid": 1381189, + "ts": 1587596696736.525, "dur": 570.746, + "args": { + "External id": 3293585,"Record function id": 0, "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596696790.671, "dur": 4.733, + "args": { + "External id": 3293586,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596696810.361, "dur": 28.594, + "args": { + "External id": 3293587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696814.971, "dur": 1.365, + "args": { + "External id": 3293588,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696818.463, "dur": 0.441, + "args": { + "External id": 3293589,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696820.141, "dur": 0.534, + "args": { + "External id": 3293590,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696822.769, "dur": 0.481, + "args": { + "External id": 3293591,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696824.834, "dur": 0.540, + "args": { + "External id": 3293592,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696827.225, "dur": 0.568, + "args": { + "External id": 3293593,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696829.384, "dur": 2.135, + "args": { + "External id": 3293594,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696833.000, "dur": 0.529, + "args": { + "External id": 3293595,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696835.145, "dur": 0.332, + "args": { + "External id": 3293596,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596696848.362, "dur": 51.864, + "args": { + "External id": 3293597,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596696934.198, "dur": 152.559, + "args": { + "External id": 3293598,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596696944.697, "dur": 3.933, + "args": { + "External id": 3293599,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596696954.191, "dur": 10.076, + "args": { + "External id": 3293600,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596696958.319, "dur": 5.493, + "args": { + "External id": 3293601,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696961.903, "dur": 0.574, + "args": { + "External id": 3293602,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596696971.260, "dur": 66.138, + "args": { + "External id": 3293603,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696973.485, "dur": 0.491, + "args": { + "External id": 3293604,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696975.569, "dur": 0.441, + "args": { + "External id": 3293605,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696977.783, "dur": 0.504, + "args": { + "External id": 3293606,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596696980.202, "dur": 38.117, + "args": { + "External id": 3293607,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596697024.118, "dur": 0.535, + "args": { + "External id": 3293608,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596697026.451, "dur": 0.543, + "args": { + "External id": 3293609,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596697028.604, "dur": 0.434, + "args": { + "External id": 3293610,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596697030.943, "dur": 0.572, + "args": { + "External id": 3293611,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596697033.270, "dur": 0.440, + "args": { + "External id": 3293612,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596697050.483, "dur": 25.623, + "args": { + "External id": 3293613,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596697136.190, "dur": 107.549, + "args": { + "External id": 3293614,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596697158.906, "dur": 81.250, + "args": { + "External id": 3293615,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4014, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596697168.997, "dur": 66.935, + "args": { + "External id": 3293616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596697257.090, "dur": 1.786, + "args": { + "External id": 3293617,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4016, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596697335.746, "dur": 1698.269, + "args": { + "External id": 3293618,"Sequence number": 32987128, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4017 + } + }, + { + "ph": "f", "id": 204, "pid": 1336756, "tid": 1381189, "ts": 1587596697335.746, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596697441.250, "dur": 100.968, + "args": { + "External id": 3293619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596697579.992, "dur": 38.207, + "args": { + "External id": 3293620,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596697634.716, "dur": 52.434, + "args": { + "External id": 3293621,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596697697.102, "dur": 34.029, + "args": { + "External id": 3293622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596697739.561, "dur": 46.623, + "args": { + "External id": 3293623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596697793.573, "dur": 28.956, + "args": { + "External id": 3293624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596697829.622, "dur": 67.974, + "args": { + "External id": 3293625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596697923.271, "dur": 30.149, + "args": { + "External id": 3293626,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596697973.139, "dur": 71.923, + "args": { + "External id": 3293627,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596698071.490, "dur": 21.061, + "args": { + "External id": 3293628,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596698106.060, "dur": 20.818, + "args": { + "External id": 3293629,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698136.586, "dur": 39.544, + "args": { + "External id": 3293630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698179.856, "dur": 36.363, + "args": { + "External id": 3293631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596698246.909, "dur": 174.624, + "args": { + "External id": 3293632,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596698328.668, "dur": 5.368, + "args": { + "External id": 3293633,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596698336.208, "dur": 1.913, + "args": { + "External id": 3293634,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596698454.457, "dur": 26.524, + "args": { + "External id": 3293635,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596698491.847, "dur": 16.633, + "args": { + "External id": 3293636,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698517.112, "dur": 36.777, + "args": { + "External id": 3293637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698560.623, "dur": 39.340, + "args": { + "External id": 3293638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698608.674, "dur": 25.267, + "args": { + "External id": 3293639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698639.768, "dur": 31.639, + "args": { + "External id": 3293640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698677.782, "dur": 38.970, + "args": { + "External id": 3293641,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596698729.224, "dur": 39.584, + "args": { + "External id": 3293642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596698786.275, "dur": 22.739, + "args": { + "External id": 3293643,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596698827.823, "dur": 22.529, + "args": { + "External id": 3293644,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596698862.669, "dur": 41.138, + "args": { + "External id": 3293645,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596698924.437, "dur": 15.934, + "args": { + "External id": 3293646,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596698952.431, "dur": 18.701, + "args": { + "External id": 3293647,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699077.220, "dur": 14.687, + "args": { + "External id": 3293648,"Record function id": 0, "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699080.578, "dur": 10.363, + "args": { + "External id": 3293649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699084.541, "dur": 5.646, + "args": { + "External id": 3293650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699086.434, "dur": 3.609, + "args": { + "External id": 3293651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699095.773, "dur": 4.992, + "args": { + "External id": 3293652,"Record function id": 0, "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699096.876, "dur": 3.409, + "args": { + "External id": 3293653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699097.611, "dur": 2.186, + "args": { + "External id": 3293654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699098.651, "dur": 1.056, + "args": { + "External id": 3293655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699104.061, "dur": 6.631, + "args": { + "External id": 3293656,"Record function id": 0, "Ev Idx": 4055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699105.153, "dur": 5.123, + "args": { + "External id": 3293657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699106.030, "dur": 3.816, + "args": { + "External id": 3293658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699106.538, "dur": 3.224, + "args": { + "External id": 3293659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699113.949, "dur": 5.271, + "args": { + "External id": 3293660,"Record function id": 0, "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699115.610, "dur": 3.197, + "args": { + "External id": 3293661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699116.509, "dur": 1.845, + "args": { + "External id": 3293662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699117.194, "dur": 1.087, + "args": { + "External id": 3293663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699122.272, "dur": 3.579, + "args": { + "External id": 3293664,"Record function id": 0, "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699123.215, "dur": 2.229, + "args": { + "External id": 3293665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699123.767, "dur": 1.251, + "args": { + "External id": 3293666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699124.351, "dur": 0.594, + "args": { + "External id": 3293667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699128.912, "dur": 4.101, + "args": { + "External id": 3293668,"Record function id": 0, "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699130.304, "dur": 2.275, + "args": { + "External id": 3293669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699130.960, "dur": 1.215, + "args": { + "External id": 3293670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699131.458, "dur": 0.605, + "args": { + "External id": 3293671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699136.242, "dur": 3.849, + "args": { + "External id": 3293672,"Record function id": 0, "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699137.424, "dur": 2.248, + "args": { + "External id": 3293673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699137.978, "dur": 1.277, + "args": { + "External id": 3293674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699138.489, "dur": 0.669, + "args": { + "External id": 3293675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699143.102, "dur": 3.778, + "args": { + "External id": 3293676,"Record function id": 0, "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699144.326, "dur": 2.153, + "args": { + "External id": 3293677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699144.958, "dur": 1.099, + "args": { + "External id": 3293678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699145.274, "dur": 0.717, + "args": { + "External id": 3293679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699149.899, "dur": 3.684, + "args": { + "External id": 3293680,"Record function id": 0, "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596699150.897, "dur": 2.263, + "args": { + "External id": 3293681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699151.358, "dur": 1.377, + "args": { + "External id": 3293682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596699151.921, "dur": 0.716, + "args": { + "External id": 3293683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596699157.122, "dur": 35620.745, + "args": { + "External id": 3293684,"Record function id": 0, "Sequence number": 32987127, "Fwd thread id": 1, "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596699158.441, "dur": 35611.273, + "args": { + "External id": 3293685,"Sequence number": 32987127, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4084 + } + }, + { + "ph": "f", "id": 205, "pid": 1336756, "tid": 1381189, "ts": 1587596699158.441, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 1336756, "tid": 1381189, + "ts": 1587596699187.834, "dur": 36.102, + "args": { + "External id": 3293686,"Record function id": 0, "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 1336756, "tid": 1381189, + "ts": 1587596699231.636, "dur": 59.909, + "args": { + "External id": 3293687,"Record function id": 0, "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 1336756, "tid": 1381189, + "ts": 1587596699298.720, "dur": 35462.784, + "args": { + "External id": 3293688,"Record function id": 0, "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596699382.735, "dur": 6.610, + "args": { + "External id": 3293689,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596699398.591, "dur": 6.265, + "args": { + "External id": 3293690,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596699418.166, "dur": 34523.640, + "args": { + "External id": 3293691,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596699431.471, "dur": 34499.676, + "args": { + "External id": 3293692,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596699490.825, "dur": 12.999, + "args": { + "External id": 3293693,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596699509.857, "dur": 34377.540, + "args": { + "External id": 3293694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596699512.417, "dur": 34373.993, + "args": { + "External id": 3293695,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596699516.920, "dur": 4.567, + "args": { + "External id": 3293696,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596699542.447, "dur": 34339.351, + "args": { + "External id": 3293697,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596734081.098, "dur": 11.182, + "args": { + "External id": 3293698,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596734084.650, "dur": 7.055, + "args": { + "External id": 3293699,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596734123.518, "dur": 349.098, + "args": { + "External id": 3293700,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596734149.580, "dur": 318.290, + "args": { + "External id": 3293701,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4100, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596734160.810, "dur": 302.105, + "args": { + "External id": 3293702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596734491.170, "dur": 2.334, + "args": { + "External id": 3293703,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4102, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734548.982, "dur": 6.395, + "args": { + "External id": 3293704,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734600.093, "dur": 1.478, + "args": { + "External id": 3293705,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734617.537, "dur": 2.579, + "args": { + "External id": 3293706,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734631.724, "dur": 0.877, + "args": { + "External id": 3293707,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734644.538, "dur": 0.804, + "args": { + "External id": 3293708,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734655.905, "dur": 1.202, + "args": { + "External id": 3293709,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734673.568, "dur": 2.320, + "args": { + "External id": 3293710,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734687.229, "dur": 2.014, + "args": { + "External id": 3293711,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596734700.383, "dur": 1.005, + "args": { + "External id": 3293712,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596734791.521, "dur": 2767.721, + "args": { + "External id": 3293713,"Record function id": 0, "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 1336756, "tid": 1381189, + "ts": 1587596734810.904, "dur": 1007.256, + "args": { + "External id": 3293714,"Record function id": 0, "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 1336756, "tid": 1381189, + "ts": 1587596734826.136, "dur": 377.098, + "args": { + "External id": 3293715,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734927.625, "dur": 4.348, + "args": { + "External id": 3293716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734935.332, "dur": 0.977, + "args": { + "External id": 3293717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734938.407, "dur": 2.352, + "args": { + "External id": 3293718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734942.272, "dur": 0.843, + "args": { + "External id": 3293719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734944.458, "dur": 0.961, + "args": { + "External id": 3293720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734947.016, "dur": 0.932, + "args": { + "External id": 3293721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734949.341, "dur": 1.865, + "args": { + "External id": 3293722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734952.819, "dur": 1.028, + "args": { + "External id": 3293723,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734955.151, "dur": 0.779, + "args": { + "External id": 3293724,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596734957.423, "dur": 0.648, + "args": { + "External id": 3293725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596734976.933, "dur": 191.389, + "args": { + "External id": 3293726,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596735029.312, "dur": 134.644, + "args": { + "External id": 3293727,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596735048.721, "dur": 14.246, + "args": { + "External id": 3293728,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596735066.505, "dur": 67.403, + "args": { + "External id": 3293729,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596735069.173, "dur": 64.372, + "args": { + "External id": 3293730,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735073.238, "dur": 6.223, + "args": { + "External id": 3293731,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596735081.301, "dur": 51.732, + "args": { + "External id": 3293732,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 1336756, "tid": 1381189, + "ts": 1587596735281.680, "dur": 528.619, + "args": { + "External id": 3293733,"Record function id": 0, "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 1336756, "tid": 1381189, + "ts": 1587596735299.413, "dur": 498.530, + "args": { + "External id": 3293734,"Record function id": 0, "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596735355.940, "dur": 4.628, + "args": { + "External id": 3293735,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596735375.097, "dur": 34.381, + "args": { + "External id": 3293736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735379.987, "dur": 1.495, + "args": { + "External id": 3293737,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735383.493, "dur": 0.616, + "args": { + "External id": 3293738,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735385.575, "dur": 0.493, + "args": { + "External id": 3293739,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735387.672, "dur": 0.475, + "args": { + "External id": 3293740,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735389.740, "dur": 0.402, + "args": { + "External id": 3293741,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735391.870, "dur": 2.392, + "args": { + "External id": 3293742,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735400.780, "dur": 0.435, + "args": { + "External id": 3293743,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735402.871, "dur": 0.617, + "args": { + "External id": 3293744,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735405.454, "dur": 0.361, + "args": { + "External id": 3293745,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596735419.150, "dur": 33.169, + "args": { + "External id": 3293746,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596735482.694, "dur": 98.890, + "args": { + "External id": 3293747,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596735492.998, "dur": 2.808, + "args": { + "External id": 3293748,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596735500.997, "dur": 9.976, + "args": { + "External id": 3293749,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596735504.951, "dur": 5.615, + "args": { + "External id": 3293750,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735508.778, "dur": 0.745, + "args": { + "External id": 3293751,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596735521.092, "dur": 23.782, + "args": { + "External id": 3293752,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735523.446, "dur": 0.511, + "args": { + "External id": 3293753,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735525.819, "dur": 0.381, + "args": { + "External id": 3293754,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735527.750, "dur": 2.306, + "args": { + "External id": 3293755,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735531.953, "dur": 0.434, + "args": { + "External id": 3293756,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735534.153, "dur": 0.624, + "args": { + "External id": 3293757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735535.995, "dur": 0.415, + "args": { + "External id": 3293758,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735537.795, "dur": 0.360, + "args": { + "External id": 3293759,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735539.478, "dur": 0.391, + "args": { + "External id": 3293760,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596735541.138, "dur": 0.384, + "args": { + "External id": 3293761,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596735554.031, "dur": 20.165, + "args": { + "External id": 3293762,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596735624.143, "dur": 109.414, + "args": { + "External id": 3293763,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596735646.679, "dur": 83.245, + "args": { + "External id": 3293764,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4163, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596735655.690, "dur": 70.461, + "args": { + "External id": 3293765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596735748.283, "dur": 2.012, + "args": { + "External id": 3293766,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4165, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596735825.014, "dur": 1716.276, + "args": { + "External id": 3293767,"Sequence number": 32987126, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4166 + } + }, + { + "ph": "f", "id": 206, "pid": 1336756, "tid": 1381189, "ts": 1587596735825.014, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596735954.739, "dur": 142.726, + "args": { + "External id": 3293768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596736138.265, "dur": 39.037, + "args": { + "External id": 3293769,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596736195.107, "dur": 52.512, + "args": { + "External id": 3293770,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596736259.145, "dur": 34.743, + "args": { + "External id": 3293771,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596736300.843, "dur": 46.467, + "args": { + "External id": 3293772,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596736355.094, "dur": 29.098, + "args": { + "External id": 3293773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596736394.680, "dur": 42.259, + "args": { + "External id": 3293774,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596736462.192, "dur": 23.749, + "args": { + "External id": 3293775,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596736517.785, "dur": 34.542, + "args": { + "External id": 3293776,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596736572.129, "dur": 19.711, + "args": { + "External id": 3293777,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596736609.853, "dur": 16.811, + "args": { + "External id": 3293778,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596736635.252, "dur": 29.172, + "args": { + "External id": 3293779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596736667.402, "dur": 37.437, + "args": { + "External id": 3293780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596736732.802, "dur": 191.696, + "args": { + "External id": 3293781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596736810.073, "dur": 5.726, + "args": { + "External id": 3293782,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596736817.850, "dur": 2.298, + "args": { + "External id": 3293783,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596736958.164, "dur": 61.037, + "args": { + "External id": 3293784,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596737033.639, "dur": 19.114, + "args": { + "External id": 3293785,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596737063.849, "dur": 45.959, + "args": { + "External id": 3293786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596737117.754, "dur": 39.725, + "args": { + "External id": 3293787,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596737166.961, "dur": 25.167, + "args": { + "External id": 3293788,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596737197.726, "dur": 31.767, + "args": { + "External id": 3293789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596737236.320, "dur": 24.331, + "args": { + "External id": 3293790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596737281.216, "dur": 45.652, + "args": { + "External id": 3293791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596737347.793, "dur": 26.952, + "args": { + "External id": 3293792,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596737391.519, "dur": 23.306, + "args": { + "External id": 3293793,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596737431.556, "dur": 19.184, + "args": { + "External id": 3293794,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596737464.936, "dur": 17.649, + "args": { + "External id": 3293795,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596737495.028, "dur": 17.643, + "args": { + "External id": 3293796,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737580.421, "dur": 15.751, + "args": { + "External id": 3293797,"Record function id": 0, "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737583.916, "dur": 11.331, + "args": { + "External id": 3293798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737588.488, "dur": 5.825, + "args": { + "External id": 3293799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737590.148, "dur": 4.015, + "args": { + "External id": 3293800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737599.895, "dur": 5.276, + "args": { + "External id": 3293801,"Record function id": 0, "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737601.142, "dur": 3.559, + "args": { + "External id": 3293802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737601.915, "dur": 2.273, + "args": { + "External id": 3293803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737603.029, "dur": 1.065, + "args": { + "External id": 3293804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737608.453, "dur": 4.279, + "args": { + "External id": 3293805,"Record function id": 0, "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737610.027, "dur": 2.303, + "args": { + "External id": 3293806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737610.670, "dur": 1.245, + "args": { + "External id": 3293807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737611.032, "dur": 0.795, + "args": { + "External id": 3293808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737615.819, "dur": 5.266, + "args": { + "External id": 3293809,"Record function id": 0, "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737617.017, "dur": 3.668, + "args": { + "External id": 3293810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737617.513, "dur": 2.745, + "args": { + "External id": 3293811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737617.799, "dur": 2.391, + "args": { + "External id": 3293812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737624.169, "dur": 4.005, + "args": { + "External id": 3293813,"Record function id": 0, "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737625.258, "dur": 2.514, + "args": { + "External id": 3293814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737625.939, "dur": 1.408, + "args": { + "External id": 3293815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737626.200, "dur": 1.079, + "args": { + "External id": 3293816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737631.123, "dur": 3.613, + "args": { + "External id": 3293817,"Record function id": 0, "Ev Idx": 4216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737632.207, "dur": 2.105, + "args": { + "External id": 3293818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737632.691, "dur": 1.180, + "args": { + "External id": 3293819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737633.190, "dur": 0.570, + "args": { + "External id": 3293820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737638.058, "dur": 3.703, + "args": { + "External id": 3293821,"Record function id": 0, "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737639.284, "dur": 2.054, + "args": { + "External id": 3293822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737639.935, "dur": 0.957, + "args": { + "External id": 3293823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737640.211, "dur": 0.597, + "args": { + "External id": 3293824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737644.733, "dur": 3.660, + "args": { + "External id": 3293825,"Record function id": 0, "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737645.971, "dur": 2.027, + "args": { + "External id": 3293826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737646.438, "dur": 1.184, + "args": { + "External id": 3293827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737646.893, "dur": 0.656, + "args": { + "External id": 3293828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737651.327, "dur": 4.662, + "args": { + "External id": 3293829,"Record function id": 0, "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596737652.809, "dur": 2.761, + "args": { + "External id": 3293830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737653.491, "dur": 1.665, + "args": { + "External id": 3293831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596737654.314, "dur": 0.742, + "args": { + "External id": 3293832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596737659.351, "dur": 38476.546, + "args": { + "External id": 3293833,"Record function id": 0, "Sequence number": 32987125, "Fwd thread id": 1, "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596737660.784, "dur": 38466.930, + "args": { + "External id": 3293834,"Sequence number": 32987125, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4233 + } + }, + { + "ph": "f", "id": 207, "pid": 1336756, "tid": 1381189, "ts": 1587596737660.784, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 1336756, "tid": 1381189, + "ts": 1587596737686.775, "dur": 36.280, + "args": { + "External id": 3293835,"Record function id": 0, "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 1336756, "tid": 1381189, + "ts": 1587596737730.539, "dur": 59.713, + "args": { + "External id": 3293836,"Record function id": 0, "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 1336756, "tid": 1381189, + "ts": 1587596737796.172, "dur": 38322.850, + "args": { + "External id": 3293837,"Record function id": 0, "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596737900.800, "dur": 7.391, + "args": { + "External id": 3293838,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596737918.416, "dur": 5.202, + "args": { + "External id": 3293839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596737939.938, "dur": 37318.930, + "args": { + "External id": 3293840,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596737953.223, "dur": 37296.753, + "args": { + "External id": 3293841,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596738048.472, "dur": 16.172, + "args": { + "External id": 3293842,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596738071.138, "dur": 37139.658, + "args": { + "External id": 3293843,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596738073.807, "dur": 37136.069, + "args": { + "External id": 3293844,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596738077.931, "dur": 6.082, + "args": { + "External id": 3293845,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596738085.760, "dur": 37120.518, + "args": { + "External id": 3293846,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596775346.942, "dur": 9.373, + "args": { + "External id": 3293847,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596775350.365, "dur": 5.596, + "args": { + "External id": 3293848,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596775388.394, "dur": 374.185, + "args": { + "External id": 3293849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596775413.812, "dur": 344.413, + "args": { + "External id": 3293850,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4249, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596775424.317, "dur": 328.149, + "args": { + "External id": 3293851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596775781.353, "dur": 2.398, + "args": { + "External id": 3293852,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4251, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596775840.613, "dur": 6.285, + "args": { + "External id": 3293853,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596775911.445, "dur": 2.133, + "args": { + "External id": 3293854,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596775931.043, "dur": 0.964, + "args": { + "External id": 3293855,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596775946.095, "dur": 0.764, + "args": { + "External id": 3293856,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596775958.438, "dur": 0.893, + "args": { + "External id": 3293857,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596775970.233, "dur": 1.011, + "args": { + "External id": 3293858,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776024.408, "dur": 1.662, + "args": { + "External id": 3293859,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776043.879, "dur": 2.000, + "args": { + "External id": 3293860,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776056.211, "dur": 0.839, + "args": { + "External id": 3293861,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596776151.765, "dur": 2659.976, + "args": { + "External id": 3293862,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 1336756, "tid": 1381189, + "ts": 1587596776170.443, "dur": 979.443, + "args": { + "External id": 3293863,"Record function id": 0, "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 1336756, "tid": 1381189, + "ts": 1587596776185.316, "dur": 306.975, + "args": { + "External id": 3293864,"Record function id": 0, "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776261.292, "dur": 3.881, + "args": { + "External id": 3293865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776268.267, "dur": 0.900, + "args": { + "External id": 3293866,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776270.877, "dur": 0.766, + "args": { + "External id": 3293867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776273.460, "dur": 2.681, + "args": { + "External id": 3293868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776277.421, "dur": 0.956, + "args": { + "External id": 3293869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776279.510, "dur": 1.006, + "args": { + "External id": 3293870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776281.988, "dur": 1.425, + "args": { + "External id": 3293871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776285.276, "dur": 1.009, + "args": { + "External id": 3293872,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776287.523, "dur": 0.851, + "args": { + "External id": 3293873,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596776289.821, "dur": 1.068, + "args": { + "External id": 3293874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596776308.026, "dur": 154.901, + "args": { + "External id": 3293875,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596776323.779, "dur": 134.704, + "args": { + "External id": 3293876,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596776345.003, "dur": 11.561, + "args": { + "External id": 3293877,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596776359.837, "dur": 70.111, + "args": { + "External id": 3293878,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596776362.658, "dur": 66.955, + "args": { + "External id": 3293879,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776366.587, "dur": 8.359, + "args": { + "External id": 3293880,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596776378.645, "dur": 50.380, + "args": { + "External id": 3293881,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 1336756, "tid": 1381189, + "ts": 1587596776564.533, "dur": 577.214, + "args": { + "External id": 3293882,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 1336756, "tid": 1381189, + "ts": 1587596776580.490, "dur": 547.677, + "args": { + "External id": 3293883,"Record function id": 0, "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596776635.214, "dur": 4.403, + "args": { + "External id": 3293884,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596776654.282, "dur": 28.742, + "args": { + "External id": 3293885,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776659.187, "dur": 1.460, + "args": { + "External id": 3293886,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776663.161, "dur": 0.633, + "args": { + "External id": 3293887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776665.098, "dur": 0.643, + "args": { + "External id": 3293888,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776667.339, "dur": 0.402, + "args": { + "External id": 3293889,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776669.572, "dur": 0.414, + "args": { + "External id": 3293890,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776671.662, "dur": 0.396, + "args": { + "External id": 3293891,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776673.790, "dur": 2.151, + "args": { + "External id": 3293892,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776677.566, "dur": 0.363, + "args": { + "External id": 3293893,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776679.342, "dur": 0.278, + "args": { + "External id": 3293894,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596776692.506, "dur": 29.223, + "args": { + "External id": 3293895,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596776751.026, "dur": 92.799, + "args": { + "External id": 3293896,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596776761.020, "dur": 2.811, + "args": { + "External id": 3293897,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596776769.030, "dur": 9.229, + "args": { + "External id": 3293898,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596776773.064, "dur": 4.748, + "args": { + "External id": 3293899,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776776.303, "dur": 0.425, + "args": { + "External id": 3293900,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596776784.522, "dur": 22.619, + "args": { + "External id": 3293901,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776786.790, "dur": 0.695, + "args": { + "External id": 3293902,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776789.238, "dur": 0.478, + "args": { + "External id": 3293903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776791.083, "dur": 0.585, + "args": { + "External id": 3293904,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776793.126, "dur": 1.700, + "args": { + "External id": 3293905,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776796.462, "dur": 0.501, + "args": { + "External id": 3293906,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776798.294, "dur": 0.374, + "args": { + "External id": 3293907,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776800.128, "dur": 0.375, + "args": { + "External id": 3293908,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776802.177, "dur": 0.388, + "args": { + "External id": 3293909,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596776804.094, "dur": 0.417, + "args": { + "External id": 3293910,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596776816.986, "dur": 19.819, + "args": { + "External id": 3293911,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596776909.400, "dur": 147.723, + "args": { + "External id": 3293912,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596776930.497, "dur": 122.606, + "args": { + "External id": 3293913,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4312, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596776939.987, "dur": 108.042, + "args": { + "External id": 3293914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596777071.855, "dur": 1.974, + "args": { + "External id": 3293915,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4314, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596777157.134, "dur": 1636.012, + "args": { + "External id": 3293916,"Sequence number": 32987124, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4315 + } + }, + { + "ph": "f", "id": 208, "pid": 1336756, "tid": 1381189, "ts": 1587596777157.134, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777263.481, "dur": 103.574, + "args": { + "External id": 3293917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596777405.894, "dur": 37.844, + "args": { + "External id": 3293918,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777460.785, "dur": 50.497, + "args": { + "External id": 3293919,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777520.152, "dur": 37.566, + "args": { + "External id": 3293920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777566.283, "dur": 49.281, + "args": { + "External id": 3293921,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777623.737, "dur": 33.125, + "args": { + "External id": 3293922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777663.265, "dur": 45.671, + "args": { + "External id": 3293923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596777729.443, "dur": 22.802, + "args": { + "External id": 3293924,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596777769.675, "dur": 31.411, + "args": { + "External id": 3293925,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596777820.326, "dur": 19.951, + "args": { + "External id": 3293926,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596777852.337, "dur": 35.633, + "args": { + "External id": 3293927,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777899.710, "dur": 37.844, + "args": { + "External id": 3293928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596777941.094, "dur": 37.204, + "args": { + "External id": 3293929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596778044.193, "dur": 174.561, + "args": { + "External id": 3293930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596778121.821, "dur": 6.120, + "args": { + "External id": 3293931,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596778130.167, "dur": 1.905, + "args": { + "External id": 3293932,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596778250.657, "dur": 25.941, + "args": { + "External id": 3293933,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596778288.362, "dur": 16.383, + "args": { + "External id": 3293934,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596778313.554, "dur": 39.880, + "args": { + "External id": 3293935,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596778360.392, "dur": 38.438, + "args": { + "External id": 3293936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596778405.526, "dur": 22.387, + "args": { + "External id": 3293937,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596778451.919, "dur": 33.342, + "args": { + "External id": 3293938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596778495.511, "dur": 21.994, + "args": { + "External id": 3293939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596778524.253, "dur": 33.780, + "args": { + "External id": 3293940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596778573.910, "dur": 40.625, + "args": { + "External id": 3293941,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596778635.436, "dur": 23.171, + "args": { + "External id": 3293942,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596778676.400, "dur": 18.176, + "args": { + "External id": 3293943,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596778709.787, "dur": 14.249, + "args": { + "External id": 3293944,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596778741.291, "dur": 17.151, + "args": { + "External id": 3293945,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778833.007, "dur": 14.550, + "args": { + "External id": 3293946,"Record function id": 0, "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778836.274, "dur": 10.397, + "args": { + "External id": 3293947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778840.198, "dur": 5.727, + "args": { + "External id": 3293948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778842.145, "dur": 3.656, + "args": { + "External id": 3293949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778851.398, "dur": 4.587, + "args": { + "External id": 3293950,"Record function id": 0, "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778852.656, "dur": 2.852, + "args": { + "External id": 3293951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778853.384, "dur": 1.670, + "args": { + "External id": 3293952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778854.136, "dur": 0.842, + "args": { + "External id": 3293953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778859.307, "dur": 4.074, + "args": { + "External id": 3293954,"Record function id": 0, "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778860.534, "dur": 2.421, + "args": { + "External id": 3293955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778861.159, "dur": 1.388, + "args": { + "External id": 3293956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778861.472, "dur": 1.002, + "args": { + "External id": 3293957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778884.932, "dur": 6.738, + "args": { + "External id": 3293958,"Record function id": 0, "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778886.779, "dur": 4.177, + "args": { + "External id": 3293959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778887.974, "dur": 2.176, + "args": { + "External id": 3293960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778888.645, "dur": 1.303, + "args": { + "External id": 3293961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778895.330, "dur": 5.953, + "args": { + "External id": 3293962,"Record function id": 0, "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778896.708, "dur": 4.139, + "args": { + "External id": 3293963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778897.320, "dur": 3.127, + "args": { + "External id": 3293964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778897.581, "dur": 2.801, + "args": { + "External id": 3293965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778904.393, "dur": 4.631, + "args": { + "External id": 3293966,"Record function id": 0, "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778905.931, "dur": 2.627, + "args": { + "External id": 3293967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778906.837, "dur": 1.309, + "args": { + "External id": 3293968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778907.307, "dur": 0.725, + "args": { + "External id": 3293969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778912.297, "dur": 3.432, + "args": { + "External id": 3293970,"Record function id": 0, "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778913.394, "dur": 1.880, + "args": { + "External id": 3293971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778913.900, "dur": 0.991, + "args": { + "External id": 3293972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778914.175, "dur": 0.618, + "args": { + "External id": 3293973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778918.728, "dur": 3.816, + "args": { + "External id": 3293974,"Record function id": 0, "Ev Idx": 4373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778920.020, "dur": 2.090, + "args": { + "External id": 3293975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778920.520, "dur": 1.183, + "args": { + "External id": 3293976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778920.919, "dur": 0.670, + "args": { + "External id": 3293977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778925.508, "dur": 3.953, + "args": { + "External id": 3293978,"Record function id": 0, "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596778926.561, "dur": 2.456, + "args": { + "External id": 3293979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778927.139, "dur": 1.456, + "args": { + "External id": 3293980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596778927.781, "dur": 0.700, + "args": { + "External id": 3293981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596778932.971, "dur": 35987.744, + "args": { + "External id": 3293982,"Record function id": 0, "Sequence number": 32987123, "Fwd thread id": 1, "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596778934.583, "dur": 35977.344, + "args": { + "External id": 3293983,"Sequence number": 32987123, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4382 + } + }, + { + "ph": "f", "id": 209, "pid": 1336756, "tid": 1381189, "ts": 1587596778934.583, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 1336756, "tid": 1381189, + "ts": 1587596778962.086, "dur": 72.584, + "args": { + "External id": 3293984,"Record function id": 0, "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 1336756, "tid": 1381189, + "ts": 1587596779043.825, "dur": 60.914, + "args": { + "External id": 3293985,"Record function id": 0, "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 1336756, "tid": 1381189, + "ts": 1587596779111.019, "dur": 35792.003, + "args": { + "External id": 3293986,"Record function id": 0, "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596779199.496, "dur": 6.547, + "args": { + "External id": 3293987,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596779219.353, "dur": 4.981, + "args": { + "External id": 3293988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596779239.935, "dur": 34881.817, + "args": { + "External id": 3293989,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596779252.768, "dur": 34857.652, + "args": { + "External id": 3293990,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596779303.787, "dur": 13.121, + "args": { + "External id": 3293991,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596779323.028, "dur": 34745.193, + "args": { + "External id": 3293992,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596779325.828, "dur": 34741.584, + "args": { + "External id": 3293993,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596779332.902, "dur": 7.064, + "args": { + "External id": 3293994,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596779341.776, "dur": 34720.960, + "args": { + "External id": 3293995,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596814221.722, "dur": 10.819, + "args": { + "External id": 3293996,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596814225.448, "dur": 6.689, + "args": { + "External id": 3293997,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596814263.505, "dur": 337.303, + "args": { + "External id": 3293998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596814288.875, "dur": 307.232, + "args": { + "External id": 3293999,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4398, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596814300.302, "dur": 290.773, + "args": { + "External id": 3294000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596814617.929, "dur": 2.259, + "args": { + "External id": 3294001,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4400, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814677.165, "dur": 6.451, + "args": { + "External id": 3294002,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814729.317, "dur": 1.624, + "args": { + "External id": 3294003,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814747.106, "dur": 0.973, + "args": { + "External id": 3294004,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814760.207, "dur": 2.993, + "args": { + "External id": 3294005,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814775.456, "dur": 0.948, + "args": { + "External id": 3294006,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814787.172, "dur": 0.919, + "args": { + "External id": 3294007,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814799.348, "dur": 1.120, + "args": { + "External id": 3294008,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814812.076, "dur": 3.282, + "args": { + "External id": 3294009,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596814825.042, "dur": 0.646, + "args": { + "External id": 3294010,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596814936.573, "dur": 2694.278, + "args": { + "External id": 3294011,"Record function id": 0, "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 1336756, "tid": 1381189, + "ts": 1587596814956.568, "dur": 980.806, + "args": { + "External id": 3294012,"Record function id": 0, "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 1336756, "tid": 1381189, + "ts": 1587596814971.932, "dur": 351.193, + "args": { + "External id": 3294013,"Record function id": 0, "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815089.250, "dur": 4.690, + "args": { + "External id": 3294014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815097.376, "dur": 1.034, + "args": { + "External id": 3294015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815100.040, "dur": 0.766, + "args": { + "External id": 3294016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815102.835, "dur": 0.841, + "args": { + "External id": 3294017,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815104.860, "dur": 0.982, + "args": { + "External id": 3294018,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815107.190, "dur": 2.631, + "args": { + "External id": 3294019,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815111.169, "dur": 1.557, + "args": { + "External id": 3294020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815115.053, "dur": 0.646, + "args": { + "External id": 3294021,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815117.022, "dur": 0.873, + "args": { + "External id": 3294022,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596815119.143, "dur": 0.600, + "args": { + "External id": 3294023,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596815139.106, "dur": 151.481, + "args": { + "External id": 3294024,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596815155.382, "dur": 131.038, + "args": { + "External id": 3294025,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596815177.785, "dur": 12.262, + "args": { + "External id": 3294026,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596815193.627, "dur": 65.104, + "args": { + "External id": 3294027,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596815196.219, "dur": 62.185, + "args": { + "External id": 3294028,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815200.462, "dur": 5.952, + "args": { + "External id": 3294029,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596815208.576, "dur": 48.953, + "args": { + "External id": 3294030,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 1336756, "tid": 1381189, + "ts": 1587596815398.451, "dur": 531.531, + "args": { + "External id": 3294031,"Record function id": 0, "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 1336756, "tid": 1381189, + "ts": 1587596815415.891, "dur": 501.438, + "args": { + "External id": 3294032,"Record function id": 0, "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596815470.507, "dur": 5.002, + "args": { + "External id": 3294033,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596815490.473, "dur": 30.225, + "args": { + "External id": 3294034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815495.112, "dur": 3.393, + "args": { + "External id": 3294035,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815500.292, "dur": 0.654, + "args": { + "External id": 3294036,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815502.499, "dur": 0.738, + "args": { + "External id": 3294037,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815504.721, "dur": 0.565, + "args": { + "External id": 3294038,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815507.163, "dur": 0.574, + "args": { + "External id": 3294039,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815509.272, "dur": 0.365, + "args": { + "External id": 3294040,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815511.263, "dur": 0.528, + "args": { + "External id": 3294041,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815513.434, "dur": 0.407, + "args": { + "External id": 3294042,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815515.500, "dur": 2.150, + "args": { + "External id": 3294043,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596815530.219, "dur": 29.795, + "args": { + "External id": 3294044,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596815588.865, "dur": 97.226, + "args": { + "External id": 3294045,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596815598.515, "dur": 2.434, + "args": { + "External id": 3294046,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596815606.156, "dur": 9.582, + "args": { + "External id": 3294047,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596815610.254, "dur": 5.043, + "args": { + "External id": 3294048,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815613.724, "dur": 0.427, + "args": { + "External id": 3294049,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596815622.304, "dur": 22.709, + "args": { + "External id": 3294050,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815624.648, "dur": 0.349, + "args": { + "External id": 3294051,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815626.472, "dur": 0.583, + "args": { + "External id": 3294052,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815628.631, "dur": 0.465, + "args": { + "External id": 3294053,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815630.474, "dur": 0.403, + "args": { + "External id": 3294054,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815632.610, "dur": 0.480, + "args": { + "External id": 3294055,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815634.557, "dur": 2.032, + "args": { + "External id": 3294056,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815638.175, "dur": 0.400, + "args": { + "External id": 3294057,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815639.990, "dur": 0.364, + "args": { + "External id": 3294058,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596815641.598, "dur": 0.307, + "args": { + "External id": 3294059,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596815660.283, "dur": 19.088, + "args": { + "External id": 3294060,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596815727.937, "dur": 106.420, + "args": { + "External id": 3294061,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596815751.444, "dur": 79.754, + "args": { + "External id": 3294062,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4461, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596815760.872, "dur": 66.194, + "args": { + "External id": 3294063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596815846.377, "dur": 1.724, + "args": { + "External id": 3294064,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4463, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596815944.580, "dur": 1667.761, + "args": { + "External id": 3294065,"Sequence number": 32987122, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4464 + } + }, + { + "ph": "f", "id": 210, "pid": 1336756, "tid": 1381189, "ts": 1587596815944.580, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816092.033, "dur": 111.076, + "args": { + "External id": 3294066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596816243.663, "dur": 40.206, + "args": { + "External id": 3294067,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816301.592, "dur": 51.345, + "args": { + "External id": 3294068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816362.177, "dur": 37.770, + "args": { + "External id": 3294069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816406.940, "dur": 49.458, + "args": { + "External id": 3294070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816465.068, "dur": 29.598, + "args": { + "External id": 3294071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816501.391, "dur": 46.831, + "args": { + "External id": 3294072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596816573.141, "dur": 23.820, + "args": { + "External id": 3294073,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596816614.182, "dur": 33.191, + "args": { + "External id": 3294074,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596816664.135, "dur": 21.868, + "args": { + "External id": 3294075,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596816698.815, "dur": 14.727, + "args": { + "External id": 3294076,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816723.353, "dur": 28.304, + "args": { + "External id": 3294077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596816754.752, "dur": 33.433, + "args": { + "External id": 3294078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596816814.354, "dur": 238.876, + "args": { + "External id": 3294079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596816912.782, "dur": 9.936, + "args": { + "External id": 3294080,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596816925.063, "dur": 3.743, + "args": { + "External id": 3294081,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596817088.278, "dur": 27.400, + "args": { + "External id": 3294082,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596817126.968, "dur": 16.482, + "args": { + "External id": 3294083,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596817154.014, "dur": 52.936, + "args": { + "External id": 3294084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596817213.583, "dur": 41.029, + "args": { + "External id": 3294085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596817261.894, "dur": 22.777, + "args": { + "External id": 3294086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596817290.909, "dur": 30.798, + "args": { + "External id": 3294087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596817327.611, "dur": 21.315, + "args": { + "External id": 3294088,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596817356.613, "dur": 30.878, + "args": { + "External id": 3294089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596817404.085, "dur": 34.694, + "args": { + "External id": 3294090,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596817462.213, "dur": 25.577, + "args": { + "External id": 3294091,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596817501.682, "dur": 18.140, + "args": { + "External id": 3294092,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596817536.850, "dur": 14.171, + "args": { + "External id": 3294093,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596817564.445, "dur": 17.107, + "args": { + "External id": 3294094,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817652.004, "dur": 14.932, + "args": { + "External id": 3294095,"Record function id": 0, "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817655.285, "dur": 10.682, + "args": { + "External id": 3294096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817659.435, "dur": 5.625, + "args": { + "External id": 3294097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817660.931, "dur": 3.998, + "args": { + "External id": 3294098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817671.070, "dur": 5.061, + "args": { + "External id": 3294099,"Record function id": 0, "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817672.362, "dur": 3.346, + "args": { + "External id": 3294100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817673.156, "dur": 2.053, + "args": { + "External id": 3294101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817674.192, "dur": 0.918, + "args": { + "External id": 3294102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817679.228, "dur": 4.148, + "args": { + "External id": 3294103,"Record function id": 0, "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817680.388, "dur": 2.584, + "args": { + "External id": 3294104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817681.109, "dur": 1.427, + "args": { + "External id": 3294105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817681.551, "dur": 0.912, + "args": { + "External id": 3294106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817686.753, "dur": 4.210, + "args": { + "External id": 3294107,"Record function id": 0, "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817687.780, "dur": 2.780, + "args": { + "External id": 3294108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817688.938, "dur": 1.241, + "args": { + "External id": 3294109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817689.422, "dur": 0.686, + "args": { + "External id": 3294110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817694.322, "dur": 5.216, + "args": { + "External id": 3294111,"Record function id": 0, "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817695.422, "dur": 3.653, + "args": { + "External id": 3294112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817696.028, "dur": 2.638, + "args": { + "External id": 3294113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817696.353, "dur": 2.251, + "args": { + "External id": 3294114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817702.577, "dur": 3.701, + "args": { + "External id": 3294115,"Record function id": 0, "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817703.632, "dur": 2.199, + "args": { + "External id": 3294116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817704.076, "dur": 1.343, + "args": { + "External id": 3294117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817704.550, "dur": 0.758, + "args": { + "External id": 3294118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817709.420, "dur": 6.625, + "args": { + "External id": 3294119,"Record function id": 0, "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817713.752, "dur": 1.870, + "args": { + "External id": 3294120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817714.314, "dur": 0.880, + "args": { + "External id": 3294121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817714.610, "dur": 0.522, + "args": { + "External id": 3294122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817719.027, "dur": 4.162, + "args": { + "External id": 3294123,"Record function id": 0, "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817720.364, "dur": 2.412, + "args": { + "External id": 3294124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817720.935, "dur": 1.437, + "args": { + "External id": 3294125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817721.212, "dur": 1.062, + "args": { + "External id": 3294126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817726.076, "dur": 3.872, + "args": { + "External id": 3294127,"Record function id": 0, "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596817727.034, "dur": 2.520, + "args": { + "External id": 3294128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817727.643, "dur": 1.269, + "args": { + "External id": 3294129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596817728.157, "dur": 0.642, + "args": { + "External id": 3294130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596817733.203, "dur": 36532.364, + "args": { + "External id": 3294131,"Record function id": 0, "Sequence number": 32987121, "Fwd thread id": 1, "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596817734.482, "dur": 36522.519, + "args": { + "External id": 3294132,"Sequence number": 32987121, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4531 + } + }, + { + "ph": "f", "id": 211, "pid": 1336756, "tid": 1381189, "ts": 1587596817734.482, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 1336756, "tid": 1381189, + "ts": 1587596817761.734, "dur": 39.098, + "args": { + "External id": 3294133,"Record function id": 0, "Ev Idx": 4532 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 1336756, "tid": 1381189, + "ts": 1587596817808.306, "dur": 78.514, + "args": { + "External id": 3294134,"Record function id": 0, "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 1336756, "tid": 1381189, + "ts": 1587596817894.786, "dur": 36354.317, + "args": { + "External id": 3294135,"Record function id": 0, "Ev Idx": 4534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596818015.969, "dur": 7.931, + "args": { + "External id": 3294136,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596818035.676, "dur": 5.105, + "args": { + "External id": 3294137,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596818055.771, "dur": 35368.017, + "args": { + "External id": 3294138,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596818069.171, "dur": 35344.705, + "args": { + "External id": 3294139,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596818127.685, "dur": 14.188, + "args": { + "External id": 3294140,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596818148.435, "dur": 35223.157, + "args": { + "External id": 3294141,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596818151.068, "dur": 35219.724, + "args": { + "External id": 3294142,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596818157.905, "dur": 4.602, + "args": { + "External id": 3294143,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596818164.499, "dur": 35202.097, + "args": { + "External id": 3294144,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596853520.304, "dur": 10.359, + "args": { + "External id": 3294145,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596853523.896, "dur": 6.382, + "args": { + "External id": 3294146,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596853561.474, "dur": 357.075, + "args": { + "External id": 3294147,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596853586.297, "dur": 327.050, + "args": { + "External id": 3294148,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4547, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596853597.018, "dur": 310.606, + "args": { + "External id": 3294149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596853938.133, "dur": 2.387, + "args": { + "External id": 3294150,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4549, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854037.460, "dur": 7.085, + "args": { + "External id": 3294151,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854092.738, "dur": 1.487, + "args": { + "External id": 3294152,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854110.270, "dur": 1.174, + "args": { + "External id": 3294153,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854122.827, "dur": 1.004, + "args": { + "External id": 3294154,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854139.085, "dur": 0.887, + "args": { + "External id": 3294155,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854152.076, "dur": 1.057, + "args": { + "External id": 3294156,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854163.468, "dur": 0.776, + "args": { + "External id": 3294157,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854175.131, "dur": 1.868, + "args": { + "External id": 3294158,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854186.299, "dur": 0.733, + "args": { + "External id": 3294159,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596854280.598, "dur": 2735.624, + "args": { + "External id": 3294160,"Record function id": 0, "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 1336756, "tid": 1381189, + "ts": 1587596854299.874, "dur": 1014.393, + "args": { + "External id": 3294161,"Record function id": 0, "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 1336756, "tid": 1381189, + "ts": 1587596854314.273, "dur": 294.252, + "args": { + "External id": 3294162,"Record function id": 0, "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854388.371, "dur": 3.707, + "args": { + "External id": 3294163,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854394.944, "dur": 0.681, + "args": { + "External id": 3294164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854398.677, "dur": 0.970, + "args": { + "External id": 3294165,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854401.257, "dur": 0.898, + "args": { + "External id": 3294166,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854404.101, "dur": 0.708, + "args": { + "External id": 3294167,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854406.492, "dur": 0.491, + "args": { + "External id": 3294168,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854410.220, "dur": 0.924, + "args": { + "External id": 3294169,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854412.494, "dur": 2.868, + "args": { + "External id": 3294170,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854416.888, "dur": 1.093, + "args": { + "External id": 3294171,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596854419.695, "dur": 1.038, + "args": { + "External id": 3294172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596854439.410, "dur": 141.976, + "args": { + "External id": 3294173,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596854455.559, "dur": 121.835, + "args": { + "External id": 3294174,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596854470.003, "dur": 13.052, + "args": { + "External id": 3294175,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596854486.385, "dur": 63.817, + "args": { + "External id": 3294176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596854489.121, "dur": 60.755, + "args": { + "External id": 3294177,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854492.415, "dur": 5.614, + "args": { + "External id": 3294178,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596854499.945, "dur": 49.202, + "args": { + "External id": 3294179,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 1336756, "tid": 1381189, + "ts": 1587596854679.217, "dur": 623.897, + "args": { + "External id": 3294180,"Record function id": 0, "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 1336756, "tid": 1381189, + "ts": 1587596854695.257, "dur": 594.966, + "args": { + "External id": 3294181,"Record function id": 0, "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596854745.997, "dur": 4.367, + "args": { + "External id": 3294182,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596854768.903, "dur": 26.178, + "args": { + "External id": 3294183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854773.094, "dur": 1.257, + "args": { + "External id": 3294184,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854776.092, "dur": 0.377, + "args": { + "External id": 3294185,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854777.539, "dur": 2.476, + "args": { + "External id": 3294186,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854782.112, "dur": 0.271, + "args": { + "External id": 3294187,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854783.910, "dur": 0.330, + "args": { + "External id": 3294188,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854785.878, "dur": 0.282, + "args": { + "External id": 3294189,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854787.649, "dur": 0.271, + "args": { + "External id": 3294190,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854789.627, "dur": 0.566, + "args": { + "External id": 3294191,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854791.583, "dur": 0.338, + "args": { + "External id": 3294192,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596854805.442, "dur": 29.484, + "args": { + "External id": 3294193,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596854884.867, "dur": 172.179, + "args": { + "External id": 3294194,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596854896.522, "dur": 4.348, + "args": { + "External id": 3294195,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596854906.477, "dur": 12.138, + "args": { + "External id": 3294196,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596854910.395, "dur": 7.778, + "args": { + "External id": 3294197,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854914.098, "dur": 2.314, + "args": { + "External id": 3294198,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596854928.916, "dur": 25.161, + "args": { + "External id": 3294199,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854931.297, "dur": 0.619, + "args": { + "External id": 3294200,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854933.988, "dur": 0.376, + "args": { + "External id": 3294201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854935.945, "dur": 0.261, + "args": { + "External id": 3294202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854938.048, "dur": 0.378, + "args": { + "External id": 3294203,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854940.136, "dur": 0.291, + "args": { + "External id": 3294204,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854941.655, "dur": 0.323, + "args": { + "External id": 3294205,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854943.277, "dur": 0.246, + "args": { + "External id": 3294206,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854945.063, "dur": 2.041, + "args": { + "External id": 3294207,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596854948.312, "dur": 0.246, + "args": { + "External id": 3294208,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596855018.116, "dur": 27.910, + "args": { + "External id": 3294209,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596855106.608, "dur": 110.485, + "args": { + "External id": 3294210,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596855131.248, "dur": 82.372, + "args": { + "External id": 3294211,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4610, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596855140.710, "dur": 68.325, + "args": { + "External id": 3294212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596855235.170, "dur": 1.785, + "args": { + "External id": 3294213,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4612, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596855322.016, "dur": 1639.517, + "args": { + "External id": 3294214,"Sequence number": 32987120, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4613 + } + }, + { + "ph": "f", "id": 212, "pid": 1336756, "tid": 1381189, "ts": 1587596855322.016, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596855427.104, "dur": 105.749, + "args": { + "External id": 3294215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596855577.586, "dur": 43.095, + "args": { + "External id": 3294216,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596855635.833, "dur": 52.628, + "args": { + "External id": 3294217,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596855699.516, "dur": 33.233, + "args": { + "External id": 3294218,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596855743.303, "dur": 45.303, + "args": { + "External id": 3294219,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596855795.489, "dur": 27.655, + "args": { + "External id": 3294220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596855831.422, "dur": 61.502, + "args": { + "External id": 3294221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596855923.415, "dur": 33.092, + "args": { + "External id": 3294222,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596855975.158, "dur": 71.790, + "args": { + "External id": 3294223,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596856071.992, "dur": 21.272, + "args": { + "External id": 3294224,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596856105.073, "dur": 15.629, + "args": { + "External id": 3294225,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856129.400, "dur": 35.063, + "args": { + "External id": 3294226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856167.580, "dur": 33.613, + "args": { + "External id": 3294227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596856229.250, "dur": 172.484, + "args": { + "External id": 3294228,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596856306.806, "dur": 5.743, + "args": { + "External id": 3294229,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596856314.733, "dur": 2.404, + "args": { + "External id": 3294230,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596856431.207, "dur": 25.009, + "args": { + "External id": 3294231,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596856468.244, "dur": 18.722, + "args": { + "External id": 3294232,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856493.799, "dur": 33.584, + "args": { + "External id": 3294233,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856536.212, "dur": 34.433, + "args": { + "External id": 3294234,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856578.405, "dur": 24.571, + "args": { + "External id": 3294235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856607.759, "dur": 30.733, + "args": { + "External id": 3294236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856644.598, "dur": 24.420, + "args": { + "External id": 3294237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596856675.788, "dur": 31.276, + "args": { + "External id": 3294238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596856723.741, "dur": 37.255, + "args": { + "External id": 3294239,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596856783.736, "dur": 25.808, + "args": { + "External id": 3294240,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596856822.942, "dur": 18.626, + "args": { + "External id": 3294241,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596856856.135, "dur": 32.611, + "args": { + "External id": 3294242,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596856910.503, "dur": 19.535, + "args": { + "External id": 3294243,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857038.911, "dur": 16.857, + "args": { + "External id": 3294244,"Record function id": 0, "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857042.811, "dur": 11.664, + "args": { + "External id": 3294245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857047.126, "dur": 5.958, + "args": { + "External id": 3294246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857048.684, "dur": 4.269, + "args": { + "External id": 3294247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857059.431, "dur": 4.653, + "args": { + "External id": 3294248,"Record function id": 0, "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857060.869, "dur": 2.815, + "args": { + "External id": 3294249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857061.454, "dur": 1.697, + "args": { + "External id": 3294250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857062.242, "dur": 0.842, + "args": { + "External id": 3294251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857067.372, "dur": 7.258, + "args": { + "External id": 3294252,"Record function id": 0, "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857068.430, "dur": 5.795, + "args": { + "External id": 3294253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857068.975, "dur": 1.426, + "args": { + "External id": 3294254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857069.542, "dur": 0.787, + "args": { + "External id": 3294255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857077.660, "dur": 3.805, + "args": { + "External id": 3294256,"Record function id": 0, "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857078.732, "dur": 2.319, + "args": { + "External id": 3294257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857079.339, "dur": 1.298, + "args": { + "External id": 3294258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857079.824, "dur": 0.744, + "args": { + "External id": 3294259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857084.437, "dur": 5.610, + "args": { + "External id": 3294260,"Record function id": 0, "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857086.123, "dur": 3.522, + "args": { + "External id": 3294261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857086.628, "dur": 2.546, + "args": { + "External id": 3294262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857086.916, "dur": 2.187, + "args": { + "External id": 3294263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857093.175, "dur": 3.751, + "args": { + "External id": 3294264,"Record function id": 0, "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857094.295, "dur": 2.180, + "args": { + "External id": 3294265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857094.827, "dur": 1.229, + "args": { + "External id": 3294266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857095.259, "dur": 0.678, + "args": { + "External id": 3294267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857100.196, "dur": 3.793, + "args": { + "External id": 3294268,"Record function id": 0, "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857101.284, "dur": 2.267, + "args": { + "External id": 3294269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857101.941, "dur": 1.188, + "args": { + "External id": 3294270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857102.297, "dur": 0.725, + "args": { + "External id": 3294271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857106.968, "dur": 3.595, + "args": { + "External id": 3294272,"Record function id": 0, "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857108.012, "dur": 2.140, + "args": { + "External id": 3294273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857108.592, "dur": 1.134, + "args": { + "External id": 3294274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857108.855, "dur": 0.750, + "args": { + "External id": 3294275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857113.536, "dur": 4.104, + "args": { + "External id": 3294276,"Record function id": 0, "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596857114.719, "dur": 2.464, + "args": { + "External id": 3294277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857115.211, "dur": 1.556, + "args": { + "External id": 3294278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596857116.092, "dur": 0.559, + "args": { + "External id": 3294279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596857121.195, "dur": 37248.022, + "args": { + "External id": 3294280,"Record function id": 0, "Sequence number": 32987119, "Fwd thread id": 1, "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596857122.534, "dur": 37238.487, + "args": { + "External id": 3294281,"Sequence number": 32987119, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4680 + } + }, + { + "ph": "f", "id": 213, "pid": 1336756, "tid": 1381189, "ts": 1587596857122.534, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 1336756, "tid": 1381189, + "ts": 1587596857153.795, "dur": 40.664, + "args": { + "External id": 3294282,"Record function id": 0, "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 1336756, "tid": 1381189, + "ts": 1587596857202.258, "dur": 63.031, + "args": { + "External id": 3294283,"Record function id": 0, "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 1336756, "tid": 1381189, + "ts": 1587596857271.139, "dur": 37081.940, + "args": { + "External id": 3294284,"Record function id": 0, "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596857359.372, "dur": 6.772, + "args": { + "External id": 3294285,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596857375.569, "dur": 4.754, + "args": { + "External id": 3294286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596857393.935, "dur": 36102.669, + "args": { + "External id": 3294287,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596857406.362, "dur": 36080.288, + "args": { + "External id": 3294288,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596857451.676, "dur": 13.419, + "args": { + "External id": 3294289,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596857471.299, "dur": 35975.091, + "args": { + "External id": 3294290,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596857473.866, "dur": 35971.706, + "args": { + "External id": 3294291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596857478.040, "dur": 5.292, + "args": { + "External id": 3294292,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596857485.041, "dur": 35956.227, + "args": { + "External id": 3294293,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596893590.977, "dur": 9.938, + "args": { + "External id": 3294294,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596893594.112, "dur": 6.409, + "args": { + "External id": 3294295,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596893633.280, "dur": 426.926, + "args": { + "External id": 3294296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596893664.157, "dur": 390.778, + "args": { + "External id": 3294297,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4696, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596893674.486, "dur": 374.298, + "args": { + "External id": 3294298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596894083.851, "dur": 2.535, + "args": { + "External id": 3294299,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4698, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894149.635, "dur": 8.875, + "args": { + "External id": 3294300,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894201.827, "dur": 1.413, + "args": { + "External id": 3294301,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894218.567, "dur": 1.288, + "args": { + "External id": 3294302,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894232.164, "dur": 0.679, + "args": { + "External id": 3294303,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894244.187, "dur": 2.835, + "args": { + "External id": 3294304,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894257.241, "dur": 0.975, + "args": { + "External id": 3294305,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894269.087, "dur": 0.811, + "args": { + "External id": 3294306,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894281.607, "dur": 1.362, + "args": { + "External id": 3294307,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894292.742, "dur": 2.639, + "args": { + "External id": 3294308,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596894382.390, "dur": 2729.687, + "args": { + "External id": 3294309,"Record function id": 0, "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 1336756, "tid": 1381189, + "ts": 1587596894401.160, "dur": 1011.124, + "args": { + "External id": 3294310,"Record function id": 0, "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 1336756, "tid": 1381189, + "ts": 1587596894415.038, "dur": 297.587, + "args": { + "External id": 3294311,"Record function id": 0, "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894489.039, "dur": 4.003, + "args": { + "External id": 3294312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894496.046, "dur": 0.933, + "args": { + "External id": 3294313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894498.398, "dur": 1.210, + "args": { + "External id": 3294314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894502.119, "dur": 0.915, + "args": { + "External id": 3294315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894504.559, "dur": 0.625, + "args": { + "External id": 3294316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894506.884, "dur": 0.896, + "args": { + "External id": 3294317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894509.529, "dur": 0.984, + "args": { + "External id": 3294318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894512.485, "dur": 2.329, + "args": { + "External id": 3294319,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894516.109, "dur": 0.835, + "args": { + "External id": 3294320,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596894518.597, "dur": 1.386, + "args": { + "External id": 3294321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596894538.553, "dur": 147.614, + "args": { + "External id": 3294322,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596894554.132, "dur": 127.974, + "args": { + "External id": 3294323,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596894572.516, "dur": 13.162, + "args": { + "External id": 3294324,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596894588.478, "dur": 65.347, + "args": { + "External id": 3294325,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596894591.072, "dur": 62.454, + "args": { + "External id": 3294326,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894597.620, "dur": 4.916, + "args": { + "External id": 3294327,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596894604.046, "dur": 48.761, + "args": { + "External id": 3294328,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 1336756, "tid": 1381189, + "ts": 1587596894783.521, "dur": 621.151, + "args": { + "External id": 3294329,"Record function id": 0, "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 1336756, "tid": 1381189, + "ts": 1587596894799.497, "dur": 593.129, + "args": { + "External id": 3294330,"Record function id": 0, "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596894850.064, "dur": 4.624, + "args": { + "External id": 3294331,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596894894.248, "dur": 29.372, + "args": { + "External id": 3294332,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894899.347, "dur": 1.408, + "args": { + "External id": 3294333,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894902.870, "dur": 0.461, + "args": { + "External id": 3294334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894905.918, "dur": 1.715, + "args": { + "External id": 3294335,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894909.040, "dur": 0.440, + "args": { + "External id": 3294336,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894910.886, "dur": 0.786, + "args": { + "External id": 3294337,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894913.146, "dur": 0.307, + "args": { + "External id": 3294338,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894914.903, "dur": 0.374, + "args": { + "External id": 3294339,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894916.819, "dur": 0.586, + "args": { + "External id": 3294340,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596894918.590, "dur": 0.326, + "args": { + "External id": 3294341,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596894934.122, "dur": 34.881, + "args": { + "External id": 3294342,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596895039.917, "dur": 123.154, + "args": { + "External id": 3294343,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596895051.342, "dur": 5.164, + "args": { + "External id": 3294344,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596895061.722, "dur": 12.230, + "args": { + "External id": 3294345,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596895065.552, "dur": 7.993, + "args": { + "External id": 3294346,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895069.268, "dur": 2.569, + "args": { + "External id": 3294347,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596895081.021, "dur": 24.695, + "args": { + "External id": 3294348,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895083.551, "dur": 0.515, + "args": { + "External id": 3294349,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895085.971, "dur": 0.316, + "args": { + "External id": 3294350,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895090.743, "dur": 0.303, + "args": { + "External id": 3294351,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895092.554, "dur": 0.518, + "args": { + "External id": 3294352,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895094.569, "dur": 0.398, + "args": { + "External id": 3294353,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895096.398, "dur": 0.295, + "args": { + "External id": 3294354,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895098.010, "dur": 0.451, + "args": { + "External id": 3294355,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895099.665, "dur": 1.837, + "args": { + "External id": 3294356,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596895102.797, "dur": 0.325, + "args": { + "External id": 3294357,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596895124.068, "dur": 31.235, + "args": { + "External id": 3294358,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596895212.664, "dur": 112.010, + "args": { + "External id": 3294359,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596895239.303, "dur": 82.106, + "args": { + "External id": 3294360,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4759, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596895248.954, "dur": 68.587, + "args": { + "External id": 3294361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596895340.168, "dur": 2.003, + "args": { + "External id": 3294362,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4761, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596895418.865, "dur": 1672.330, + "args": { + "External id": 3294363,"Sequence number": 32987118, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4762 + } + }, + { + "ph": "f", "id": 214, "pid": 1336756, "tid": 1381189, "ts": 1587596895418.865, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596895531.564, "dur": 103.465, + "args": { + "External id": 3294364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596895677.036, "dur": 41.886, + "args": { + "External id": 3294365,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596895733.581, "dur": 51.843, + "args": { + "External id": 3294366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596895796.602, "dur": 31.458, + "args": { + "External id": 3294367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596895834.014, "dur": 66.080, + "args": { + "External id": 3294368,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596895910.249, "dur": 31.202, + "args": { + "External id": 3294369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596895948.561, "dur": 81.951, + "args": { + "External id": 3294370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596896059.445, "dur": 28.834, + "args": { + "External id": 3294371,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596896109.248, "dur": 30.387, + "args": { + "External id": 3294372,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596896159.421, "dur": 19.516, + "args": { + "External id": 3294373,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596896193.361, "dur": 14.328, + "args": { + "External id": 3294374,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896215.210, "dur": 33.769, + "args": { + "External id": 3294375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896252.195, "dur": 36.139, + "args": { + "External id": 3294376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596896324.122, "dur": 173.675, + "args": { + "External id": 3294377,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596896401.229, "dur": 5.727, + "args": { + "External id": 3294378,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596896408.671, "dur": 2.186, + "args": { + "External id": 3294379,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596896530.039, "dur": 22.737, + "args": { + "External id": 3294380,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596896564.059, "dur": 15.775, + "args": { + "External id": 3294381,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896586.866, "dur": 32.519, + "args": { + "External id": 3294382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896624.831, "dur": 33.970, + "args": { + "External id": 3294383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896666.253, "dur": 21.144, + "args": { + "External id": 3294384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896691.627, "dur": 29.325, + "args": { + "External id": 3294385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896726.292, "dur": 21.044, + "args": { + "External id": 3294386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596896754.325, "dur": 32.522, + "args": { + "External id": 3294387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596896805.332, "dur": 25.089, + "args": { + "External id": 3294388,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596896849.857, "dur": 58.732, + "args": { + "External id": 3294389,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596896933.896, "dur": 20.183, + "args": { + "External id": 3294390,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596896972.944, "dur": 48.920, + "args": { + "External id": 3294391,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596897040.697, "dur": 19.395, + "args": { + "External id": 3294392,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897136.398, "dur": 15.578, + "args": { + "External id": 3294393,"Record function id": 0, "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897139.743, "dur": 11.359, + "args": { + "External id": 3294394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897144.144, "dur": 6.042, + "args": { + "External id": 3294395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897145.813, "dur": 4.256, + "args": { + "External id": 3294396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897155.845, "dur": 4.884, + "args": { + "External id": 3294397,"Record function id": 0, "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897157.415, "dur": 2.847, + "args": { + "External id": 3294398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897158.326, "dur": 1.453, + "args": { + "External id": 3294399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897158.922, "dur": 0.782, + "args": { + "External id": 3294400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897164.271, "dur": 3.804, + "args": { + "External id": 3294401,"Record function id": 0, "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897165.549, "dur": 2.118, + "args": { + "External id": 3294402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897166.024, "dur": 1.236, + "args": { + "External id": 3294403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897166.333, "dur": 0.858, + "args": { + "External id": 3294404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897171.151, "dur": 4.427, + "args": { + "External id": 3294405,"Record function id": 0, "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897172.534, "dur": 2.610, + "args": { + "External id": 3294406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897173.265, "dur": 1.458, + "args": { + "External id": 3294407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897173.712, "dur": 0.920, + "args": { + "External id": 3294408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897178.586, "dur": 9.207, + "args": { + "External id": 3294409,"Record function id": 0, "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897179.789, "dur": 7.555, + "args": { + "External id": 3294410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897180.398, "dur": 6.533, + "args": { + "External id": 3294411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897180.847, "dur": 5.977, + "args": { + "External id": 3294412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897190.791, "dur": 3.435, + "args": { + "External id": 3294413,"Record function id": 0, "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897191.836, "dur": 1.948, + "args": { + "External id": 3294414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897192.286, "dur": 1.089, + "args": { + "External id": 3294415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897192.543, "dur": 0.717, + "args": { + "External id": 3294416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897197.413, "dur": 5.215, + "args": { + "External id": 3294417,"Record function id": 0, "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897198.660, "dur": 3.537, + "args": { + "External id": 3294418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897199.158, "dur": 2.627, + "args": { + "External id": 3294419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897201.035, "dur": 0.669, + "args": { + "External id": 3294420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897205.662, "dur": 3.502, + "args": { + "External id": 3294421,"Record function id": 0, "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897206.751, "dur": 1.971, + "args": { + "External id": 3294422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897207.295, "dur": 1.000, + "args": { + "External id": 3294423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897207.570, "dur": 0.651, + "args": { + "External id": 3294424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897212.170, "dur": 3.308, + "args": { + "External id": 3294425,"Record function id": 0, "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596897213.188, "dur": 1.845, + "args": { + "External id": 3294426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897213.616, "dur": 1.007, + "args": { + "External id": 3294427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596897213.897, "dur": 0.655, + "args": { + "External id": 3294428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596897219.182, "dur": 36501.820, + "args": { + "External id": 3294429,"Record function id": 0, "Sequence number": 32987117, "Fwd thread id": 1, "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596897220.424, "dur": 36492.218, + "args": { + "External id": 3294430,"Sequence number": 32987117, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4829 + } + }, + { + "ph": "f", "id": 215, "pid": 1336756, "tid": 1381189, "ts": 1587596897220.424, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 1336756, "tid": 1381189, + "ts": 1587596897246.303, "dur": 33.524, + "args": { + "External id": 3294431,"Record function id": 0, "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 1336756, "tid": 1381189, + "ts": 1587596897289.260, "dur": 59.637, + "args": { + "External id": 3294432,"Record function id": 0, "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 1336756, "tid": 1381189, + "ts": 1587596897354.042, "dur": 36350.864, + "args": { + "External id": 3294433,"Record function id": 0, "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596897439.843, "dur": 6.165, + "args": { + "External id": 3294434,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596897455.389, "dur": 7.757, + "args": { + "External id": 3294435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596897476.768, "dur": 35454.929, + "args": { + "External id": 3294436,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596897490.810, "dur": 35430.011, + "args": { + "External id": 3294437,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596897528.887, "dur": 13.581, + "args": { + "External id": 3294438,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596897548.870, "dur": 35330.259, + "args": { + "External id": 3294439,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596897551.736, "dur": 35326.610, + "args": { + "External id": 3294440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596897555.664, "dur": 5.160, + "args": { + "External id": 3294441,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596897562.530, "dur": 35311.338, + "args": { + "External id": 3294442,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596933049.437, "dur": 10.830, + "args": { + "External id": 3294443,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596933052.600, "dur": 7.051, + "args": { + "External id": 3294444,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596933092.183, "dur": 339.828, + "args": { + "External id": 3294445,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596933120.770, "dur": 306.375, + "args": { + "External id": 3294446,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4845, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596933131.380, "dur": 290.134, + "args": { + "External id": 3294447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596933452.072, "dur": 2.168, + "args": { + "External id": 3294448,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4847, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933508.422, "dur": 6.625, + "args": { + "External id": 3294449,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933559.797, "dur": 1.628, + "args": { + "External id": 3294450,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933576.061, "dur": 1.214, + "args": { + "External id": 3294451,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933588.896, "dur": 0.969, + "args": { + "External id": 3294452,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933601.208, "dur": 0.825, + "args": { + "External id": 3294453,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933613.497, "dur": 0.777, + "args": { + "External id": 3294454,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933624.574, "dur": 0.684, + "args": { + "External id": 3294455,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933636.571, "dur": 0.692, + "args": { + "External id": 3294456,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933647.094, "dur": 1.001, + "args": { + "External id": 3294457,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596933734.128, "dur": 2701.968, + "args": { + "External id": 3294458,"Record function id": 0, "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 1336756, "tid": 1381189, + "ts": 1587596933751.394, "dur": 971.161, + "args": { + "External id": 3294459,"Record function id": 0, "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 1336756, "tid": 1381189, + "ts": 1587596933765.399, "dur": 358.768, + "args": { + "External id": 3294460,"Record function id": 0, "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933838.851, "dur": 4.061, + "args": { + "External id": 3294461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933846.027, "dur": 0.740, + "args": { + "External id": 3294462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933848.461, "dur": 0.704, + "args": { + "External id": 3294463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933852.370, "dur": 0.716, + "args": { + "External id": 3294464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933854.557, "dur": 0.760, + "args": { + "External id": 3294465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933856.696, "dur": 0.775, + "args": { + "External id": 3294466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933859.138, "dur": 0.860, + "args": { + "External id": 3294467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933862.303, "dur": 21.721, + "args": { + "External id": 3294468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933888.051, "dur": 1.067, + "args": { + "External id": 3294469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596933890.849, "dur": 0.787, + "args": { + "External id": 3294470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596933910.237, "dur": 182.599, + "args": { + "External id": 3294471,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596933926.863, "dur": 161.245, + "args": { + "External id": 3294472,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596933946.055, "dur": 11.838, + "args": { + "External id": 3294473,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596933961.108, "dur": 98.863, + "args": { + "External id": 3294474,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596933963.515, "dur": 96.059, + "args": { + "External id": 3294475,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596933967.976, "dur": 5.980, + "args": { + "External id": 3294476,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596933975.563, "dur": 82.495, + "args": { + "External id": 3294477,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 1336756, "tid": 1381189, + "ts": 1587596934199.351, "dur": 516.163, + "args": { + "External id": 3294478,"Record function id": 0, "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 1336756, "tid": 1381189, + "ts": 1587596934214.745, "dur": 489.702, + "args": { + "External id": 3294479,"Record function id": 0, "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596934268.418, "dur": 5.613, + "args": { + "External id": 3294480,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596934292.524, "dur": 27.713, + "args": { + "External id": 3294481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934297.303, "dur": 1.345, + "args": { + "External id": 3294482,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934300.658, "dur": 0.410, + "args": { + "External id": 3294483,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934303.630, "dur": 2.238, + "args": { + "External id": 3294484,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934307.588, "dur": 0.428, + "args": { + "External id": 3294485,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934309.538, "dur": 0.383, + "args": { + "External id": 3294486,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934311.707, "dur": 0.237, + "args": { + "External id": 3294487,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934313.314, "dur": 0.288, + "args": { + "External id": 3294488,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934315.145, "dur": 0.851, + "args": { + "External id": 3294489,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934317.387, "dur": 0.237, + "args": { + "External id": 3294490,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596934330.310, "dur": 31.833, + "args": { + "External id": 3294491,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596934392.099, "dur": 99.731, + "args": { + "External id": 3294492,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596934401.551, "dur": 3.256, + "args": { + "External id": 3294493,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596934410.074, "dur": 11.328, + "args": { + "External id": 3294494,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596934414.171, "dur": 6.822, + "args": { + "External id": 3294495,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934417.524, "dur": 2.313, + "args": { + "External id": 3294496,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596934427.901, "dur": 26.104, + "args": { + "External id": 3294497,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934430.388, "dur": 0.367, + "args": { + "External id": 3294498,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934432.596, "dur": 0.760, + "args": { + "External id": 3294499,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934434.603, "dur": 0.334, + "args": { + "External id": 3294500,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934436.166, "dur": 0.465, + "args": { + "External id": 3294501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934438.596, "dur": 0.479, + "args": { + "External id": 3294502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934440.596, "dur": 0.411, + "args": { + "External id": 3294503,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934442.387, "dur": 0.227, + "args": { + "External id": 3294504,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934443.917, "dur": 2.723, + "args": { + "External id": 3294505,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596934448.102, "dur": 0.275, + "args": { + "External id": 3294506,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596934463.613, "dur": 21.387, + "args": { + "External id": 3294507,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596934534.115, "dur": 106.413, + "args": { + "External id": 3294508,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596934558.447, "dur": 78.832, + "args": { + "External id": 3294509,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4908, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596934568.018, "dur": 65.130, + "args": { + "External id": 3294510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596934657.237, "dur": 1.804, + "args": { + "External id": 3294511,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4910, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596934729.228, "dur": 1684.266, + "args": { + "External id": 3294512,"Sequence number": 32987116, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4911 + } + }, + { + "ph": "f", "id": 216, "pid": 1336756, "tid": 1381189, "ts": 1587596934729.228, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596934831.595, "dur": 123.820, + "args": { + "External id": 3294513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596935038.434, "dur": 44.003, + "args": { + "External id": 3294514,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935100.621, "dur": 61.875, + "args": { + "External id": 3294515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935174.594, "dur": 32.515, + "args": { + "External id": 3294516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935213.480, "dur": 45.248, + "args": { + "External id": 3294517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935265.913, "dur": 26.577, + "args": { + "External id": 3294518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935299.575, "dur": 42.280, + "args": { + "External id": 3294519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596935366.452, "dur": 26.992, + "args": { + "External id": 3294520,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596935412.189, "dur": 30.569, + "args": { + "External id": 3294521,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596935466.955, "dur": 19.728, + "args": { + "External id": 3294522,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596935499.490, "dur": 17.867, + "args": { + "External id": 3294523,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935525.692, "dur": 28.470, + "args": { + "External id": 3294524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935557.248, "dur": 36.038, + "args": { + "External id": 3294525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596935625.300, "dur": 168.528, + "args": { + "External id": 3294526,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596935699.858, "dur": 6.151, + "args": { + "External id": 3294527,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596935707.823, "dur": 2.962, + "args": { + "External id": 3294528,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596935827.695, "dur": 22.379, + "args": { + "External id": 3294529,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596935891.706, "dur": 19.390, + "args": { + "External id": 3294530,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935924.972, "dur": 39.989, + "args": { + "External id": 3294531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596935970.554, "dur": 75.262, + "args": { + "External id": 3294532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596936056.549, "dur": 25.532, + "args": { + "External id": 3294533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596936088.777, "dur": 29.512, + "args": { + "External id": 3294534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596936126.027, "dur": 20.384, + "args": { + "External id": 3294535,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596936152.968, "dur": 29.408, + "args": { + "External id": 3294536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596936204.002, "dur": 22.638, + "args": { + "External id": 3294537,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596936247.072, "dur": 39.411, + "args": { + "External id": 3294538,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596936306.474, "dur": 18.132, + "args": { + "External id": 3294539,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596936340.587, "dur": 14.306, + "args": { + "External id": 3294540,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596936369.545, "dur": 16.504, + "args": { + "External id": 3294541,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936457.527, "dur": 14.463, + "args": { + "External id": 3294542,"Record function id": 0, "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936460.870, "dur": 10.227, + "args": { + "External id": 3294543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936465.041, "dur": 5.181, + "args": { + "External id": 3294544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936466.489, "dur": 3.620, + "args": { + "External id": 3294545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936475.997, "dur": 4.595, + "args": { + "External id": 3294546,"Record function id": 0, "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936477.372, "dur": 2.795, + "args": { + "External id": 3294547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936478.008, "dur": 1.698, + "args": { + "External id": 3294548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936478.731, "dur": 0.888, + "args": { + "External id": 3294549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936483.741, "dur": 4.115, + "args": { + "External id": 3294550,"Record function id": 0, "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936485.000, "dur": 2.443, + "args": { + "External id": 3294551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936485.483, "dur": 1.547, + "args": { + "External id": 3294552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936486.113, "dur": 0.797, + "args": { + "External id": 3294553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936490.841, "dur": 3.860, + "args": { + "External id": 3294554,"Record function id": 0, "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936491.891, "dur": 2.404, + "args": { + "External id": 3294555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936492.454, "dur": 1.421, + "args": { + "External id": 3294556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936493.099, "dur": 0.703, + "args": { + "External id": 3294557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936497.684, "dur": 3.224, + "args": { + "External id": 3294558,"Record function id": 0, "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936498.669, "dur": 1.829, + "args": { + "External id": 3294559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936499.161, "dur": 0.918, + "args": { + "External id": 3294560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936499.427, "dur": 0.581, + "args": { + "External id": 3294561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936503.823, "dur": 5.002, + "args": { + "External id": 3294562,"Record function id": 0, "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936504.972, "dur": 3.442, + "args": { + "External id": 3294563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936505.432, "dur": 2.599, + "args": { + "External id": 3294564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936507.195, "dur": 0.762, + "args": { + "External id": 3294565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936511.891, "dur": 5.701, + "args": { + "External id": 3294566,"Record function id": 0, "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936513.076, "dur": 4.103, + "args": { + "External id": 3294567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936513.747, "dur": 3.041, + "args": { + "External id": 3294568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936514.213, "dur": 2.507, + "args": { + "External id": 3294569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936520.524, "dur": 3.374, + "args": { + "External id": 3294570,"Record function id": 0, "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936521.505, "dur": 1.980, + "args": { + "External id": 3294571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936521.956, "dur": 1.130, + "args": { + "External id": 3294572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936522.233, "dur": 0.784, + "args": { + "External id": 3294573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936526.947, "dur": 3.267, + "args": { + "External id": 3294574,"Record function id": 0, "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596936527.837, "dur": 1.947, + "args": { + "External id": 3294575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936528.378, "dur": 0.990, + "args": { + "External id": 3294576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596936528.720, "dur": 0.574, + "args": { + "External id": 3294577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596936534.076, "dur": 36623.111, + "args": { + "External id": 3294578,"Record function id": 0, "Sequence number": 32987115, "Fwd thread id": 1, "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596936535.567, "dur": 36613.265, + "args": { + "External id": 3294579,"Sequence number": 32987115, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4978 + } + }, + { + "ph": "f", "id": 217, "pid": 1336756, "tid": 1381189, "ts": 1587596936535.567, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 1336756, "tid": 1381189, + "ts": 1587596936565.468, "dur": 35.816, + "args": { + "External id": 3294580,"Record function id": 0, "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 1336756, "tid": 1381189, + "ts": 1587596936608.837, "dur": 58.540, + "args": { + "External id": 3294581,"Record function id": 0, "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 1336756, "tid": 1381189, + "ts": 1587596936672.757, "dur": 36467.458, + "args": { + "External id": 3294582,"Record function id": 0, "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596936762.216, "dur": 5.935, + "args": { + "External id": 3294583,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596936777.416, "dur": 4.448, + "args": { + "External id": 3294584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596936794.915, "dur": 35498.924, + "args": { + "External id": 3294585,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596936807.588, "dur": 35477.893, + "args": { + "External id": 3294586,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596936845.681, "dur": 13.304, + "args": { + "External id": 3294587,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596936884.481, "dur": 35361.724, + "args": { + "External id": 3294588,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596936887.156, "dur": 35358.423, + "args": { + "External id": 3294589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596936891.431, "dur": 6.319, + "args": { + "External id": 3294590,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596936899.683, "dur": 35342.364, + "args": { + "External id": 3294591,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596972379.235, "dur": 8.287, + "args": { + "External id": 3294592,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596972381.875, "dur": 5.269, + "args": { + "External id": 3294593,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596972419.309, "dur": 379.239, + "args": { + "External id": 3294594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596972449.061, "dur": 344.832, + "args": { + "External id": 3294595,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4994, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596972459.233, "dur": 329.220, + "args": { + "External id": 3294596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596972818.394, "dur": 2.545, + "args": { + "External id": 3294597,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4996, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596972897.469, "dur": 7.240, + "args": { + "External id": 3294598,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596972949.340, "dur": 1.511, + "args": { + "External id": 3294599,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596972967.551, "dur": 1.267, + "args": { + "External id": 3294600,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596972980.449, "dur": 33.205, + "args": { + "External id": 3294601,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973031.961, "dur": 1.378, + "args": { + "External id": 3294602,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973043.438, "dur": 0.945, + "args": { + "External id": 3294603,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973056.184, "dur": 0.819, + "args": { + "External id": 3294604,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973068.469, "dur": 1.121, + "args": { + "External id": 3294605,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973078.729, "dur": 0.900, + "args": { + "External id": 3294606,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596973173.039, "dur": 2625.082, + "args": { + "External id": 3294607,"Record function id": 0, "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 1336756, "tid": 1381189, + "ts": 1587596973191.937, "dur": 980.132, + "args": { + "External id": 3294608,"Record function id": 0, "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 1336756, "tid": 1381189, + "ts": 1587596973206.334, "dur": 304.235, + "args": { + "External id": 3294609,"Record function id": 0, "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973283.314, "dur": 3.924, + "args": { + "External id": 3294610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973290.215, "dur": 1.087, + "args": { + "External id": 3294611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973293.099, "dur": 1.097, + "args": { + "External id": 3294612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973297.898, "dur": 2.068, + "args": { + "External id": 3294613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973301.441, "dur": 0.912, + "args": { + "External id": 3294614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973303.926, "dur": 0.649, + "args": { + "External id": 3294615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973306.175, "dur": 0.918, + "args": { + "External id": 3294616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973309.685, "dur": 0.695, + "args": { + "External id": 3294617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973311.909, "dur": 0.860, + "args": { + "External id": 3294618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596973314.685, "dur": 1.141, + "args": { + "External id": 3294619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596973333.574, "dur": 149.795, + "args": { + "External id": 3294620,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596973349.666, "dur": 129.717, + "args": { + "External id": 3294621,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596973367.385, "dur": 12.498, + "args": { + "External id": 3294622,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596973384.494, "dur": 67.364, + "args": { + "External id": 3294623,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596973387.021, "dur": 64.438, + "args": { + "External id": 3294624,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973391.267, "dur": 7.080, + "args": { + "External id": 3294625,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596973400.079, "dur": 50.744, + "args": { + "External id": 3294626,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 1336756, "tid": 1381189, + "ts": 1587596973585.569, "dur": 578.823, + "args": { + "External id": 3294627,"Record function id": 0, "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 1336756, "tid": 1381189, + "ts": 1587596973600.596, "dur": 551.363, + "args": { + "External id": 3294628,"Record function id": 0, "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596973652.739, "dur": 4.279, + "args": { + "External id": 3294629,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596973674.829, "dur": 26.686, + "args": { + "External id": 3294630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973679.778, "dur": 1.149, + "args": { + "External id": 3294631,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973683.144, "dur": 0.564, + "args": { + "External id": 3294632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973685.218, "dur": 0.445, + "args": { + "External id": 3294633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973687.709, "dur": 0.321, + "args": { + "External id": 3294634,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973689.439, "dur": 0.420, + "args": { + "External id": 3294635,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973691.160, "dur": 1.189, + "args": { + "External id": 3294636,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973693.619, "dur": 1.718, + "args": { + "External id": 3294637,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973696.502, "dur": 0.386, + "args": { + "External id": 3294638,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973698.869, "dur": 0.368, + "args": { + "External id": 3294639,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596973710.834, "dur": 30.524, + "args": { + "External id": 3294640,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587596973770.249, "dur": 90.734, + "args": { + "External id": 3294641,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596973779.285, "dur": 3.087, + "args": { + "External id": 3294642,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587596973787.230, "dur": 8.984, + "args": { + "External id": 3294643,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587596973791.102, "dur": 4.673, + "args": { + "External id": 3294644,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973794.183, "dur": 0.501, + "args": { + "External id": 3294645,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587596973802.825, "dur": 23.693, + "args": { + "External id": 3294646,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973804.866, "dur": 0.274, + "args": { + "External id": 3294647,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973806.669, "dur": 0.336, + "args": { + "External id": 3294648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973809.098, "dur": 0.361, + "args": { + "External id": 3294649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973810.784, "dur": 1.814, + "args": { + "External id": 3294650,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973813.872, "dur": 0.318, + "args": { + "External id": 3294651,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973815.572, "dur": 0.397, + "args": { + "External id": 3294652,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973817.456, "dur": 0.256, + "args": { + "External id": 3294653,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973818.858, "dur": 0.514, + "args": { + "External id": 3294654,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596973820.660, "dur": 0.314, + "args": { + "External id": 3294655,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596973835.868, "dur": 18.642, + "args": { + "External id": 3294656,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587596973926.425, "dur": 152.392, + "args": { + "External id": 3294657,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596973953.745, "dur": 120.842, + "args": { + "External id": 3294658,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5057, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587596973963.015, "dur": 106.185, + "args": { + "External id": 3294659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587596974096.957, "dur": 2.071, + "args": { + "External id": 3294660,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5059, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596974179.287, "dur": 1600.273, + "args": { + "External id": 3294661,"Sequence number": 32987114, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5060 + } + }, + { + "ph": "f", "id": 218, "pid": 1336756, "tid": 1381189, "ts": 1587596974179.287, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974285.893, "dur": 101.542, + "args": { + "External id": 3294662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596974428.041, "dur": 38.043, + "args": { + "External id": 3294663,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974482.308, "dur": 49.393, + "args": { + "External id": 3294664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974540.723, "dur": 32.337, + "args": { + "External id": 3294665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974579.951, "dur": 46.054, + "args": { + "External id": 3294666,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974634.044, "dur": 27.244, + "args": { + "External id": 3294667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974667.525, "dur": 41.753, + "args": { + "External id": 3294668,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596974730.646, "dur": 23.636, + "args": { + "External id": 3294669,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587596974771.379, "dur": 32.278, + "args": { + "External id": 3294670,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596974823.674, "dur": 20.019, + "args": { + "External id": 3294671,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596974860.456, "dur": 36.016, + "args": { + "External id": 3294672,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974908.792, "dur": 32.988, + "args": { + "External id": 3294673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596974945.303, "dur": 33.085, + "args": { + "External id": 3294674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587596975045.561, "dur": 169.643, + "args": { + "External id": 3294675,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596975122.702, "dur": 5.789, + "args": { + "External id": 3294676,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596975130.428, "dur": 2.333, + "args": { + "External id": 3294677,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596975248.154, "dur": 26.437, + "args": { + "External id": 3294678,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587596975287.491, "dur": 20.231, + "args": { + "External id": 3294679,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596975315.490, "dur": 45.193, + "args": { + "External id": 3294680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596975368.207, "dur": 36.918, + "args": { + "External id": 3294681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596975411.878, "dur": 21.968, + "args": { + "External id": 3294682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596975438.238, "dur": 33.101, + "args": { + "External id": 3294683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596975476.940, "dur": 20.829, + "args": { + "External id": 3294684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587596975506.433, "dur": 33.148, + "args": { + "External id": 3294685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587596975557.901, "dur": 26.948, + "args": { + "External id": 3294686,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596975615.591, "dur": 30.841, + "args": { + "External id": 3294687,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587596975666.693, "dur": 18.173, + "args": { + "External id": 3294688,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587596975705.143, "dur": 15.019, + "args": { + "External id": 3294689,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587596975732.331, "dur": 21.507, + "args": { + "External id": 3294690,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975822.927, "dur": 14.015, + "args": { + "External id": 3294691,"Record function id": 0, "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975825.984, "dur": 10.131, + "args": { + "External id": 3294692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975830.062, "dur": 5.118, + "args": { + "External id": 3294693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975831.535, "dur": 3.520, + "args": { + "External id": 3294694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975840.693, "dur": 4.368, + "args": { + "External id": 3294695,"Record function id": 0, "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975842.120, "dur": 2.501, + "args": { + "External id": 3294696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975842.801, "dur": 1.256, + "args": { + "External id": 3294697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975843.265, "dur": 0.710, + "args": { + "External id": 3294698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975848.333, "dur": 4.187, + "args": { + "External id": 3294699,"Record function id": 0, "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975849.335, "dur": 2.752, + "args": { + "External id": 3294700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975849.869, "dur": 1.775, + "args": { + "External id": 3294701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975850.439, "dur": 1.086, + "args": { + "External id": 3294702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975855.730, "dur": 3.519, + "args": { + "External id": 3294703,"Record function id": 0, "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975856.759, "dur": 2.093, + "args": { + "External id": 3294704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975857.358, "dur": 1.093, + "args": { + "External id": 3294705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975857.766, "dur": 0.599, + "args": { + "External id": 3294706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975884.342, "dur": 9.072, + "args": { + "External id": 3294707,"Record function id": 0, "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975886.263, "dur": 6.500, + "args": { + "External id": 3294708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975887.261, "dur": 4.664, + "args": { + "External id": 3294709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975887.942, "dur": 3.758, + "args": { + "External id": 3294710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975897.298, "dur": 3.884, + "args": { + "External id": 3294711,"Record function id": 0, "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975898.566, "dur": 2.215, + "args": { + "External id": 3294712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975899.204, "dur": 1.165, + "args": { + "External id": 3294713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975899.565, "dur": 0.734, + "args": { + "External id": 3294714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975904.281, "dur": 6.431, + "args": { + "External id": 3294715,"Record function id": 0, "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975905.969, "dur": 4.315, + "args": { + "External id": 3294716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975906.519, "dur": 3.262, + "args": { + "External id": 3294717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975908.925, "dur": 0.760, + "args": { + "External id": 3294718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975913.739, "dur": 28.842, + "args": { + "External id": 3294719,"Record function id": 0, "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975914.863, "dur": 27.287, + "args": { + "External id": 3294720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975940.392, "dur": 1.333, + "args": { + "External id": 3294721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975940.932, "dur": 0.698, + "args": { + "External id": 3294722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975945.612, "dur": 3.597, + "args": { + "External id": 3294723,"Record function id": 0, "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587596975946.824, "dur": 1.963, + "args": { + "External id": 3294724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975947.334, "dur": 0.951, + "args": { + "External id": 3294725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587596975947.636, "dur": 0.575, + "args": { + "External id": 3294726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596975952.956, "dur": 37663.950, + "args": { + "External id": 3294727,"Record function id": 0, "Sequence number": 32987113, "Fwd thread id": 1, "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587596975954.278, "dur": 37654.647, + "args": { + "External id": 3294728,"Sequence number": 32987113, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5127 + } + }, + { + "ph": "f", "id": 219, "pid": 1336756, "tid": 1381189, "ts": 1587596975954.278, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 1336756, "tid": 1381189, + "ts": 1587596976021.107, "dur": 39.232, + "args": { + "External id": 3294729,"Record function id": 0, "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 1336756, "tid": 1381189, + "ts": 1587596976068.678, "dur": 62.489, + "args": { + "External id": 3294730,"Record function id": 0, "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 1336756, "tid": 1381189, + "ts": 1587596976137.201, "dur": 37464.350, + "args": { + "External id": 3294731,"Record function id": 0, "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596976225.229, "dur": 6.931, + "args": { + "External id": 3294732,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587596976242.123, "dur": 4.980, + "args": { + "External id": 3294733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596976260.309, "dur": 36534.925, + "args": { + "External id": 3294734,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587596976273.048, "dur": 36511.887, + "args": { + "External id": 3294735,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587596976308.872, "dur": 13.421, + "args": { + "External id": 3294736,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587596976328.294, "dur": 36415.754, + "args": { + "External id": 3294737,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587596976330.846, "dur": 36412.396, + "args": { + "External id": 3294738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587596976334.968, "dur": 5.026, + "args": { + "External id": 3294739,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587596976343.008, "dur": 36395.801, + "args": { + "External id": 3294740,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597012901.843, "dur": 11.561, + "args": { + "External id": 3294741,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597012904.936, "dur": 7.836, + "args": { + "External id": 3294742,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597012946.312, "dur": 366.020, + "args": { + "External id": 3294743,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597012976.056, "dur": 331.052, + "args": { + "External id": 3294744,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5143, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597013011.865, "dur": 290.022, + "args": { + "External id": 3294745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597013335.193, "dur": 2.068, + "args": { + "External id": 3294746,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5145, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013396.425, "dur": 6.829, + "args": { + "External id": 3294747,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013447.791, "dur": 1.352, + "args": { + "External id": 3294748,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013463.802, "dur": 1.155, + "args": { + "External id": 3294749,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013477.019, "dur": 1.186, + "args": { + "External id": 3294750,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013489.588, "dur": 0.982, + "args": { + "External id": 3294751,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013502.301, "dur": 0.791, + "args": { + "External id": 3294752,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013515.397, "dur": 1.061, + "args": { + "External id": 3294753,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013526.930, "dur": 1.311, + "args": { + "External id": 3294754,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013540.735, "dur": 0.743, + "args": { + "External id": 3294755,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597013629.552, "dur": 2708.909, + "args": { + "External id": 3294756,"Record function id": 0, "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 1336756, "tid": 1381189, + "ts": 1587597013647.964, "dur": 1000.241, + "args": { + "External id": 3294757,"Record function id": 0, "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 1336756, "tid": 1381189, + "ts": 1587597013662.060, "dur": 361.699, + "args": { + "External id": 3294758,"Record function id": 0, "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013736.908, "dur": 4.030, + "args": { + "External id": 3294759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013743.974, "dur": 0.732, + "args": { + "External id": 3294760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013748.585, "dur": 0.878, + "args": { + "External id": 3294761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013751.376, "dur": 0.824, + "args": { + "External id": 3294762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013753.547, "dur": 0.835, + "args": { + "External id": 3294763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013756.007, "dur": 0.803, + "args": { + "External id": 3294764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013760.054, "dur": 0.677, + "args": { + "External id": 3294765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013762.077, "dur": 2.584, + "args": { + "External id": 3294766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013765.991, "dur": 0.880, + "args": { + "External id": 3294767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597013768.327, "dur": 0.759, + "args": { + "External id": 3294768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597013787.666, "dur": 165.602, + "args": { + "External id": 3294769,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597013802.938, "dur": 145.645, + "args": { + "External id": 3294770,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597013820.043, "dur": 12.694, + "args": { + "External id": 3294771,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597013835.764, "dur": 84.312, + "args": { + "External id": 3294772,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597013838.260, "dur": 81.387, + "args": { + "External id": 3294773,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597013842.345, "dur": 6.071, + "args": { + "External id": 3294774,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597013850.068, "dur": 68.534, + "args": { + "External id": 3294775,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 1336756, "tid": 1381189, + "ts": 1587597014107.911, "dur": 532.452, + "args": { + "External id": 3294776,"Record function id": 0, "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 1336756, "tid": 1381189, + "ts": 1587597014126.649, "dur": 501.548, + "args": { + "External id": 3294777,"Record function id": 0, "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597014184.874, "dur": 5.861, + "args": { + "External id": 3294778,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587597014209.376, "dur": 30.647, + "args": { + "External id": 3294779,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014214.684, "dur": 1.635, + "args": { + "External id": 3294780,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014218.709, "dur": 0.587, + "args": { + "External id": 3294781,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014221.042, "dur": 2.369, + "args": { + "External id": 3294782,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014226.304, "dur": 0.437, + "args": { + "External id": 3294783,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014228.087, "dur": 0.363, + "args": { + "External id": 3294784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014230.008, "dur": 0.556, + "args": { + "External id": 3294785,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014232.252, "dur": 0.252, + "args": { + "External id": 3294786,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014234.086, "dur": 0.304, + "args": { + "External id": 3294787,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014236.470, "dur": 0.620, + "args": { + "External id": 3294788,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597014251.454, "dur": 31.883, + "args": { + "External id": 3294789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587597014312.287, "dur": 102.270, + "args": { + "External id": 3294790,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597014321.923, "dur": 3.139, + "args": { + "External id": 3294791,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587597014330.101, "dur": 12.079, + "args": { + "External id": 3294792,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587597014334.448, "dur": 7.279, + "args": { + "External id": 3294793,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014338.190, "dur": 2.363, + "args": { + "External id": 3294794,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587597014348.729, "dur": 26.397, + "args": { + "External id": 3294795,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014350.822, "dur": 0.624, + "args": { + "External id": 3294796,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014352.940, "dur": 0.459, + "args": { + "External id": 3294797,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014355.535, "dur": 0.467, + "args": { + "External id": 3294798,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014357.156, "dur": 0.562, + "args": { + "External id": 3294799,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014358.999, "dur": 0.379, + "args": { + "External id": 3294800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014360.905, "dur": 0.435, + "args": { + "External id": 3294801,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014362.618, "dur": 0.409, + "args": { + "External id": 3294802,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014364.921, "dur": 2.385, + "args": { + "External id": 3294803,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597014368.726, "dur": 0.529, + "args": { + "External id": 3294804,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597014386.592, "dur": 20.591, + "args": { + "External id": 3294805,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597014456.866, "dur": 106.886, + "args": { + "External id": 3294806,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597014482.000, "dur": 78.614, + "args": { + "External id": 3294807,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5206, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597014491.376, "dur": 65.378, + "args": { + "External id": 3294808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597014578.934, "dur": 2.090, + "args": { + "External id": 3294809,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5208, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597014655.286, "dur": 1664.641, + "args": { + "External id": 3294810,"Sequence number": 32987112, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5209 + } + }, + { + "ph": "f", "id": 220, "pid": 1336756, "tid": 1381189, "ts": 1587597014655.286, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597014759.481, "dur": 100.762, + "args": { + "External id": 3294811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597014923.745, "dur": 42.540, + "args": { + "External id": 3294812,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015021.668, "dur": 61.008, + "args": { + "External id": 3294813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015096.182, "dur": 33.492, + "args": { + "External id": 3294814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015136.472, "dur": 45.770, + "args": { + "External id": 3294815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015188.992, "dur": 27.929, + "args": { + "External id": 3294816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015223.567, "dur": 45.852, + "args": { + "External id": 3294817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597015293.550, "dur": 28.763, + "args": { + "External id": 3294818,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597015342.866, "dur": 29.150, + "args": { + "External id": 3294819,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597015391.266, "dur": 19.272, + "args": { + "External id": 3294820,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597015424.056, "dur": 14.169, + "args": { + "External id": 3294821,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015445.865, "dur": 32.308, + "args": { + "External id": 3294822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015481.337, "dur": 32.846, + "args": { + "External id": 3294823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587597015546.469, "dur": 171.266, + "args": { + "External id": 3294824,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597015622.786, "dur": 5.453, + "args": { + "External id": 3294825,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597015629.928, "dur": 2.707, + "args": { + "External id": 3294826,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597015748.887, "dur": 27.171, + "args": { + "External id": 3294827,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597015788.062, "dur": 15.554, + "args": { + "External id": 3294828,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015810.286, "dur": 36.093, + "args": { + "External id": 3294829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015851.872, "dur": 54.611, + "args": { + "External id": 3294830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015918.410, "dur": 25.113, + "args": { + "External id": 3294831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597015948.049, "dur": 65.425, + "args": { + "External id": 3294832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597016027.341, "dur": 25.831, + "args": { + "External id": 3294833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597016060.338, "dur": 42.125, + "args": { + "External id": 3294834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587597016129.659, "dur": 24.690, + "args": { + "External id": 3294835,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597016173.342, "dur": 22.943, + "args": { + "External id": 3294836,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597016213.361, "dur": 17.769, + "args": { + "External id": 3294837,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597016248.479, "dur": 17.886, + "args": { + "External id": 3294838,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587597016277.915, "dur": 15.943, + "args": { + "External id": 3294839,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016360.277, "dur": 14.208, + "args": { + "External id": 3294840,"Record function id": 0, "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016363.197, "dur": 10.325, + "args": { + "External id": 3294841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016367.459, "dur": 5.182, + "args": { + "External id": 3294842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016368.699, "dur": 3.865, + "args": { + "External id": 3294843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016378.248, "dur": 4.630, + "args": { + "External id": 3294844,"Record function id": 0, "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016379.841, "dur": 2.580, + "args": { + "External id": 3294845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016380.570, "dur": 1.327, + "args": { + "External id": 3294846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016381.016, "dur": 0.791, + "args": { + "External id": 3294847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016386.086, "dur": 4.218, + "args": { + "External id": 3294848,"Record function id": 0, "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016387.341, "dur": 2.558, + "args": { + "External id": 3294849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016387.961, "dur": 1.549, + "args": { + "External id": 3294850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016388.714, "dur": 0.724, + "args": { + "External id": 3294851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016393.422, "dur": 3.736, + "args": { + "External id": 3294852,"Record function id": 0, "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016394.898, "dur": 1.840, + "args": { + "External id": 3294853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016395.390, "dur": 0.962, + "args": { + "External id": 3294854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016395.708, "dur": 0.571, + "args": { + "External id": 3294855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016400.201, "dur": 5.517, + "args": { + "External id": 3294856,"Record function id": 0, "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016401.249, "dur": 4.034, + "args": { + "External id": 3294857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016401.860, "dur": 2.849, + "args": { + "External id": 3294858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016402.119, "dur": 2.531, + "args": { + "External id": 3294859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016408.801, "dur": 3.773, + "args": { + "External id": 3294860,"Record function id": 0, "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016409.944, "dur": 2.234, + "args": { + "External id": 3294861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016410.469, "dur": 1.313, + "args": { + "External id": 3294862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016410.887, "dur": 0.833, + "args": { + "External id": 3294863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016415.911, "dur": 5.287, + "args": { + "External id": 3294864,"Record function id": 0, "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016416.943, "dur": 3.835, + "args": { + "External id": 3294865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016417.443, "dur": 2.919, + "args": { + "External id": 3294866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016419.363, "dur": 0.933, + "args": { + "External id": 3294867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016424.257, "dur": 7.228, + "args": { + "External id": 3294868,"Record function id": 0, "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016425.608, "dur": 5.450, + "args": { + "External id": 3294869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016429.305, "dur": 1.322, + "args": { + "External id": 3294870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016429.634, "dur": 0.928, + "args": { + "External id": 3294871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016434.520, "dur": 4.278, + "args": { + "External id": 3294872,"Record function id": 0, "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597016436.028, "dur": 2.355, + "args": { + "External id": 3294873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016436.481, "dur": 1.480, + "args": { + "External id": 3294874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597016436.836, "dur": 1.061, + "args": { + "External id": 3294875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597016442.730, "dur": 38656.933, + "args": { + "External id": 3294876,"Record function id": 0, "Sequence number": 32987111, "Fwd thread id": 1, "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597016444.492, "dur": 38646.703, + "args": { + "External id": 3294877,"Sequence number": 32987111, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5276 + } + }, + { + "ph": "f", "id": 221, "pid": 1336756, "tid": 1381189, "ts": 1587597016444.492, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 1336756, "tid": 1381189, + "ts": 1587597016472.062, "dur": 36.298, + "args": { + "External id": 3294878,"Record function id": 0, "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 1336756, "tid": 1381189, + "ts": 1587597016515.695, "dur": 62.317, + "args": { + "External id": 3294879,"Record function id": 0, "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 1336756, "tid": 1381189, + "ts": 1587597016583.443, "dur": 38499.107, + "args": { + "External id": 3294880,"Record function id": 0, "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597016665.780, "dur": 6.346, + "args": { + "External id": 3294881,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597016681.387, "dur": 4.466, + "args": { + "External id": 3294882,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597016698.585, "dur": 37556.884, + "args": { + "External id": 3294883,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597016711.944, "dur": 37532.450, + "args": { + "External id": 3294884,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597016749.068, "dur": 13.303, + "args": { + "External id": 3294885,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597016768.332, "dur": 37432.809, + "args": { + "External id": 3294886,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597016770.702, "dur": 37429.610, + "args": { + "External id": 3294887,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597016775.108, "dur": 5.136, + "args": { + "External id": 3294888,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597016783.430, "dur": 37412.395, + "args": { + "External id": 3294889,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597054352.741, "dur": 10.527, + "args": { + "External id": 3294890,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597054355.921, "dur": 6.928, + "args": { + "External id": 3294891,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597054395.172, "dur": 335.898, + "args": { + "External id": 3294892,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597054423.676, "dur": 303.088, + "args": { + "External id": 3294893,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5292, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597054435.183, "dur": 286.524, + "args": { + "External id": 3294894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597054749.563, "dur": 2.087, + "args": { + "External id": 3294895,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5294, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054805.221, "dur": 6.546, + "args": { + "External id": 3294896,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054857.052, "dur": 1.339, + "args": { + "External id": 3294897,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054895.335, "dur": 2.057, + "args": { + "External id": 3294898,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054910.842, "dur": 0.975, + "args": { + "External id": 3294899,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054923.203, "dur": 0.680, + "args": { + "External id": 3294900,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054935.830, "dur": 0.872, + "args": { + "External id": 3294901,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054947.854, "dur": 0.674, + "args": { + "External id": 3294902,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054962.304, "dur": 0.993, + "args": { + "External id": 3294903,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597054973.588, "dur": 0.717, + "args": { + "External id": 3294904,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597055115.432, "dur": 2661.137, + "args": { + "External id": 3294905,"Record function id": 0, "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 1336756, "tid": 1381189, + "ts": 1587597055133.991, "dur": 1018.499, + "args": { + "External id": 3294906,"Record function id": 0, "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 1336756, "tid": 1381189, + "ts": 1587597055147.687, "dur": 301.714, + "args": { + "External id": 3294907,"Record function id": 0, "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055224.575, "dur": 4.801, + "args": { + "External id": 3294908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055232.791, "dur": 0.930, + "args": { + "External id": 3294909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055236.713, "dur": 0.627, + "args": { + "External id": 3294910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055239.051, "dur": 0.584, + "args": { + "External id": 3294911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055241.084, "dur": 0.887, + "args": { + "External id": 3294912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055244.005, "dur": 0.947, + "args": { + "External id": 3294913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055247.317, "dur": 0.740, + "args": { + "External id": 3294914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055249.840, "dur": 2.377, + "args": { + "External id": 3294915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055253.756, "dur": 1.046, + "args": { + "External id": 3294916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597055256.639, "dur": 0.976, + "args": { + "External id": 3294917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597055276.198, "dur": 146.157, + "args": { + "External id": 3294918,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597055292.635, "dur": 125.420, + "args": { + "External id": 3294919,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597055309.933, "dur": 11.888, + "args": { + "External id": 3294920,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597055325.092, "dur": 65.020, + "args": { + "External id": 3294921,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597055327.937, "dur": 61.898, + "args": { + "External id": 3294922,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055332.078, "dur": 6.032, + "args": { + "External id": 3294923,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597055339.790, "dur": 49.296, + "args": { + "External id": 3294924,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 1336756, "tid": 1381189, + "ts": 1587597055527.068, "dur": 617.950, + "args": { + "External id": 3294925,"Record function id": 0, "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 1336756, "tid": 1381189, + "ts": 1587597055541.585, "dur": 590.793, + "args": { + "External id": 3294926,"Record function id": 0, "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597055600.837, "dur": 7.391, + "args": { + "External id": 3294927,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587597055632.087, "dur": 34.947, + "args": { + "External id": 3294928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055638.431, "dur": 2.109, + "args": { + "External id": 3294929,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055642.668, "dur": 0.652, + "args": { + "External id": 3294930,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055644.910, "dur": 0.598, + "args": { + "External id": 3294931,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055648.152, "dur": 2.220, + "args": { + "External id": 3294932,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055652.255, "dur": 0.379, + "args": { + "External id": 3294933,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055654.283, "dur": 0.341, + "args": { + "External id": 3294934,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055656.987, "dur": 0.352, + "args": { + "External id": 3294935,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055658.650, "dur": 0.678, + "args": { + "External id": 3294936,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055664.010, "dur": 0.515, + "args": { + "External id": 3294937,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597055677.040, "dur": 32.188, + "args": { + "External id": 3294938,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587597055738.786, "dur": 102.044, + "args": { + "External id": 3294939,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597055748.323, "dur": 3.335, + "args": { + "External id": 3294940,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587597055756.696, "dur": 9.638, + "args": { + "External id": 3294941,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587597055760.570, "dur": 5.340, + "args": { + "External id": 3294942,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055764.302, "dur": 0.462, + "args": { + "External id": 3294943,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587597055773.352, "dur": 29.271, + "args": { + "External id": 3294944,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055775.508, "dur": 2.022, + "args": { + "External id": 3294945,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055779.077, "dur": 0.561, + "args": { + "External id": 3294946,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055781.369, "dur": 0.961, + "args": { + "External id": 3294947,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055783.675, "dur": 0.735, + "args": { + "External id": 3294948,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055785.938, "dur": 0.511, + "args": { + "External id": 3294949,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055788.532, "dur": 0.356, + "args": { + "External id": 3294950,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055791.781, "dur": 0.282, + "args": { + "External id": 3294951,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055793.465, "dur": 0.328, + "args": { + "External id": 3294952,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597055795.465, "dur": 2.352, + "args": { + "External id": 3294953,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597055813.046, "dur": 20.318, + "args": { + "External id": 3294954,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597055907.139, "dur": 153.561, + "args": { + "External id": 3294955,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597055936.974, "dur": 119.829, + "args": { + "External id": 3294956,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5355, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597055946.074, "dur": 105.199, + "args": { + "External id": 3294957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597056077.458, "dur": 1.850, + "args": { + "External id": 3294958,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5357, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597056159.697, "dur": 1598.050, + "args": { + "External id": 3294959,"Sequence number": 32987110, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5358 + } + }, + { + "ph": "f", "id": 222, "pid": 1336756, "tid": 1381189, "ts": 1587597056159.697, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056274.510, "dur": 104.522, + "args": { + "External id": 3294960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597056422.550, "dur": 40.047, + "args": { + "External id": 3294961,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056478.347, "dur": 51.203, + "args": { + "External id": 3294962,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056540.908, "dur": 32.320, + "args": { + "External id": 3294963,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056579.874, "dur": 44.666, + "args": { + "External id": 3294964,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056631.296, "dur": 27.139, + "args": { + "External id": 3294965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056665.918, "dur": 41.361, + "args": { + "External id": 3294966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597056729.726, "dur": 23.036, + "args": { + "External id": 3294967,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597056771.996, "dur": 28.177, + "args": { + "External id": 3294968,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597056818.373, "dur": 19.707, + "args": { + "External id": 3294969,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597056851.222, "dur": 38.122, + "args": { + "External id": 3294970,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056900.274, "dur": 36.740, + "args": { + "External id": 3294971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597056940.163, "dur": 33.169, + "args": { + "External id": 3294972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587597057041.949, "dur": 177.195, + "args": { + "External id": 3294973,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597057122.849, "dur": 5.869, + "args": { + "External id": 3294974,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597057130.744, "dur": 2.331, + "args": { + "External id": 3294975,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597057255.156, "dur": 24.932, + "args": { + "External id": 3294976,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597057291.180, "dur": 18.743, + "args": { + "External id": 3294977,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597057318.136, "dur": 39.361, + "args": { + "External id": 3294978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597057362.860, "dur": 35.728, + "args": { + "External id": 3294979,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597057406.535, "dur": 21.126, + "args": { + "External id": 3294980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597057431.772, "dur": 29.997, + "args": { + "External id": 3294981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597057466.725, "dur": 20.593, + "args": { + "External id": 3294982,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597057493.625, "dur": 46.446, + "args": { + "External id": 3294983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587597057566.231, "dur": 26.844, + "args": { + "External id": 3294984,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597057610.714, "dur": 27.080, + "args": { + "External id": 3294985,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597057651.699, "dur": 17.631, + "args": { + "External id": 3294986,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597057684.298, "dur": 18.056, + "args": { + "External id": 3294987,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587597057712.905, "dur": 16.371, + "args": { + "External id": 3294988,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057800.932, "dur": 14.624, + "args": { + "External id": 3294989,"Record function id": 0, "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057804.254, "dur": 10.434, + "args": { + "External id": 3294990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057808.584, "dur": 5.262, + "args": { + "External id": 3294991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057810.010, "dur": 3.747, + "args": { + "External id": 3294992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057819.571, "dur": 4.927, + "args": { + "External id": 3294993,"Record function id": 0, "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057820.839, "dur": 3.202, + "args": { + "External id": 3294994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057821.810, "dur": 1.707, + "args": { + "External id": 3294995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057822.463, "dur": 0.923, + "args": { + "External id": 3294996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057827.817, "dur": 4.275, + "args": { + "External id": 3294997,"Record function id": 0, "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057829.164, "dur": 2.539, + "args": { + "External id": 3294998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057829.875, "dur": 1.337, + "args": { + "External id": 3294999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057830.297, "dur": 0.839, + "args": { + "External id": 3295000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057835.147, "dur": 3.949, + "args": { + "External id": 3295001,"Record function id": 0, "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057836.348, "dur": 2.354, + "args": { + "External id": 3295002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057837.046, "dur": 1.201, + "args": { + "External id": 3295003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057837.527, "dur": 0.643, + "args": { + "External id": 3295004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057842.082, "dur": 3.771, + "args": { + "External id": 3295005,"Record function id": 0, "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057843.296, "dur": 2.133, + "args": { + "External id": 3295006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057843.776, "dur": 1.239, + "args": { + "External id": 3295007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057844.056, "dur": 0.896, + "args": { + "External id": 3295008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057848.861, "dur": 6.231, + "args": { + "External id": 3295009,"Record function id": 0, "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057849.759, "dur": 4.929, + "args": { + "External id": 3295010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057850.217, "dur": 4.019, + "args": { + "External id": 3295011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057853.599, "dur": 0.564, + "args": { + "External id": 3295012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057858.525, "dur": 25.758, + "args": { + "External id": 3295013,"Record function id": 0, "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057859.552, "dur": 23.550, + "args": { + "External id": 3295014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057860.704, "dur": 21.402, + "args": { + "External id": 3295015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057861.957, "dur": 19.717, + "args": { + "External id": 3295016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057889.830, "dur": 5.114, + "args": { + "External id": 3295017,"Record function id": 0, "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057891.303, "dur": 3.201, + "args": { + "External id": 3295018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057892.320, "dur": 1.738, + "args": { + "External id": 3295019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057892.924, "dur": 1.059, + "args": { + "External id": 3295020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057897.934, "dur": 4.081, + "args": { + "External id": 3295021,"Record function id": 0, "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597057899.132, "dur": 2.491, + "args": { + "External id": 3295022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057899.933, "dur": 1.271, + "args": { + "External id": 3295023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597057900.444, "dur": 0.687, + "args": { + "External id": 3295024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597057905.838, "dur": 39511.667, + "args": { + "External id": 3295025,"Record function id": 0, "Sequence number": 32987109, "Fwd thread id": 1, "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597057907.672, "dur": 39501.948, + "args": { + "External id": 3295026,"Sequence number": 32987109, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5425 + } + }, + { + "ph": "f", "id": 223, "pid": 1336756, "tid": 1381189, "ts": 1587597057907.672, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 1336756, "tid": 1381189, + "ts": 1587597057936.280, "dur": 40.435, + "args": { + "External id": 3295027,"Record function id": 0, "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 1336756, "tid": 1381189, + "ts": 1587597058023.565, "dur": 66.153, + "args": { + "External id": 3295028,"Record function id": 0, "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 1336756, "tid": 1381189, + "ts": 1587597058097.345, "dur": 39304.539, + "args": { + "External id": 3295029,"Record function id": 0, "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597058187.113, "dur": 6.609, + "args": { + "External id": 3295030,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597058204.170, "dur": 5.170, + "args": { + "External id": 3295031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597058224.439, "dur": 38296.159, + "args": { + "External id": 3295032,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597058237.702, "dur": 38272.452, + "args": { + "External id": 3295033,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597058285.491, "dur": 13.713, + "args": { + "External id": 3295034,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597058305.135, "dur": 38163.643, + "args": { + "External id": 3295035,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597058307.537, "dur": 38160.339, + "args": { + "External id": 3295036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597058312.186, "dur": 4.618, + "args": { + "External id": 3295037,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597058318.518, "dur": 38145.135, + "args": { + "External id": 3295038,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597096616.117, "dur": 10.149, + "args": { + "External id": 3295039,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597096619.103, "dur": 6.713, + "args": { + "External id": 3295040,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597096659.124, "dur": 442.448, + "args": { + "External id": 3295041,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597096691.774, "dur": 404.813, + "args": { + "External id": 3295042,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5441, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597096702.407, "dur": 388.120, + "args": { + "External id": 3295043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597097124.887, "dur": 3.000, + "args": { + "External id": 3295044,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5443, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097193.600, "dur": 7.031, + "args": { + "External id": 3295045,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097248.098, "dur": 1.627, + "args": { + "External id": 3295046,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097266.322, "dur": 1.243, + "args": { + "External id": 3295047,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097279.878, "dur": 1.062, + "args": { + "External id": 3295048,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097293.509, "dur": 0.971, + "args": { + "External id": 3295049,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097305.244, "dur": 0.673, + "args": { + "External id": 3295050,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097319.045, "dur": 1.119, + "args": { + "External id": 3295051,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097331.098, "dur": 1.536, + "args": { + "External id": 3295052,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097342.375, "dur": 0.816, + "args": { + "External id": 3295053,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597097434.502, "dur": 2774.327, + "args": { + "External id": 3295054,"Record function id": 0, "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 1336756, "tid": 1381189, + "ts": 1587597097454.676, "dur": 1053.499, + "args": { + "External id": 3295055,"Record function id": 0, "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 1336756, "tid": 1381189, + "ts": 1587597097470.218, "dur": 317.964, + "args": { + "External id": 3295056,"Record function id": 0, "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097552.037, "dur": 3.991, + "args": { + "External id": 3295057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097559.241, "dur": 0.776, + "args": { + "External id": 3295058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097561.608, "dur": 0.844, + "args": { + "External id": 3295059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097564.064, "dur": 3.416, + "args": { + "External id": 3295060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097569.098, "dur": 0.774, + "args": { + "External id": 3295061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097572.629, "dur": 1.201, + "args": { + "External id": 3295062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097576.155, "dur": 0.688, + "args": { + "External id": 3295063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097578.355, "dur": 0.663, + "args": { + "External id": 3295064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097580.775, "dur": 1.093, + "args": { + "External id": 3295065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597097584.617, "dur": 1.146, + "args": { + "External id": 3295066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597097604.090, "dur": 156.237, + "args": { + "External id": 3295067,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597097620.757, "dur": 134.898, + "args": { + "External id": 3295068,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597097642.075, "dur": 12.459, + "args": { + "External id": 3295069,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597097657.811, "dur": 69.989, + "args": { + "External id": 3295070,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597097660.522, "dur": 66.884, + "args": { + "External id": 3295071,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597097664.748, "dur": 8.527, + "args": { + "External id": 3295072,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597097675.068, "dur": 51.703, + "args": { + "External id": 3295073,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 1336756, "tid": 1381189, + "ts": 1587597097891.133, "dur": 609.518, + "args": { + "External id": 3295074,"Record function id": 0, "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 1336756, "tid": 1381189, + "ts": 1587597097909.206, "dur": 578.877, + "args": { + "External id": 3295075,"Record function id": 0, "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597097976.778, "dur": 43.696, + "args": { + "External id": 3295076,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587597098045.964, "dur": 31.360, + "args": { + "External id": 3295077,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098051.060, "dur": 2.886, + "args": { + "External id": 3295078,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098056.411, "dur": 0.447, + "args": { + "External id": 3295079,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098058.591, "dur": 0.308, + "args": { + "External id": 3295080,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098061.701, "dur": 0.371, + "args": { + "External id": 3295081,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098063.353, "dur": 0.546, + "args": { + "External id": 3295082,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098065.665, "dur": 0.526, + "args": { + "External id": 3295083,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098068.460, "dur": 1.951, + "args": { + "External id": 3295084,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098071.624, "dur": 0.385, + "args": { + "External id": 3295085,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098073.054, "dur": 1.393, + "args": { + "External id": 3295086,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597098088.769, "dur": 39.963, + "args": { + "External id": 3295087,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1381189, + "ts": 1587597098160.589, "dur": 103.517, + "args": { + "External id": 3295088,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597098171.174, "dur": 4.094, + "args": { + "External id": 3295089,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1381189, + "ts": 1587597098180.325, "dur": 9.684, + "args": { + "External id": 3295090,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1381189, + "ts": 1587597098184.499, "dur": 5.099, + "args": { + "External id": 3295091,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098187.798, "dur": 0.598, + "args": { + "External id": 3295092,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1381189, + "ts": 1587597098197.054, "dur": 25.871, + "args": { + "External id": 3295093,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098199.579, "dur": 0.314, + "args": { + "External id": 3295094,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098201.303, "dur": 0.170, + "args": { + "External id": 3295095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098202.732, "dur": 1.147, + "args": { + "External id": 3295096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098205.159, "dur": 2.047, + "args": { + "External id": 3295097,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098208.338, "dur": 0.530, + "args": { + "External id": 3295098,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098210.879, "dur": 0.706, + "args": { + "External id": 3295099,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098212.502, "dur": 0.576, + "args": { + "External id": 3295100,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098214.259, "dur": 0.568, + "args": { + "External id": 3295101,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597098216.027, "dur": 0.454, + "args": { + "External id": 3295102,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597098232.356, "dur": 24.329, + "args": { + "External id": 3295103,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597098312.701, "dur": 112.399, + "args": { + "External id": 3295104,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597098336.028, "dur": 85.787, + "args": { + "External id": 3295105,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5504, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597098345.105, "dur": 69.580, + "args": { + "External id": 3295106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597098439.953, "dur": 2.065, + "args": { + "External id": 3295107,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5506, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597098515.463, "dur": 1671.857, + "args": { + "External id": 3295108,"Sequence number": 32987108, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5507 + } + }, + { + "ph": "f", "id": 224, "pid": 1336756, "tid": 1381189, "ts": 1587597098515.463, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597098622.169, "dur": 100.575, + "args": { + "External id": 3295109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597098762.594, "dur": 40.791, + "args": { + "External id": 3295110,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587597098819.281, "dur": 72.879, + "args": { + "External id": 3295111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597098906.774, "dur": 35.836, + "args": { + "External id": 3295112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597098949.460, "dur": 84.328, + "args": { + "External id": 3295113,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099047.102, "dur": 34.437, + "args": { + "External id": 3295114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099088.407, "dur": 43.702, + "args": { + "External id": 3295115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597099159.699, "dur": 26.971, + "args": { + "External id": 3295116,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597099203.105, "dur": 29.727, + "args": { + "External id": 3295117,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597099256.051, "dur": 20.325, + "args": { + "External id": 3295118,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597099289.456, "dur": 18.339, + "args": { + "External id": 3295119,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099316.569, "dur": 29.684, + "args": { + "External id": 3295120,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099349.581, "dur": 33.331, + "args": { + "External id": 3295121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587597099408.199, "dur": 166.657, + "args": { + "External id": 3295122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597099484.649, "dur": 5.797, + "args": { + "External id": 3295123,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597099492.191, "dur": 1.890, + "args": { + "External id": 3295124,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597099608.586, "dur": 24.745, + "args": { + "External id": 3295125,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597099645.463, "dur": 15.358, + "args": { + "External id": 3295126,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099671.621, "dur": 33.356, + "args": { + "External id": 3295127,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099711.147, "dur": 35.976, + "args": { + "External id": 3295128,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099753.861, "dur": 20.893, + "args": { + "External id": 3295129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099779.122, "dur": 28.714, + "args": { + "External id": 3295130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099813.453, "dur": 20.565, + "args": { + "External id": 3295131,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597099841.596, "dur": 50.846, + "args": { + "External id": 3295132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587597099921.139, "dur": 22.936, + "args": { + "External id": 3295133,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597099977.930, "dur": 70.283, + "args": { + "External id": 3295134,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597100070.141, "dur": 21.063, + "args": { + "External id": 3295135,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597100108.379, "dur": 16.175, + "args": { + "External id": 3295136,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587597100138.461, "dur": 20.556, + "args": { + "External id": 3295137,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100231.527, "dur": 15.364, + "args": { + "External id": 3295138,"Record function id": 0, "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100234.776, "dur": 11.149, + "args": { + "External id": 3295139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100239.073, "dur": 5.950, + "args": { + "External id": 3295140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100240.964, "dur": 3.974, + "args": { + "External id": 3295141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100250.924, "dur": 4.807, + "args": { + "External id": 3295142,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100252.317, "dur": 2.975, + "args": { + "External id": 3295143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100252.966, "dur": 1.797, + "args": { + "External id": 3295144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100253.498, "dur": 1.178, + "args": { + "External id": 3295145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100259.040, "dur": 4.785, + "args": { + "External id": 3295146,"Record function id": 0, "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100260.347, "dur": 3.066, + "args": { + "External id": 3295147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100261.125, "dur": 1.807, + "args": { + "External id": 3295148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100261.955, "dur": 0.883, + "args": { + "External id": 3295149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100267.063, "dur": 3.592, + "args": { + "External id": 3295150,"Record function id": 0, "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100268.329, "dur": 1.898, + "args": { + "External id": 3295151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100268.851, "dur": 0.946, + "args": { + "External id": 3295152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100269.157, "dur": 0.548, + "args": { + "External id": 3295153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100273.648, "dur": 5.060, + "args": { + "External id": 3295154,"Record function id": 0, "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100274.727, "dur": 3.571, + "args": { + "External id": 3295155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100275.222, "dur": 2.650, + "args": { + "External id": 3295156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100275.477, "dur": 2.329, + "args": { + "External id": 3295157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100281.663, "dur": 3.991, + "args": { + "External id": 3295158,"Record function id": 0, "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100282.744, "dur": 2.482, + "args": { + "External id": 3295159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100283.179, "dur": 1.607, + "args": { + "External id": 3295160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100283.885, "dur": 0.830, + "args": { + "External id": 3295161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100288.759, "dur": 3.873, + "args": { + "External id": 3295162,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100289.932, "dur": 2.311, + "args": { + "External id": 3295163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100290.737, "dur": 1.102, + "args": { + "External id": 3295164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100291.019, "dur": 0.746, + "args": { + "External id": 3295165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100295.621, "dur": 3.392, + "args": { + "External id": 3295166,"Record function id": 0, "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100296.650, "dur": 1.946, + "args": { + "External id": 3295167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100297.075, "dur": 1.108, + "args": { + "External id": 3295168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100297.638, "dur": 0.472, + "args": { + "External id": 3295169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100301.935, "dur": 5.402, + "args": { + "External id": 3295170,"Record function id": 0, "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597100303.166, "dur": 3.780, + "args": { + "External id": 3295171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100303.935, "dur": 2.541, + "args": { + "External id": 3295172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597100305.717, "dur": 0.635, + "args": { + "External id": 3295173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597100310.986, "dur": 36125.982, + "args": { + "External id": 3295174,"Record function id": 0, "Sequence number": 32987107, "Fwd thread id": 1, "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597100312.862, "dur": 36115.877, + "args": { + "External id": 3295175,"Sequence number": 32987107, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5574 + } + }, + { + "ph": "f", "id": 225, "pid": 1336756, "tid": 1381189, "ts": 1587597100312.862, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 1336756, "tid": 1381189, + "ts": 1587597100339.705, "dur": 36.424, + "args": { + "External id": 3295176,"Record function id": 0, "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 1336756, "tid": 1381189, + "ts": 1587597100383.543, "dur": 63.441, + "args": { + "External id": 3295177,"Record function id": 0, "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 1336756, "tid": 1381189, + "ts": 1587597100452.901, "dur": 35967.739, + "args": { + "External id": 3295178,"Record function id": 0, "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597100536.800, "dur": 5.978, + "args": { + "External id": 3295179,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597100552.311, "dur": 4.799, + "args": { + "External id": 3295180,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597100572.140, "dur": 34995.935, + "args": { + "External id": 3295181,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597100584.568, "dur": 34973.278, + "args": { + "External id": 3295182,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597100628.986, "dur": 13.177, + "args": { + "External id": 3295183,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597100649.799, "dur": 34868.998, + "args": { + "External id": 3295184,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597100652.508, "dur": 34865.487, + "args": { + "External id": 3295185,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597100656.579, "dur": 4.460, + "args": { + "External id": 3295186,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597100662.573, "dur": 34851.276, + "args": { + "External id": 3295187,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597135662.991, "dur": 10.145, + "args": { + "External id": 3295188,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597135666.244, "dur": 6.438, + "args": { + "External id": 3295189,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597135703.666, "dur": 398.980, + "args": { + "External id": 3295190,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597135731.196, "dur": 366.386, + "args": { + "External id": 3295191,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5590, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597135742.802, "dur": 349.000, + "args": { + "External id": 3295192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597136126.244, "dur": 2.774, + "args": { + "External id": 3295193,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5592, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136190.612, "dur": 6.653, + "args": { + "External id": 3295194,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136242.556, "dur": 1.493, + "args": { + "External id": 3295195,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136259.437, "dur": 1.520, + "args": { + "External id": 3295196,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136272.931, "dur": 1.170, + "args": { + "External id": 3295197,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136304.113, "dur": 1.514, + "args": { + "External id": 3295198,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136323.329, "dur": 0.771, + "args": { + "External id": 3295199,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136336.113, "dur": 0.766, + "args": { + "External id": 3295200,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136347.545, "dur": 1.079, + "args": { + "External id": 3295201,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136359.494, "dur": 0.872, + "args": { + "External id": 3295202,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597136452.408, "dur": 2159.083, + "args": { + "External id": 3295203,"Record function id": 0, "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 1336756, "tid": 1381189, + "ts": 1587597136471.753, "dur": 460.350, + "args": { + "External id": 3295204,"Record function id": 0, "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 1336756, "tid": 1381189, + "ts": 1587597136486.368, "dur": 318.732, + "args": { + "External id": 3295205,"Record function id": 0, "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136574.045, "dur": 3.772, + "args": { + "External id": 3295206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136580.891, "dur": 0.679, + "args": { + "External id": 3295207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136583.204, "dur": 0.993, + "args": { + "External id": 3295208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136586.338, "dur": 0.731, + "args": { + "External id": 3295209,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136589.959, "dur": 0.732, + "args": { + "External id": 3295210,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136592.563, "dur": 1.090, + "args": { + "External id": 3295211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136595.085, "dur": 1.012, + "args": { + "External id": 3295212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136597.413, "dur": 2.329, + "args": { + "External id": 3295213,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136602.304, "dur": 1.030, + "args": { + "External id": 3295214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597136604.811, "dur": 0.860, + "args": { + "External id": 3295215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597136624.212, "dur": 153.957, + "args": { + "External id": 3295216,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597136643.550, "dur": 130.414, + "args": { + "External id": 3295217,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597136659.418, "dur": 12.230, + "args": { + "External id": 3295218,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597136674.810, "dur": 67.980, + "args": { + "External id": 3295219,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597136678.798, "dur": 63.686, + "args": { + "External id": 3295220,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597136682.486, "dur": 5.978, + "args": { + "External id": 3295221,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597136690.115, "dur": 51.614, + "args": { + "External id": 3295222,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597136940.702, "dur": 1649.844, + "args": { + "External id": 3295223,"Sequence number": 32987106, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5622 + } + }, + { + "ph": "f", "id": 226, "pid": 1336756, "tid": 1381189, "ts": 1587597136940.702, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137096.415, "dur": 109.744, + "args": { + "External id": 3295224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597137255.763, "dur": 39.423, + "args": { + "External id": 3295225,"kernel_hash": "c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/2z/c2zszzsomz26dv2qwvmiurvgyrl5qxtyn6ocoqkdpzzhfzwqp5q6.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137311.209, "dur": 50.201, + "args": { + "External id": 3295226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137371.981, "dur": 32.020, + "args": { + "External id": 3295227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137410.045, "dur": 46.599, + "args": { + "External id": 3295228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137464.752, "dur": 27.318, + "args": { + "External id": 3295229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137501.777, "dur": 41.630, + "args": { + "External id": 3295230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597137570.075, "dur": 23.349, + "args": { + "External id": 3295231,"kernel_hash": "cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jd/cjd6g3jfgntyoifzno6l2urmnhif7alxtw6zz7miov66vywxwv7d.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597137610.452, "dur": 34.917, + "args": { + "External id": 3295232,"kernel_hash": "cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/xh/cxhg545cyph4gbk5vymzyh72vho7oogk3ycan2lp4cxntklp5hew.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597137691.360, "dur": 19.962, + "args": { + "External id": 3295233,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597137727.027, "dur": 17.743, + "args": { + "External id": 3295234,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137752.832, "dur": 29.060, + "args": { + "External id": 3295235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597137785.107, "dur": 32.653, + "args": { + "External id": 3295236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 1336756, "tid": 1381189, + "ts": 1587597137842.984, "dur": 225.882, + "args": { + "External id": 3295237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597137935.235, "dur": 6.265, + "args": { + "External id": 3295238,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597137943.718, "dur": 1.943, + "args": { + "External id": 3295239,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597138107.013, "dur": 27.777, + "args": { + "External id": 3295240,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597138147.779, "dur": 15.643, + "args": { + "External id": 3295241,"kernel_hash": "cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/b6/cb6oumcqb462ndf2ivcd2sc6qg6mfjahptjw4ih7cesqhfpziv3b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597138172.118, "dur": 41.711, + "args": { + "External id": 3295242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597138221.304, "dur": 35.133, + "args": { + "External id": 3295243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597138262.812, "dur": 21.740, + "args": { + "External id": 3295244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597138289.445, "dur": 29.321, + "args": { + "External id": 3295245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597138324.582, "dur": 21.594, + "args": { + "External id": 3295246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1381189, + "ts": 1587597138354.268, "dur": 34.778, + "args": { + "External id": 3295247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 1336756, "tid": 1381189, + "ts": 1587597138407.182, "dur": 22.215, + "args": { + "External id": 3295248,"kernel_hash": "c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7p/c7pqhdjojzeazwsh5g3ibnyk4m2nfh3q73e3zc7hljz4yvhe7c4h.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597138448.782, "dur": 23.832, + "args": { + "External id": 3295249,"kernel_hash": "cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vg/cvgvwkysjpcw75q5t3guce5bep2so25f7i5zxc6ipt5f56shof73.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597138487.580, "dur": 17.499, + "args": { + "External id": 3295250,"kernel_hash": "cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/u4/cu42i6zmxtvs2i7cs6x4ixaexke3lhgdiaye5oxwm7s3u6em6co4.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 1336756, "tid": 1381189, + "ts": 1587597138518.985, "dur": 16.104, + "args": { + "External id": 3295251,"kernel_hash": "ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/ki/ckihofrorhmzazt2wfn2dz2m5wfx3auisu2gvfo2jsepr4zyekmq.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 1336756, "tid": 1381189, + "ts": 1587597138547.821, "dur": 16.808, + "args": { + "External id": 3295252,"kernel_hash": "cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ck/cckrpn5uuxyvbancoio3sz2onrjto236n54xx4ycphq5p6r3jlfj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138633.098, "dur": 14.976, + "args": { + "External id": 3295253,"Record function id": 0, "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138636.444, "dur": 10.804, + "args": { + "External id": 3295254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138640.677, "dur": 5.722, + "args": { + "External id": 3295255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138642.450, "dur": 3.818, + "args": { + "External id": 3295256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138651.738, "dur": 5.180, + "args": { + "External id": 3295257,"Record function id": 0, "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138653.454, "dur": 3.007, + "args": { + "External id": 3295258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138654.258, "dur": 1.654, + "args": { + "External id": 3295259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138654.783, "dur": 1.057, + "args": { + "External id": 3295260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138660.060, "dur": 4.760, + "args": { + "External id": 3295261,"Record function id": 0, "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138662.032, "dur": 2.382, + "args": { + "External id": 3295262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138662.487, "dur": 1.415, + "args": { + "External id": 3295263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138663.003, "dur": 0.830, + "args": { + "External id": 3295264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138667.973, "dur": 5.101, + "args": { + "External id": 3295265,"Record function id": 0, "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138669.659, "dur": 2.957, + "args": { + "External id": 3295266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138670.579, "dur": 1.570, + "args": { + "External id": 3295267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138671.310, "dur": 0.712, + "args": { + "External id": 3295268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138676.202, "dur": 8.489, + "args": { + "External id": 3295269,"Record function id": 0, "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138677.659, "dur": 6.615, + "args": { + "External id": 3295270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138678.126, "dur": 5.680, + "args": { + "External id": 3295271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138678.433, "dur": 5.301, + "args": { + "External id": 3295272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138687.892, "dur": 4.438, + "args": { + "External id": 3295273,"Record function id": 0, "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138689.466, "dur": 2.412, + "args": { + "External id": 3295274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138689.972, "dur": 1.465, + "args": { + "External id": 3295275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138690.432, "dur": 0.899, + "args": { + "External id": 3295276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138695.543, "dur": 4.406, + "args": { + "External id": 3295277,"Record function id": 0, "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138696.963, "dur": 2.571, + "args": { + "External id": 3295278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138697.659, "dur": 1.435, + "args": { + "External id": 3295279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138698.118, "dur": 0.866, + "args": { + "External id": 3295280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138702.977, "dur": 4.618, + "args": { + "External id": 3295281,"Record function id": 0, "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138704.385, "dur": 2.809, + "args": { + "External id": 3295282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138705.063, "dur": 1.696, + "args": { + "External id": 3295283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138705.773, "dur": 0.913, + "args": { + "External id": 3295284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138710.549, "dur": 3.984, + "args": { + "External id": 3295285,"Record function id": 0, "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597138711.677, "dur": 2.448, + "args": { + "External id": 3295286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138712.671, "dur": 1.019, + "args": { + "External id": 3295287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597138712.932, "dur": 0.678, + "args": { + "External id": 3295288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597138717.974, "dur": 36671.553, + "args": { + "External id": 3295289,"Record function id": 0, "Sequence number": 32987105, "Fwd thread id": 1, "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597138719.107, "dur": 36662.460, + "args": { + "External id": 3295290,"Sequence number": 32987105, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5689 + } + }, + { + "ph": "f", "id": 227, "pid": 1336756, "tid": 1381189, "ts": 1587597138719.107, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 1336756, "tid": 1381189, + "ts": 1587597138748.881, "dur": 34.901, + "args": { + "External id": 3295291,"Record function id": 0, "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 1336756, "tid": 1381189, + "ts": 1587597138790.786, "dur": 92.332, + "args": { + "External id": 3295292,"Record function id": 0, "Ev Idx": 5691 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 1336756, "tid": 1381189, + "ts": 1587597138892.891, "dur": 36481.430, + "args": { + "External id": 3295293,"Record function id": 0, "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597139030.089, "dur": 9.942, + "args": { + "External id": 3295294,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597139051.538, "dur": 5.245, + "args": { + "External id": 3295295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597139072.517, "dur": 35503.165, + "args": { + "External id": 3295296,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597139090.847, "dur": 35476.943, + "args": { + "External id": 3295297,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597139134.882, "dur": 16.816, + "args": { + "External id": 3295298,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597139157.714, "dur": 35374.300, + "args": { + "External id": 3295299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597139160.046, "dur": 35371.403, + "args": { + "External id": 3295300,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597139164.130, "dur": 5.268, + "args": { + "External id": 3295301,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597139170.901, "dur": 35357.628, + "args": { + "External id": 3295302,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597174658.549, "dur": 8.605, + "args": { + "External id": 3295303,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597174661.156, "dur": 5.624, + "args": { + "External id": 3295304,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597174694.876, "dur": 392.888, + "args": { + "External id": 3295305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597174720.968, "dur": 361.559, + "args": { + "External id": 3295306,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5705, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597174731.930, "dur": 344.787, + "args": { + "External id": 3295307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597175108.553, "dur": 2.436, + "args": { + "External id": 3295308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5707, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175171.186, "dur": 6.399, + "args": { + "External id": 3295309,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175223.274, "dur": 1.498, + "args": { + "External id": 3295310,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175239.980, "dur": 1.256, + "args": { + "External id": 3295311,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175253.602, "dur": 0.759, + "args": { + "External id": 3295312,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175266.067, "dur": 0.902, + "args": { + "External id": 3295313,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175278.017, "dur": 1.079, + "args": { + "External id": 3295314,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175289.469, "dur": 0.814, + "args": { + "External id": 3295315,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175301.662, "dur": 0.846, + "args": { + "External id": 3295316,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597175312.416, "dur": 0.838, + "args": { + "External id": 3295317,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597175403.792, "dur": 270.120, + "args": { + "External id": 3295318,"Record function id": 0, "Sequence number": 32987104, "Fwd thread id": 1, "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 1336756, "tid": 1381189, + "ts": 1587597175406.407, "dur": 259.670, + "args": { + "External id": 3295319,"Sequence number": 32987104, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5718 + } + }, + { + "ph": "f", "id": 228, "pid": 1336756, "tid": 1381189, "ts": 1587597175406.407, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 1336756, "tid": 1381189, + "ts": 1587597175522.229, "dur": 44.291, + "args": { + "External id": 3295320,"kernel_hash": "cqirzqrltfdqnxhjos6hp6gemkm4gwku4foo6uqui7opbq4ng4m5", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/qi/cqirzqrltfdqnxhjos6hp6gemkm4gwku4foo6uqui7opbq4ng4m5.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 1336756, "tid": 1381189, + "ts": 1587597175582.022, "dur": 28.956, + "args": { + "External id": 3295321,"kernel_hash": "cjxnzfkkndlwdapjvgkkjvqxc72zinfqxyyoxiyd5swlefndj6n5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/jx/cjxnzfkkndlwdapjvgkkjvqxc72zinfqxyyoxiyd5swlefndj6n5.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096], [16, 4096, 2048], [32000, 2048], []], "Ev Idx": 5720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 1336756, "tid": 1381189, + "ts": 1587597175627.544, "dur": 24.089, + "args": { + "External id": 3295322,"kernel_hash": "cycx64w3vzu6rbiohboydiqd4wfbj7vazwzbvujyhnlqv6l7yuhz", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/yc/cycx64w3vzu6rbiohboydiqd4wfbj7vazwzbvujyhnlqv6l7yuhz.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597175683.184, "dur": 15.223, + "args": { + "External id": 3295323,"Record function id": 0, "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 1336756, "tid": 1381189, + "ts": 1587597175686.912, "dur": 10.633, + "args": { + "External id": 3295324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597175690.958, "dur": 5.707, + "args": { + "External id": 3295325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1381189, + "ts": 1587597175692.475, "dur": 4.100, + "args": { + "External id": 3295326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 1336756, "tid": 1381189, + "ts": 1587597175718.856, "dur": 10854.476, + "args": { + "External id": 3295327,"Record function id": 0, "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 1336756, "tid": 1381189, + "ts": 1587597175738.917, "dur": 37.582, + "args": { + "External id": 3295328,"Record function id": 0, "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 1336756, "tid": 1381189, + "ts": 1587597175782.529, "dur": 323.480, + "args": { + "External id": 3295329,"Record function id": 0, "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 1336756, "tid": 1381189, + "ts": 1587597176114.630, "dur": 10196.412, + "args": { + "External id": 3295330,"Record function id": 0, "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597176237.483, "dur": 6.918, + "args": { + "External id": 3295331,"Record function id": 0, "Concrete Inputs": ["[336611328]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1381189, + "ts": 1587597176254.790, "dur": 4.820, + "args": { + "External id": 3295332,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[336611328], []], "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597176280.387, "dur": 8950.105, + "args": { + "External id": 3295333,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [42076416, 1]], "Input Dims": [[], [], [], [8, 42076416]], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 1336756, "tid": 1381189, + "ts": 1587597176302.085, "dur": 8915.204, + "args": { + "External id": 3295334,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [42076416, 1]], "Input Dims": [[], [], [], [8, 42076416]], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597176648.659, "dur": 18.399, + "args": { + "External id": 3295335,"Record function id": 0, "Concrete Inputs": ["[82421]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1381189, + "ts": 1587597176759.693, "dur": 8406.856, + "args": { + "External id": 3295336,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[82421], [], [], [], [], [], [], []], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1381189, + "ts": 1587597176762.481, "dur": 8403.061, + "args": { + "External id": 3295337,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[82421], [], [], [], [], [], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597176767.872, "dur": 10.605, + "args": { + "External id": 3295338,"Record function id": 0, "Concrete Inputs": ["[82421]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1381189, + "ts": 1587597176780.215, "dur": 8379.754, + "args": { + "External id": 3295339,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[82421], [82421], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597185358.014, "dur": 11.066, + "args": { + "External id": 3295340,"Record function id": 0, "Concrete Inputs": ["", "[42076416]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[336611328], [], [], [], [], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1381189, + "ts": 1587597185361.818, "dur": 6.862, + "args": { + "External id": 3295341,"Record function id": 0, "Concrete Inputs": ["[42076416]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 1336756, "tid": 1381189, + "ts": 1587597185396.597, "dur": 239.917, + "args": { + "External id": 3295342,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[42076416], [336611328], [], [], [], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597185420.680, "dur": 211.988, + "args": { + "External id": 3295343,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 42076416, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[336611328], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5742, "In msg nelems": 336611328 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 1336756, "tid": 1381189, + "ts": 1587597185435.203, "dur": 192.187, + "args": { + "External id": 3295344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[336611328]], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1381189, + "ts": 1587597185651.586, "dur": 2.311, + "args": { + "External id": 3295345,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5744, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185700.929, "dur": 6.372, + "args": { + "External id": 3295346,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185749.103, "dur": 1.588, + "args": { + "External id": 3295347,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185765.105, "dur": 1.254, + "args": { + "External id": 3295348,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "8192256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185780.271, "dur": 1.093, + "args": { + "External id": 3295349,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "8716544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185792.898, "dur": 1.161, + "args": { + "External id": 3295350,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "9240832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185804.101, "dur": 1.080, + "args": { + "External id": 3295351,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "9765120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185817.406, "dur": 1.135, + "args": { + "External id": 3295352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "10289408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185830.646, "dur": 1.216, + "args": { + "External id": 3295353,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "10289664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185842.168, "dur": 0.938, + "args": { + "External id": 3295354,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "11731456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185853.051, "dur": 0.648, + "args": { + "External id": 3295355,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "13173248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185882.937, "dur": 1.819, + "args": { + "External id": 3295356,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14615040"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185901.038, "dur": 1.038, + "args": { + "External id": 3295357,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "14615296"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185911.839, "dur": 0.743, + "args": { + "External id": 3295358,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "15139584"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185922.339, "dur": 0.755, + "args": { + "External id": 3295359,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "15663872"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185932.158, "dur": 1.024, + "args": { + "External id": 3295360,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "16188160"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185943.888, "dur": 1.404, + "args": { + "External id": 3295361,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "16712448"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185954.651, "dur": 1.326, + "args": { + "External id": 3295362,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "16712704"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185965.839, "dur": 1.413, + "args": { + "External id": 3295363,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "18154496"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597185978.143, "dur": 1.137, + "args": { + "External id": 3295364,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "19596288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186025.102, "dur": 1.716, + "args": { + "External id": 3295365,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21038080"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186038.071, "dur": 1.377, + "args": { + "External id": 3295366,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "21038336"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186049.655, "dur": 1.342, + "args": { + "External id": 3295367,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "21562624"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186061.938, "dur": 1.485, + "args": { + "External id": 3295368,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "22086912"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186074.963, "dur": 1.806, + "args": { + "External id": 3295369,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "22611200"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186086.393, "dur": 1.438, + "args": { + "External id": 3295370,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "23135488"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186097.231, "dur": 1.326, + "args": { + "External id": 3295371,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "23135744"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186109.229, "dur": 1.162, + "args": { + "External id": 3295372,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "24577536"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186120.873, "dur": 1.608, + "args": { + "External id": 3295373,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "26019328"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186131.961, "dur": 1.450, + "args": { + "External id": 3295374,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "27461120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186143.954, "dur": 1.039, + "args": { + "External id": 3295375,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "27461376"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186156.351, "dur": 1.473, + "args": { + "External id": 3295376,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "27985664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186168.961, "dur": 1.608, + "args": { + "External id": 3295377,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "28509952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186180.219, "dur": 1.471, + "args": { + "External id": 3295378,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "29034240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186190.375, "dur": 1.587, + "args": { + "External id": 3295379,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "29558528"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186202.861, "dur": 1.342, + "args": { + "External id": 3295380,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "29558784"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186215.243, "dur": 1.173, + "args": { + "External id": 3295381,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "31000576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186225.961, "dur": 1.269, + "args": { + "External id": 3295382,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "32442368"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186237.214, "dur": 1.510, + "args": { + "External id": 3295383,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "33884160"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1381189, + "ts": 1587597186248.387, "dur": 1.278, + "args": { + "External id": 3295384,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "33884416"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#137215", "pid": 1336756, "tid": 1336756, + "ts": 1587595083552.063, "dur": 2120910.193, + "args": { + "External id": 3284481,"Record function id": 0, "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 1336756, "tid": 1336756, + "ts": 1587595083587.450, "dur": 612.951, + "args": { + "External id": 3284482,"Record function id": 0, "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 1336756, "tid": 1336756, + "ts": 1587595084248.619, "dur": 82.363, + "args": { + "External id": 3284483,"Record function id": 0, "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595085112.937, "dur": 18.127, + "args": { + "External id": 3284484,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 5787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595085124.427, "dur": 2.780, + "args": { + "External id": 3284485,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595085133.103, "dur": 4.273, + "args": { + "External id": 3284486,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595085135.357, "dur": 0.872, + "args": { + "External id": 3284487,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595085163.987, "dur": 538.449, + "args": { + "External id": 3284488,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], [], []], "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595085170.920, "dur": 530.942, + "args": { + "External id": 3284489,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 5792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595085180.381, "dur": 9.736, + "args": { + "External id": 3284490,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595085192.731, "dur": 507.533, + "args": { + "External id": 3284491,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595085202.512, "dur": 0.538, + "args": { + "External id": 3284492,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595085206.573, "dur": 8.869, + "args": { + "External id": 3284493,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[16, 4096], [16, 4096]], "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 1336756, "tid": 1336756, + "ts": 1587595085211.778, "dur": 3.525, + "args": { + "External id": 3284494,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[16, 4096], [], []], "Ev Idx": 5797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595085214.530, "dur": 0.531, + "args": { + "External id": 3284495,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595085217.297, "dur": 242.102, + "args": { + "External id": 3284496,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595085220.477, "dur": 238.179, + "args": { + "External id": 3284497,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595085223.617, "dur": 12.620, + "args": { + "External id": 3284498,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595085231.030, "dur": 4.762, + "args": { + "External id": 3284499,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595085237.322, "dur": 220.618, + "args": { + "External id": 3284500,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595085464.094, "dur": 233.345, + "args": { + "External id": 3284501,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595085713.198, "dur": 635.100, + "args": { + "External id": 3284502,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595085714.577, "dur": 632.912, + "args": { + "External id": 3284503,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595085718.083, "dur": 7.156, + "args": { + "External id": 3284504,"Record function id": 0, "Concrete Inputs": ["[16, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595085726.210, "dur": 616.253, + "args": { + "External id": 3284505,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[16, 8192], [16, 8192], []], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 1336756, "tid": 1336756, + "ts": 1587595086379.059, "dur": 58.442, + "args": { + "External id": 3284506,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595086384.539, "dur": 5.913, + "args": { + "External id": 3284507,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 1336756, "tid": 1336756, + "ts": 1587595086393.569, "dur": 43.512, + "args": { + "External id": 3284508,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 5811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595086400.959, "dur": 7.337, + "args": { + "External id": 3284509,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 1336756, "tid": 1336756, + "ts": 1587595086447.883, "dur": 73.353, + "args": { + "External id": 3284510,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 1336756, "tid": 1336756, + "ts": 1587595086453.422, "dur": 8.032, + "args": { + "External id": 3284511,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595086459.345, "dur": 1.767, + "args": { + "External id": 3284512,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 5815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595086465.138, "dur": 3.393, + "args": { + "External id": 3284513,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1336756, + "ts": 1587595086471.028, "dur": 3.357, + "args": { + "External id": 3284514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[16, 4096]], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 1336756, "tid": 1336756, + "ts": 1587595086477.347, "dur": 7.440, + "args": { + "External id": 3284515,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595086483.901, "dur": 0.696, + "args": { + "External id": 3284516,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 1336756, "tid": 1336756, + "ts": 1587595086485.969, "dur": 2.252, + "args": { + "External id": 3284517,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595086487.413, "dur": 0.721, + "args": { + "External id": 3284518,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595086489.975, "dur": 4.221, + "args": { + "External id": 3284519,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [16, 1, 1, 4096]], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 1336756, "tid": 1336756, + "ts": 1587595086491.398, "dur": 2.678, + "args": { + "External id": 3284520,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595086493.321, "dur": 0.669, + "args": { + "External id": 3284521,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595086495.232, "dur": 25.407, + "args": { + "External id": 3284522,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[16, 1, 1, 4096], [16, 1, 1, 4096], []], "Ev Idx": 5825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595086529.297, "dur": 34.198, + "args": { + "External id": 3284523,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595086533.947, "dur": 29.372, + "args": { + "External id": 3284524,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595086539.263, "dur": 3.358, + "args": { + "External id": 3284525,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595086543.597, "dur": 18.986, + "args": { + "External id": 3284526,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595086671.238, "dur": 141.964, + "args": { + "External id": 3284527,"Record function id": 0, "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 1336756, "tid": 1336756, + "ts": 1587595086743.478, "dur": 58.895, + "args": { + "External id": 3284528,"Record function id": 0, "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595086819.708, "dur": 40.085, + "args": { + "External id": 3284529,"Record function id": 0, "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595086871.211, "dur": 11604.406, + "args": { + "External id": 3284530,"Record function id": 0, "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 1336756, "tid": 1336756, + "ts": 1587595086898.285, "dur": 1312.876, + "args": { + "External id": 3284531,"Record function id": 0, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595087039.679, "dur": 8.434, + "args": { + "External id": 3284532,"Record function id": 0, "Concrete Inputs": ["[42076416]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595087067.370, "dur": 138.082, + "args": { + "External id": 3284533,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[42076416], [], []], "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087072.201, "dur": 1.844, + "args": { + "External id": 3284534,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087078.310, "dur": 2.562, + "args": { + "External id": 3284535,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087081.675, "dur": 0.321, + "args": { + "External id": 3284536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "8192256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087082.766, "dur": 1.540, + "args": { + "External id": 3284537,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "8716544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087089.812, "dur": 0.277, + "args": { + "External id": 3284538,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "9240832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087090.802, "dur": 0.389, + "args": { + "External id": 3284539,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "9765120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087094.220, "dur": 0.418, + "args": { + "External id": 3284540,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "10289408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087098.036, "dur": 0.536, + "args": { + "External id": 3284541,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "10289664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087099.414, "dur": 0.214, + "args": { + "External id": 3284542,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "11731456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087102.971, "dur": 2.258, + "args": { + "External id": 3284543,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "13173248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087106.076, "dur": 0.567, + "args": { + "External id": 3284544,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "14615040"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087107.398, "dur": 1.240, + "args": { + "External id": 3284545,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "14615296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087114.314, "dur": 0.180, + "args": { + "External id": 3284546,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "15139584"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087115.198, "dur": 0.389, + "args": { + "External id": 3284547,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "15663872"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087118.257, "dur": 0.354, + "args": { + "External id": 3284548,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "16188160"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087121.643, "dur": 0.385, + "args": { + "External id": 3284549,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "16712448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087122.853, "dur": 0.336, + "args": { + "External id": 3284550,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "16712704"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087126.108, "dur": 2.032, + "args": { + "External id": 3284551,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "18154496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087129.311, "dur": 0.325, + "args": { + "External id": 3284552,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "19596288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087130.461, "dur": 1.219, + "args": { + "External id": 3284553,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21038080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087137.383, "dur": 0.139, + "args": { + "External id": 3284554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "21038336"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087137.948, "dur": 0.655, + "args": { + "External id": 3284555,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "21562624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087141.466, "dur": 0.392, + "args": { + "External id": 3284556,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "22086912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087145.208, "dur": 0.162, + "args": { + "External id": 3284557,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "22611200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087146.371, "dur": 0.399, + "args": { + "External id": 3284558,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "23135488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087149.702, "dur": 2.330, + "args": { + "External id": 3284559,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "23135744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087153.088, "dur": 0.165, + "args": { + "External id": 3284560,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24577536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087153.879, "dur": 1.315, + "args": { + "External id": 3284561,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "26019328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087161.242, "dur": 0.337, + "args": { + "External id": 3284562,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "27461120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087162.222, "dur": 0.160, + "args": { + "External id": 3284563,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "27461376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087165.259, "dur": 0.492, + "args": { + "External id": 3284564,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "27985664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087168.959, "dur": 0.146, + "args": { + "External id": 3284565,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "28509952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087169.917, "dur": 0.148, + "args": { + "External id": 3284566,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "29034240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087173.179, "dur": 2.133, + "args": { + "External id": 3284567,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "29558528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087176.172, "dur": 0.439, + "args": { + "External id": 3284568,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "29558784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087177.517, "dur": 1.728, + "args": { + "External id": 3284569,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "31000576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087184.464, "dur": 0.148, + "args": { + "External id": 3284570,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "32442368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087185.276, "dur": 0.362, + "args": { + "External id": 3284571,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "33884160"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087188.639, "dur": 0.543, + "args": { + "External id": 3284572,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "33884416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595087229.692, "dur": 82.126, + "args": { + "External id": 3284573,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595087375.900, "dur": 268.900, + "args": { + "External id": 3284574,"Record function id": 0, "Concrete Inputs": ["", "", "42076416", "8", "3", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595087392.541, "dur": 5.280, + "args": { + "External id": 3284575,"Record function id": 0, "Concrete Inputs": ["[336611328]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595087403.967, "dur": 16.009, + "args": { + "External id": 3284576,"Record function id": 0, "Concrete Inputs": ["", "0", "126229248", "42076416"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[336611328], [], [], []], "Ev Idx": 5879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595087411.159, "dur": 8.417, + "args": { + "External id": 3284577,"Record function id": 0, "Concrete Inputs": ["", "0", "126229248", "168305664", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[336611328], [], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087416.502, "dur": 0.816, + "args": { + "External id": 3284578,"Record function id": 0, "Concrete Inputs": ["", "[42076416]", "[1]", "126229248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[336611328], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595087427.161, "dur": 120.758, + "args": { + "External id": 3284579,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[42076416], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087429.027, "dur": 0.653, + "args": { + "External id": 3284580,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "126229248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087433.267, "dur": 0.647, + "args": { + "External id": 3284581,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "134421248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087434.756, "dur": 1.391, + "args": { + "External id": 3284582,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "134421504"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087437.146, "dur": 0.634, + "args": { + "External id": 3284583,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "134945792"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087440.913, "dur": 0.352, + "args": { + "External id": 3284584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "135470080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087442.079, "dur": 0.159, + "args": { + "External id": 3284585,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "135994368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087443.205, "dur": 0.363, + "args": { + "External id": 3284586,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "136518656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087446.916, "dur": 0.458, + "args": { + "External id": 3284587,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "136518912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087448.499, "dur": 0.618, + "args": { + "External id": 3284588,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "137960704"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087452.247, "dur": 0.163, + "args": { + "External id": 3284589,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "139402496"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087455.328, "dur": 1.497, + "args": { + "External id": 3284590,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "140844288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087457.614, "dur": 0.162, + "args": { + "External id": 3284591,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "140844544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087460.570, "dur": 0.342, + "args": { + "External id": 3284592,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "141368832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087463.666, "dur": 0.171, + "args": { + "External id": 3284593,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "141893120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087464.836, "dur": 0.144, + "args": { + "External id": 3284594,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "142417408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087468.142, "dur": 2.348, + "args": { + "External id": 3284595,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "142941696"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087471.355, "dur": 0.169, + "args": { + "External id": 3284596,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "142941952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087475.056, "dur": 0.144, + "args": { + "External id": 3284597,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "144383744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087478.469, "dur": 1.173, + "args": { + "External id": 3284598,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "145825536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087480.435, "dur": 0.162, + "args": { + "External id": 3284599,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "147267328"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087483.744, "dur": 0.145, + "args": { + "External id": 3284600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "147267584"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087486.551, "dur": 0.141, + "args": { + "External id": 3284601,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "147791872"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087487.495, "dur": 0.149, + "args": { + "External id": 3284602,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "148316160"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087490.640, "dur": 2.521, + "args": { + "External id": 3284603,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "148840448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087493.848, "dur": 0.176, + "args": { + "External id": 3284604,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "149364736"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087496.912, "dur": 0.146, + "args": { + "External id": 3284605,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "149364992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087500.514, "dur": 1.243, + "args": { + "External id": 3284606,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "150806784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087502.234, "dur": 0.173, + "args": { + "External id": 3284607,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "152248576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087505.485, "dur": 0.331, + "args": { + "External id": 3284608,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "153690368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087508.542, "dur": 0.173, + "args": { + "External id": 3284609,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "153690624"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087509.826, "dur": 0.260, + "args": { + "External id": 3284610,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "154214912"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087513.555, "dur": 2.256, + "args": { + "External id": 3284611,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "154739200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087516.728, "dur": 0.156, + "args": { + "External id": 3284612,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "155263488"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087519.833, "dur": 0.302, + "args": { + "External id": 3284613,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "155787776"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087523.364, "dur": 0.989, + "args": { + "External id": 3284614,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "155788032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087525.159, "dur": 0.207, + "args": { + "External id": 3284615,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "157229824"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087528.931, "dur": 0.337, + "args": { + "External id": 3284616,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "158671616"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087532.059, "dur": 0.280, + "args": { + "External id": 3284617,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "160113408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595087533.330, "dur": 0.159, + "args": { + "External id": 3284618,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "160113664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[42076416], [], [], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595087569.219, "dur": 59.263, + "args": { + "External id": 3284619,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595087697.997, "dur": 404.175, + "args": { + "External id": 3284620,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[336611328], [42076416], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595087734.507, "dur": 361.837, + "args": { + "External id": 3284621,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 336611328, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[42076416], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5924, "In msg nelems": 42076416 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595087744.405, "dur": 342.947, + "args": { + "External id": 3284622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[42076416]], "Ev Idx": 5925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595088130.454, "dur": 2.420, + "args": { + "External id": 3284623,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5926, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 1336756, "tid": 1336756, + "ts": 1587595088225.647, "dur": 10075.733, + "args": { + "External id": 3284624,"Record function id": 0, "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088403.522, "dur": 6.439, + "args": { + "External id": 3284625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[336611328], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088413.613, "dur": 1.189, + "args": { + "External id": 3284626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088416.640, "dur": 1.331, + "args": { + "External id": 3284627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088419.935, "dur": 1.725, + "args": { + "External id": 3284628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088425.611, "dur": 0.942, + "args": { + "External id": 3284629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088428.367, "dur": 0.730, + "args": { + "External id": 3284630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088430.842, "dur": 0.524, + "args": { + "External id": 3284631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088435.675, "dur": 2.207, + "args": { + "External id": 3284632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088441.379, "dur": 0.721, + "args": { + "External id": 3284633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088443.856, "dur": 0.884, + "args": { + "External id": 3284634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088446.224, "dur": 0.614, + "args": { + "External id": 3284635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088448.580, "dur": 2.414, + "args": { + "External id": 3284636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088454.389, "dur": 0.675, + "args": { + "External id": 3284637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088456.580, "dur": 0.840, + "args": { + "External id": 3284638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088458.853, "dur": 0.573, + "args": { + "External id": 3284639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088463.468, "dur": 1.982, + "args": { + "External id": 3284640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088468.846, "dur": 0.596, + "args": { + "External id": 3284641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088471.015, "dur": 0.812, + "args": { + "External id": 3284642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088473.546, "dur": 1.346, + "args": { + "External id": 3284643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088476.770, "dur": 1.527, + "args": { + "External id": 3284644,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088481.291, "dur": 1.035, + "args": { + "External id": 3284645,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088483.773, "dur": 1.161, + "args": { + "External id": 3284646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088486.253, "dur": 0.463, + "args": { + "External id": 3284647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088490.450, "dur": 1.822, + "args": { + "External id": 3284648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088495.785, "dur": 0.774, + "args": { + "External id": 3284649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088498.162, "dur": 0.805, + "args": { + "External id": 3284650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088500.599, "dur": 0.898, + "args": { + "External id": 3284651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088502.893, "dur": 2.012, + "args": { + "External id": 3284652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088508.877, "dur": 0.633, + "args": { + "External id": 3284653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088511.030, "dur": 0.756, + "args": { + "External id": 3284654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088513.232, "dur": 0.465, + "args": { + "External id": 3284655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088517.663, "dur": 2.218, + "args": { + "External id": 3284656,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088523.657, "dur": 0.620, + "args": { + "External id": 3284657,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088525.799, "dur": 0.848, + "args": { + "External id": 3284658,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088528.126, "dur": 0.581, + "args": { + "External id": 3284659,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088530.123, "dur": 1.631, + "args": { + "External id": 3284660,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088535.359, "dur": 0.529, + "args": { + "External id": 3284661,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088550.083, "dur": 0.892, + "args": { + "External id": 3284662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088552.395, "dur": 0.934, + "args": { + "External id": 3284663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595088557.095, "dur": 1.776, + "args": { + "External id": 3284664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595088587.285, "dur": 9664.587, + "args": { + "External id": 3284665,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[42076416, 1], [], [], []], "Input Dims": [[8, 42076416], [], [], []], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595088611.050, "dur": 9632.286, + "args": { + "External id": 3284666,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[42076416, 1], [], [], []], "Input Dims": [[8, 42076416], [], [], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595088634.646, "dur": 5.298, + "args": { + "External id": 3284667,"Record function id": 0, "Concrete Inputs": ["[4290]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595088647.342, "dur": 9556.110, + "args": { + "External id": 3284668,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4290], [], [], [], [], [], [], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595088649.828, "dur": 9553.047, + "args": { + "External id": 3284669,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4290], [], [], [], [], [], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595088656.319, "dur": 5.462, + "args": { + "External id": 3284670,"Record function id": 0, "Concrete Inputs": ["[4290]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595088663.459, "dur": 9536.505, + "args": { + "External id": 3284671,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4290], [4290], []], "Ev Idx": 5974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595098555.201, "dur": 46.547, + "args": { + "External id": 3284672,"Record function id": 0, "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 1336756, "tid": 1336756, + "ts": 1587595098603.003, "dur": 211.245, + "args": { + "External id": 3284673,"Record function id": 0, "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595098645.042, "dur": 160.555, + "args": { + "External id": 3284674,"Sequence number": 32987104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[2048, 1], [4096, 1]], "Input Dims": [[32000, 2048], [16, 4096]], "Ev Idx": 5977 + } + }, + { + "ph": "s", "id": 228, "pid": 1336756, "tid": 1336756, "ts": 1587595098645.042, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595098720.016, "dur": 49.985, + "args": { + "External id": 3284675,"kernel_hash": "ccn7soditjd7aij6esf6mo5c3yt5tfcdquxcuegqoe6voa34krv7", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cn/ccn7soditjd7aij6esf6mo5c3yt5tfcdquxcuegqoe6voa34krv7.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096], [32000, 2048], [16, 4096, 2048], []], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595098871.493, "dur": 72.154, + "args": { + "External id": 3284676,"Record function id": 0, "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 1336756, "tid": 1336756, + "ts": 1587595098954.421, "dur": 7548.897, + "args": { + "External id": 3284677,"Record function id": 0, "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 1336756, "tid": 1336756, + "ts": 1587595098962.064, "dur": 724.869, + "args": { + "External id": 3284678,"Record function id": 0, "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595099063.425, "dur": 11.690, + "args": { + "External id": 3284679,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595099088.754, "dur": 43.683, + "args": { + "External id": 3284680,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099097.561, "dur": 2.270, + "args": { + "External id": 3284681,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099101.520, "dur": 0.423, + "args": { + "External id": 3284682,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099105.199, "dur": 2.501, + "args": { + "External id": 3284683,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099108.678, "dur": 0.543, + "args": { + "External id": 3284684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099109.681, "dur": 0.399, + "args": { + "External id": 3284685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099116.197, "dur": 0.372, + "args": { + "External id": 3284686,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099117.300, "dur": 0.195, + "args": { + "External id": 3284687,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099118.515, "dur": 1.689, + "args": { + "External id": 3284688,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099126.022, "dur": 0.177, + "args": { + "External id": 3284689,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595099142.761, "dur": 38.089, + "args": { + "External id": 3284690,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595099212.811, "dur": 110.349, + "args": { + "External id": 3284691,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595099225.895, "dur": 3.658, + "args": { + "External id": 3284692,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595099234.151, "dur": 9.148, + "args": { + "External id": 3284693,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595099238.450, "dur": 4.471, + "args": { + "External id": 3284694,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099241.289, "dur": 0.392, + "args": { + "External id": 3284695,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595099249.804, "dur": 33.209, + "args": { + "External id": 3284696,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099253.453, "dur": 0.496, + "args": { + "External id": 3284697,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099255.244, "dur": 0.507, + "args": { + "External id": 3284698,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099258.923, "dur": 0.502, + "args": { + "External id": 3284699,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099262.555, "dur": 0.461, + "args": { + "External id": 3284700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099264.008, "dur": 3.191, + "args": { + "External id": 3284701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099268.386, "dur": 0.525, + "args": { + "External id": 3284702,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099271.937, "dur": 0.154, + "args": { + "External id": 3284703,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099275.107, "dur": 0.212, + "args": { + "External id": 3284704,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099278.454, "dur": 0.476, + "args": { + "External id": 3284705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595099293.949, "dur": 20.762, + "args": { + "External id": 3284706,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595099371.787, "dur": 236.784, + "args": { + "External id": 3284707,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595099402.547, "dur": 202.093, + "args": { + "External id": 3284708,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6011, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595099411.557, "dur": 187.828, + "args": { + "External id": 3284709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595099629.653, "dur": 2.006, + "args": { + "External id": 3284710,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6013, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 1336756, "tid": 1336756, + "ts": 1587595099705.764, "dur": 6607.428, + "args": { + "External id": 3284711,"Record function id": 0, "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099790.578, "dur": 4.651, + "args": { + "External id": 3284712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099798.211, "dur": 1.199, + "args": { + "External id": 3284713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099801.071, "dur": 0.963, + "args": { + "External id": 3284714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099804.122, "dur": 2.111, + "args": { + "External id": 3284715,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099807.343, "dur": 0.804, + "args": { + "External id": 3284716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099809.689, "dur": 0.822, + "args": { + "External id": 3284717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099816.730, "dur": 0.778, + "args": { + "External id": 3284718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099819.013, "dur": 1.994, + "args": { + "External id": 3284719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099822.592, "dur": 0.764, + "args": { + "External id": 3284720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595099824.531, "dur": 0.740, + "args": { + "External id": 3284721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595099844.590, "dur": 6429.345, + "args": { + "External id": 3284722,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595099860.210, "dur": 6406.859, + "args": { + "External id": 3284723,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595099909.305, "dur": 13.177, + "args": { + "External id": 3284724,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595099925.764, "dur": 6304.753, + "args": { + "External id": 3284725,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595099928.554, "dur": 6301.518, + "args": { + "External id": 3284726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595099934.558, "dur": 7.115, + "args": { + "External id": 3284727,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595099943.563, "dur": 6283.768, + "args": { + "External id": 3284728,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595106449.563, "dur": 30.203, + "args": { + "External id": 3284729,"Sequence number": 32987105, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6032 + } + }, + { + "ph": "s", "id": 227, "pid": 1336756, "tid": 1336756, "ts": 1587595106449.563, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595106464.969, "dur": 10.257, + "args": { + "External id": 3284730,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595106470.755, "dur": 4.203, + "args": { + "External id": 3284731,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595106543.468, "dur": 89.349, + "args": { + "External id": 3284732,"Record function id": 0, "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595106634.607, "dur": 1154.542, + "args": { + "External id": 3284733,"Record function id": 0, "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595106677.125, "dur": 1099.346, + "args": { + "External id": 3284734,"Sequence number": 32987106, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6037 + } + }, + { + "ph": "s", "id": 226, "pid": 1336756, "tid": 1336756, "ts": 1587595106677.125, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595106744.639, "dur": 43.098, + "args": { + "External id": 3284735,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595106805.646, "dur": 127.979, + "args": { + "External id": 3284736,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595106948.425, "dur": 96.227, + "args": { + "External id": 3284737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595107057.045, "dur": 36.576, + "args": { + "External id": 3284738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595107125.079, "dur": 34.190, + "args": { + "External id": 3284739,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595107178.050, "dur": 16.117, + "args": { + "External id": 3284740,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595107214.250, "dur": 130.174, + "args": { + "External id": 3284741,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595107267.834, "dur": 9.960, + "args": { + "External id": 3284742,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595107272.665, "dur": 4.370, + "args": { + "External id": 3284743,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595107282.390, "dur": 3.512, + "args": { + "External id": 3284744,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595107287.276, "dur": 1.029, + "args": { + "External id": 3284745,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595107290.848, "dur": 2.480, + "args": { + "External id": 3284746,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595107355.367, "dur": 46.697, + "args": { + "External id": 3284747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595107433.829, "dur": 28.964, + "args": { + "External id": 3284748,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595107470.680, "dur": 41.129, + "args": { + "External id": 3284749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595107521.459, "dur": 35.612, + "args": { + "External id": 3284750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595107580.257, "dur": 25.009, + "args": { + "External id": 3284751,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595107611.018, "dur": 34.750, + "args": { + "External id": 3284752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595107672.766, "dur": 22.533, + "args": { + "External id": 3284753,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 1336756, "tid": 1336756, + "ts": 1587595107854.007, "dur": 112.429, + "args": { + "External id": 3284754,"Record function id": 0, "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595108097.051, "dur": 48.034, + "args": { + "External id": 3284755,"Record function id": 0, "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 1336756, "tid": 1336756, + "ts": 1587595108154.385, "dur": 20312.676, + "args": { + "External id": 3284756,"Record function id": 0, "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 1336756, "tid": 1336756, + "ts": 1587595108164.954, "dur": 896.641, + "args": { + "External id": 3284757,"Record function id": 0, "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595108236.549, "dur": 9.223, + "args": { + "External id": 3284758,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595108261.651, "dur": 43.060, + "args": { + "External id": 3284759,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108266.740, "dur": 3.977, + "args": { + "External id": 3284760,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108272.236, "dur": 0.629, + "args": { + "External id": 3284761,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108276.383, "dur": 0.964, + "args": { + "External id": 3284762,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108280.493, "dur": 0.579, + "args": { + "External id": 3284763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108282.237, "dur": 1.544, + "args": { + "External id": 3284764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108286.615, "dur": 0.434, + "args": { + "External id": 3284765,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108290.872, "dur": 0.435, + "args": { + "External id": 3284766,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108294.355, "dur": 0.156, + "args": { + "External id": 3284767,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108295.367, "dur": 2.524, + "args": { + "External id": 3284768,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595108315.154, "dur": 44.198, + "args": { + "External id": 3284769,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595108391.753, "dur": 118.897, + "args": { + "External id": 3284770,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595108402.079, "dur": 3.866, + "args": { + "External id": 3284771,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595108410.941, "dur": 9.575, + "args": { + "External id": 3284772,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595108415.257, "dur": 4.839, + "args": { + "External id": 3284773,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108418.118, "dur": 0.702, + "args": { + "External id": 3284774,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595108426.885, "dur": 35.997, + "args": { + "External id": 3284775,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108431.321, "dur": 0.533, + "args": { + "External id": 3284776,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108432.985, "dur": 2.015, + "args": { + "External id": 3284777,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108436.245, "dur": 3.046, + "args": { + "External id": 3284778,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108442.453, "dur": 0.536, + "args": { + "External id": 3284779,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108444.100, "dur": 0.475, + "args": { + "External id": 3284780,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108450.422, "dur": 0.359, + "args": { + "External id": 3284781,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108451.733, "dur": 0.293, + "args": { + "External id": 3284782,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108452.862, "dur": 0.353, + "args": { + "External id": 3284783,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595108458.021, "dur": 0.347, + "args": { + "External id": 3284784,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595108475.355, "dur": 25.839, + "args": { + "External id": 3284785,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595108560.179, "dur": 362.005, + "args": { + "External id": 3284786,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595108594.566, "dur": 322.662, + "args": { + "External id": 3284787,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6090, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595108604.143, "dur": 268.779, + "args": { + "External id": 3284788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595108951.397, "dur": 2.995, + "args": { + "External id": 3284789,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6092, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 1336756, "tid": 1336756, + "ts": 1587595109083.693, "dur": 19201.914, + "args": { + "External id": 3284790,"Record function id": 0, "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109177.896, "dur": 6.095, + "args": { + "External id": 3284791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109187.653, "dur": 1.139, + "args": { + "External id": 3284792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109190.780, "dur": 0.982, + "args": { + "External id": 3284793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109193.931, "dur": 1.115, + "args": { + "External id": 3284794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109196.723, "dur": 1.031, + "args": { + "External id": 3284795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109202.125, "dur": 1.146, + "args": { + "External id": 3284796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109204.827, "dur": 0.944, + "args": { + "External id": 3284797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109207.804, "dur": 3.393, + "args": { + "External id": 3284798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109212.870, "dur": 0.756, + "args": { + "External id": 3284799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595109217.403, "dur": 0.852, + "args": { + "External id": 3284800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595109238.590, "dur": 19008.476, + "args": { + "External id": 3284801,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595109253.581, "dur": 18986.206, + "args": { + "External id": 3284802,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595109275.574, "dur": 13.116, + "args": { + "External id": 3284803,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595109291.904, "dur": 18913.664, + "args": { + "External id": 3284804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595109294.797, "dur": 18910.291, + "args": { + "External id": 3284805,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595109300.646, "dur": 4.807, + "args": { + "External id": 3284806,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595109307.453, "dur": 18894.696, + "args": { + "External id": 3284807,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595128412.725, "dur": 29.805, + "args": { + "External id": 3284808,"Sequence number": 32987107, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6111 + } + }, + { + "ph": "s", "id": 225, "pid": 1336756, "tid": 1336756, "ts": 1587595128412.725, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595128431.205, "dur": 6.958, + "args": { + "External id": 3284809,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595128433.960, "dur": 3.990, + "args": { + "External id": 3284810,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595128508.510, "dur": 81.340, + "args": { + "External id": 3284811,"Record function id": 0, "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595128591.409, "dur": 1090.677, + "args": { + "External id": 3284812,"Record function id": 0, "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595128630.193, "dur": 1039.515, + "args": { + "External id": 3284813,"Sequence number": 32987108, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6116 + } + }, + { + "ph": "s", "id": 224, "pid": 1336756, "tid": 1336756, "ts": 1587595128630.193, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595128710.011, "dur": 40.273, + "args": { + "External id": 3284814,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595128764.879, "dur": 100.841, + "args": { + "External id": 3284815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595128890.155, "dur": 42.882, + "args": { + "External id": 3284816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595128941.573, "dur": 30.785, + "args": { + "External id": 3284817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595129042.309, "dur": 31.907, + "args": { + "External id": 3284818,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595129092.374, "dur": 18.129, + "args": { + "External id": 3284819,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595129127.064, "dur": 132.949, + "args": { + "External id": 3284820,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595129177.047, "dur": 15.779, + "args": { + "External id": 3284821,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595129184.166, "dur": 7.800, + "args": { + "External id": 3284822,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595129195.252, "dur": 4.270, + "args": { + "External id": 3284823,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595129200.891, "dur": 3.480, + "args": { + "External id": 3284824,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595129206.671, "dur": 2.580, + "args": { + "External id": 3284825,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595129270.981, "dur": 50.466, + "args": { + "External id": 3284826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595129351.802, "dur": 28.795, + "args": { + "External id": 3284827,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595129389.216, "dur": 42.869, + "args": { + "External id": 3284828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595129437.391, "dur": 34.575, + "args": { + "External id": 3284829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595129495.110, "dur": 26.140, + "args": { + "External id": 3284830,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595129526.447, "dur": 33.780, + "args": { + "External id": 3284831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595129582.431, "dur": 21.262, + "args": { + "External id": 3284832,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 1336756, "tid": 1336756, + "ts": 1587595129742.554, "dur": 74.300, + "args": { + "External id": 3284833,"Record function id": 0, "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595129910.038, "dur": 48.569, + "args": { + "External id": 3284834,"Record function id": 0, "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 1336756, "tid": 1336756, + "ts": 1587595129967.898, "dur": 17785.675, + "args": { + "External id": 3284835,"Record function id": 0, "Ev Idx": 6138 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 1336756, "tid": 1336756, + "ts": 1587595129978.066, "dur": 838.861, + "args": { + "External id": 3284836,"Record function id": 0, "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595130092.132, "dur": 8.661, + "args": { + "External id": 3284837,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595130114.138, "dur": 42.999, + "args": { + "External id": 3284838,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130119.340, "dur": 2.056, + "args": { + "External id": 3284839,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130127.609, "dur": 0.242, + "args": { + "External id": 3284840,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130128.627, "dur": 0.601, + "args": { + "External id": 3284841,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130130.264, "dur": 0.574, + "args": { + "External id": 3284842,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130136.617, "dur": 0.634, + "args": { + "External id": 3284843,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130138.500, "dur": 0.669, + "args": { + "External id": 3284844,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130142.986, "dur": 3.943, + "args": { + "External id": 3284845,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130148.260, "dur": 0.567, + "args": { + "External id": 3284846,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130149.702, "dur": 0.147, + "args": { + "External id": 3284847,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595130170.247, "dur": 48.082, + "args": { + "External id": 3284848,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595130250.045, "dur": 113.582, + "args": { + "External id": 3284849,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595130262.668, "dur": 3.826, + "args": { + "External id": 3284850,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595130271.302, "dur": 9.335, + "args": { + "External id": 3284851,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595130275.715, "dur": 4.503, + "args": { + "External id": 3284852,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130278.562, "dur": 0.539, + "args": { + "External id": 3284853,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595130288.695, "dur": 34.194, + "args": { + "External id": 3284854,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130290.336, "dur": 2.697, + "args": { + "External id": 3284855,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130294.541, "dur": 0.650, + "args": { + "External id": 3284856,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130298.261, "dur": 0.357, + "args": { + "External id": 3284857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130302.048, "dur": 1.134, + "args": { + "External id": 3284858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130305.972, "dur": 0.366, + "args": { + "External id": 3284859,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130307.266, "dur": 0.146, + "args": { + "External id": 3284860,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130310.693, "dur": 0.481, + "args": { + "External id": 3284861,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130314.518, "dur": 0.294, + "args": { + "External id": 3284862,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595130315.459, "dur": 2.166, + "args": { + "External id": 3284863,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595130333.625, "dur": 22.184, + "args": { + "External id": 3284864,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595130415.171, "dur": 315.899, + "args": { + "External id": 3284865,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595130450.495, "dur": 275.976, + "args": { + "External id": 3284866,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6169, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595130463.306, "dur": 257.978, + "args": { + "External id": 3284867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595130756.460, "dur": 2.412, + "args": { + "External id": 3284868,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6171, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 1336756, "tid": 1336756, + "ts": 1587595130834.654, "dur": 16745.003, + "args": { + "External id": 3284869,"Record function id": 0, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130939.085, "dur": 6.160, + "args": { + "External id": 3284870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130948.741, "dur": 0.684, + "args": { + "External id": 3284871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130951.253, "dur": 2.447, + "args": { + "External id": 3284872,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130955.343, "dur": 1.029, + "args": { + "External id": 3284873,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130957.991, "dur": 0.971, + "args": { + "External id": 3284874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130960.346, "dur": 0.928, + "args": { + "External id": 3284875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130965.139, "dur": 0.986, + "args": { + "External id": 3284876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130967.772, "dur": 2.887, + "args": { + "External id": 3284877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130972.299, "dur": 0.852, + "args": { + "External id": 3284878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595130974.580, "dur": 0.707, + "args": { + "External id": 3284879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595131030.370, "dur": 16511.636, + "args": { + "External id": 3284880,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595131045.496, "dur": 16490.025, + "args": { + "External id": 3284881,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595131068.398, "dur": 13.915, + "args": { + "External id": 3284882,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595131085.394, "dur": 16415.086, + "args": { + "External id": 3284883,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595131087.727, "dur": 16412.266, + "args": { + "External id": 3284884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595131095.900, "dur": 5.338, + "args": { + "External id": 3284885,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595131103.402, "dur": 16393.376, + "args": { + "External id": 3284886,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595147703.335, "dur": 28.073, + "args": { + "External id": 3284887,"Sequence number": 32987109, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6190 + } + }, + { + "ph": "s", "id": 223, "pid": 1336756, "tid": 1336756, "ts": 1587595147703.335, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595147720.204, "dur": 6.858, + "args": { + "External id": 3284888,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595147723.120, "dur": 3.689, + "args": { + "External id": 3284889,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595147791.070, "dur": 76.612, + "args": { + "External id": 3284890,"Record function id": 0, "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595147869.122, "dur": 1109.311, + "args": { + "External id": 3284891,"Record function id": 0, "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595147935.818, "dur": 1029.417, + "args": { + "External id": 3284892,"Sequence number": 32987110, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6195 + } + }, + { + "ph": "s", "id": 222, "pid": 1336756, "tid": 1336756, "ts": 1587595147935.818, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595148042.403, "dur": 45.720, + "args": { + "External id": 3284893,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595148102.989, "dur": 103.485, + "args": { + "External id": 3284894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595148215.941, "dur": 44.445, + "args": { + "External id": 3284895,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595148270.135, "dur": 31.690, + "args": { + "External id": 3284896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595148326.881, "dur": 26.106, + "args": { + "External id": 3284897,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595148371.727, "dur": 14.903, + "args": { + "External id": 3284898,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595148405.314, "dur": 126.049, + "args": { + "External id": 3284899,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595148452.171, "dur": 12.315, + "args": { + "External id": 3284900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595148458.592, "dur": 4.940, + "args": { + "External id": 3284901,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595148467.127, "dur": 5.551, + "args": { + "External id": 3284902,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595148473.843, "dur": 0.765, + "args": { + "External id": 3284903,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595148477.267, "dur": 4.068, + "args": { + "External id": 3284904,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595148542.225, "dur": 45.983, + "args": { + "External id": 3284905,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595148617.605, "dur": 28.434, + "args": { + "External id": 3284906,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595148654.421, "dur": 40.167, + "args": { + "External id": 3284907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595148702.169, "dur": 35.189, + "args": { + "External id": 3284908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595148758.606, "dur": 25.357, + "args": { + "External id": 3284909,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595148789.989, "dur": 34.507, + "args": { + "External id": 3284910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595148847.587, "dur": 18.379, + "args": { + "External id": 3284911,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 1336756, "tid": 1336756, + "ts": 1587595149078.880, "dur": 74.868, + "args": { + "External id": 3284912,"Record function id": 0, "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595149227.413, "dur": 47.130, + "args": { + "External id": 3284913,"Record function id": 0, "Ev Idx": 6216 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 1336756, "tid": 1336756, + "ts": 1587595149283.692, "dur": 17998.161, + "args": { + "External id": 3284914,"Record function id": 0, "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 1336756, "tid": 1336756, + "ts": 1587595149293.434, "dur": 897.318, + "args": { + "External id": 3284915,"Record function id": 0, "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595149370.517, "dur": 8.441, + "args": { + "External id": 3284916,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595149391.846, "dur": 43.939, + "args": { + "External id": 3284917,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149397.055, "dur": 2.589, + "args": { + "External id": 3284918,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149406.700, "dur": 0.221, + "args": { + "External id": 3284919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149408.191, "dur": 0.598, + "args": { + "External id": 3284920,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149411.981, "dur": 0.552, + "args": { + "External id": 3284921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149415.723, "dur": 0.374, + "args": { + "External id": 3284922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149416.998, "dur": 0.399, + "args": { + "External id": 3284923,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149420.830, "dur": 3.720, + "args": { + "External id": 3284924,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149425.293, "dur": 0.563, + "args": { + "External id": 3284925,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149426.812, "dur": 0.860, + "args": { + "External id": 3284926,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595149448.412, "dur": 41.861, + "args": { + "External id": 3284927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595149526.976, "dur": 134.765, + "args": { + "External id": 3284928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595149540.231, "dur": 5.548, + "args": { + "External id": 3284929,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595149551.191, "dur": 12.316, + "args": { + "External id": 3284930,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595149555.507, "dur": 7.599, + "args": { + "External id": 3284931,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149561.306, "dur": 0.522, + "args": { + "External id": 3284932,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595149569.446, "dur": 33.959, + "args": { + "External id": 3284933,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149571.023, "dur": 0.794, + "args": { + "External id": 3284934,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149574.432, "dur": 2.520, + "args": { + "External id": 3284935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149577.904, "dur": 0.387, + "args": { + "External id": 3284936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149578.951, "dur": 1.920, + "args": { + "External id": 3284937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149586.519, "dur": 0.333, + "args": { + "External id": 3284938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149587.484, "dur": 0.720, + "args": { + "External id": 3284939,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149589.046, "dur": 0.538, + "args": { + "External id": 3284940,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149595.270, "dur": 0.681, + "args": { + "External id": 3284941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595149596.899, "dur": 0.351, + "args": { + "External id": 3284942,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595149626.867, "dur": 26.506, + "args": { + "External id": 3284943,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595149713.155, "dur": 380.268, + "args": { + "External id": 3284944,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595149748.194, "dur": 339.652, + "args": { + "External id": 3284945,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6248, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595149761.201, "dur": 320.335, + "args": { + "External id": 3284946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595150119.465, "dur": 2.478, + "args": { + "External id": 3284947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6250, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 1336756, "tid": 1336756, + "ts": 1587595150210.967, "dur": 16884.324, + "args": { + "External id": 3284948,"Record function id": 0, "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150299.472, "dur": 6.127, + "args": { + "External id": 3284949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150308.843, "dur": 0.914, + "args": { + "External id": 3284950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150311.295, "dur": 1.981, + "args": { + "External id": 3284951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150315.094, "dur": 1.568, + "args": { + "External id": 3284952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150318.060, "dur": 1.182, + "args": { + "External id": 3284953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150320.499, "dur": 1.107, + "args": { + "External id": 3284954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150325.452, "dur": 1.127, + "args": { + "External id": 3284955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150328.139, "dur": 2.302, + "args": { + "External id": 3284956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150331.723, "dur": 0.850, + "args": { + "External id": 3284957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595150336.047, "dur": 0.803, + "args": { + "External id": 3284958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595150355.698, "dur": 16702.288, + "args": { + "External id": 3284959,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595150371.096, "dur": 16680.284, + "args": { + "External id": 3284960,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595150391.787, "dur": 13.644, + "args": { + "External id": 3284961,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595150408.415, "dur": 16609.956, + "args": { + "External id": 3284962,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595150411.368, "dur": 16606.548, + "args": { + "External id": 3284963,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595150419.364, "dur": 5.881, + "args": { + "External id": 3284964,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595150427.178, "dur": 16587.793, + "args": { + "External id": 3284965,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595167217.956, "dur": 40.377, + "args": { + "External id": 3284966,"Sequence number": 32987111, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6269 + } + }, + { + "ph": "s", "id": 221, "pid": 1336756, "tid": 1336756, "ts": 1587595167217.956, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595167234.791, "dur": 18.977, + "args": { + "External id": 3284967,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595167249.347, "dur": 4.181, + "args": { + "External id": 3284968,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595167319.138, "dur": 77.068, + "args": { + "External id": 3284969,"Record function id": 0, "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595167399.624, "dur": 1075.152, + "args": { + "External id": 3284970,"Record function id": 0, "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595167438.982, "dur": 1023.325, + "args": { + "External id": 3284971,"Sequence number": 32987112, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6274 + } + }, + { + "ph": "s", "id": 220, "pid": 1336756, "tid": 1336756, "ts": 1587595167438.982, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595167504.854, "dur": 41.205, + "args": { + "External id": 3284972,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595167557.700, "dur": 101.090, + "args": { + "External id": 3284973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595167672.477, "dur": 37.547, + "args": { + "External id": 3284974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595167715.888, "dur": 31.105, + "args": { + "External id": 3284975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595167773.513, "dur": 24.078, + "args": { + "External id": 3284976,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595167814.068, "dur": 15.544, + "args": { + "External id": 3284977,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595167848.379, "dur": 193.602, + "args": { + "External id": 3284978,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595167918.890, "dur": 11.399, + "args": { + "External id": 3284979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595167923.745, "dur": 5.584, + "args": { + "External id": 3284980,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595167932.602, "dur": 4.567, + "args": { + "External id": 3284981,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595167938.728, "dur": 0.983, + "args": { + "External id": 3284982,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595167944.664, "dur": 4.582, + "args": { + "External id": 3284983,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595168055.554, "dur": 53.642, + "args": { + "External id": 3284984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595168143.767, "dur": 30.217, + "args": { + "External id": 3284985,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595168182.605, "dur": 42.792, + "args": { + "External id": 3284986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595168232.504, "dur": 34.342, + "args": { + "External id": 3284987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595168287.341, "dur": 29.927, + "args": { + "External id": 3284988,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595168322.466, "dur": 33.774, + "args": { + "External id": 3284989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595168375.699, "dur": 18.538, + "args": { + "External id": 3284990,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 1336756, "tid": 1336756, + "ts": 1587595168538.418, "dur": 72.521, + "args": { + "External id": 3284991,"Record function id": 0, "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595168682.078, "dur": 46.522, + "args": { + "External id": 3284992,"Record function id": 0, "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 1336756, "tid": 1336756, + "ts": 1587595168762.764, "dur": 17889.815, + "args": { + "External id": 3284993,"Record function id": 0, "Ev Idx": 6296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 1336756, "tid": 1336756, + "ts": 1587595168775.624, "dur": 942.834, + "args": { + "External id": 3284994,"Record function id": 0, "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595168852.829, "dur": 7.075, + "args": { + "External id": 3284995,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595168872.633, "dur": 63.157, + "args": { + "External id": 3284996,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168899.260, "dur": 2.422, + "args": { + "External id": 3284997,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168906.119, "dur": 0.620, + "args": { + "External id": 3284998,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168907.524, "dur": 0.369, + "args": { + "External id": 3284999,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168911.729, "dur": 2.386, + "args": { + "External id": 3285000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168914.763, "dur": 0.407, + "args": { + "External id": 3285001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168918.289, "dur": 0.404, + "args": { + "External id": 3285002,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168921.786, "dur": 2.116, + "args": { + "External id": 3285003,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168924.446, "dur": 0.353, + "args": { + "External id": 3285004,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595168927.781, "dur": 0.187, + "args": { + "External id": 3285005,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595168948.917, "dur": 79.522, + "args": { + "External id": 3285006,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595169066.015, "dur": 122.282, + "args": { + "External id": 3285007,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595169080.304, "dur": 4.799, + "args": { + "External id": 3285008,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595169090.168, "dur": 12.505, + "args": { + "External id": 3285009,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595169094.211, "dur": 8.040, + "args": { + "External id": 3285010,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169099.294, "dur": 1.424, + "args": { + "External id": 3285011,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595169109.312, "dur": 31.087, + "args": { + "External id": 3285012,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169110.848, "dur": 0.243, + "args": { + "External id": 3285013,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169114.612, "dur": 0.545, + "args": { + "External id": 3285014,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169115.761, "dur": 0.588, + "args": { + "External id": 3285015,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169121.552, "dur": 1.543, + "args": { + "External id": 3285016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169123.830, "dur": 0.373, + "args": { + "External id": 3285017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169124.645, "dur": 2.787, + "args": { + "External id": 3285018,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169129.972, "dur": 0.289, + "args": { + "External id": 3285019,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169130.737, "dur": 0.177, + "args": { + "External id": 3285020,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169134.075, "dur": 0.194, + "args": { + "External id": 3285021,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595169154.695, "dur": 26.030, + "args": { + "External id": 3285022,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595169244.524, "dur": 383.933, + "args": { + "External id": 3285023,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595169279.557, "dur": 344.243, + "args": { + "External id": 3285024,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6327, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595169290.950, "dur": 327.225, + "args": { + "External id": 3285025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595169651.257, "dur": 2.520, + "args": { + "External id": 3285026,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6329, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 1336756, "tid": 1336756, + "ts": 1587595169737.311, "dur": 16727.464, + "args": { + "External id": 3285027,"Record function id": 0, "Ev Idx": 6330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169825.337, "dur": 5.217, + "args": { + "External id": 3285028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169833.845, "dur": 1.139, + "args": { + "External id": 3285029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169836.623, "dur": 2.183, + "args": { + "External id": 3285030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169840.639, "dur": 0.875, + "args": { + "External id": 3285031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169842.871, "dur": 0.849, + "args": { + "External id": 3285032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169844.872, "dur": 1.185, + "args": { + "External id": 3285033,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169849.610, "dur": 0.791, + "args": { + "External id": 3285034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169853.263, "dur": 2.598, + "args": { + "External id": 3285035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169857.202, "dur": 0.829, + "args": { + "External id": 3285036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595169859.170, "dur": 0.733, + "args": { + "External id": 3285037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595169900.393, "dur": 16521.760, + "args": { + "External id": 3285038,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595169918.262, "dur": 16496.494, + "args": { + "External id": 3285039,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595169939.100, "dur": 14.105, + "args": { + "External id": 3285040,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595169956.080, "dur": 16423.615, + "args": { + "External id": 3285041,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595169958.596, "dur": 16420.588, + "args": { + "External id": 3285042,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595169963.887, "dur": 6.365, + "args": { + "External id": 3285043,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595169971.895, "dur": 16403.968, + "args": { + "External id": 3285044,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595186599.169, "dur": 29.939, + "args": { + "External id": 3285045,"Sequence number": 32987113, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6348 + } + }, + { + "ph": "s", "id": 219, "pid": 1336756, "tid": 1336756, "ts": 1587595186599.169, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595186614.629, "dur": 10.095, + "args": { + "External id": 3285046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595186620.197, "dur": 4.303, + "args": { + "External id": 3285047,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595186690.479, "dur": 74.989, + "args": { + "External id": 3285048,"Record function id": 0, "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595186767.120, "dur": 1062.627, + "args": { + "External id": 3285049,"Record function id": 0, "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595186807.962, "dur": 1009.579, + "args": { + "External id": 3285050,"Sequence number": 32987114, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6353 + } + }, + { + "ph": "s", "id": 218, "pid": 1336756, "tid": 1336756, "ts": 1587595186807.962, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595186871.061, "dur": 57.555, + "args": { + "External id": 3285051,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595186943.704, "dur": 127.232, + "args": { + "External id": 3285052,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595187084.240, "dur": 41.118, + "args": { + "External id": 3285053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595187134.149, "dur": 31.379, + "args": { + "External id": 3285054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595187194.114, "dur": 30.945, + "args": { + "External id": 3285055,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595187244.989, "dur": 15.703, + "args": { + "External id": 3285056,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595187278.984, "dur": 129.318, + "args": { + "External id": 3285057,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595187329.022, "dur": 9.675, + "args": { + "External id": 3285058,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595187333.427, "dur": 4.560, + "args": { + "External id": 3285059,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595187341.065, "dur": 4.837, + "args": { + "External id": 3285060,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595187349.786, "dur": 0.931, + "args": { + "External id": 3285061,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595187352.829, "dur": 5.851, + "args": { + "External id": 3285062,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595187418.603, "dur": 44.177, + "args": { + "External id": 3285063,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595187491.109, "dur": 27.880, + "args": { + "External id": 3285064,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595187526.359, "dur": 39.460, + "args": { + "External id": 3285065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595187576.275, "dur": 35.574, + "args": { + "External id": 3285066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595187635.060, "dur": 26.516, + "args": { + "External id": 3285067,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595187667.540, "dur": 34.684, + "args": { + "External id": 3285068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595187721.588, "dur": 19.885, + "args": { + "External id": 3285069,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 1336756, "tid": 1336756, + "ts": 1587595187910.148, "dur": 105.623, + "args": { + "External id": 3285070,"Record function id": 0, "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595188095.630, "dur": 47.701, + "args": { + "External id": 3285071,"Record function id": 0, "Ev Idx": 6374 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 1336756, "tid": 1336756, + "ts": 1587595188152.936, "dur": 18112.638, + "args": { + "External id": 3285072,"Record function id": 0, "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 1336756, "tid": 1336756, + "ts": 1587595188165.334, "dur": 948.291, + "args": { + "External id": 3285073,"Record function id": 0, "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595188242.066, "dur": 9.013, + "args": { + "External id": 3285074,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595188263.893, "dur": 42.012, + "args": { + "External id": 3285075,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188270.533, "dur": 2.278, + "args": { + "External id": 3285076,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188277.133, "dur": 0.432, + "args": { + "External id": 3285077,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188280.924, "dur": 0.239, + "args": { + "External id": 3285078,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188281.659, "dur": 0.276, + "args": { + "External id": 3285079,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188284.412, "dur": 0.739, + "args": { + "External id": 3285080,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188288.439, "dur": 0.619, + "args": { + "External id": 3285081,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188289.768, "dur": 4.095, + "args": { + "External id": 3285082,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188294.370, "dur": 0.556, + "args": { + "External id": 3285083,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188297.526, "dur": 0.382, + "args": { + "External id": 3285084,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595188316.312, "dur": 43.774, + "args": { + "External id": 3285085,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595188391.506, "dur": 115.347, + "args": { + "External id": 3285086,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595188405.986, "dur": 3.327, + "args": { + "External id": 3285087,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595188414.318, "dur": 8.837, + "args": { + "External id": 3285088,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595188418.473, "dur": 4.269, + "args": { + "External id": 3285089,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188421.178, "dur": 0.524, + "args": { + "External id": 3285090,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595188428.946, "dur": 33.309, + "args": { + "External id": 3285091,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188431.453, "dur": 2.509, + "args": { + "External id": 3285092,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188435.299, "dur": 0.234, + "args": { + "External id": 3285093,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188436.555, "dur": 0.449, + "args": { + "External id": 3285094,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188441.021, "dur": 1.531, + "args": { + "External id": 3285095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188444.173, "dur": 0.157, + "args": { + "External id": 3285096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188446.496, "dur": 0.432, + "args": { + "External id": 3285097,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188450.509, "dur": 0.181, + "args": { + "External id": 3285098,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188452.145, "dur": 0.311, + "args": { + "External id": 3285099,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595188454.183, "dur": 2.554, + "args": { + "External id": 3285100,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595188474.387, "dur": 24.691, + "args": { + "External id": 3285101,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595188558.347, "dur": 413.584, + "args": { + "External id": 3285102,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595188593.035, "dur": 373.500, + "args": { + "External id": 3285103,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6406, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595188604.328, "dur": 355.941, + "args": { + "External id": 3285104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595189035.610, "dur": 3.342, + "args": { + "External id": 3285105,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6408, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 1336756, "tid": 1336756, + "ts": 1587595189134.513, "dur": 16943.108, + "args": { + "External id": 3285106,"Record function id": 0, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189230.533, "dur": 6.597, + "args": { + "External id": 3285107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189240.835, "dur": 1.197, + "args": { + "External id": 3285108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189244.447, "dur": 1.887, + "args": { + "External id": 3285109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189248.249, "dur": 1.094, + "args": { + "External id": 3285110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189251.076, "dur": 0.984, + "args": { + "External id": 3285111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189253.540, "dur": 1.160, + "args": { + "External id": 3285112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189258.178, "dur": 0.969, + "args": { + "External id": 3285113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189260.652, "dur": 3.032, + "args": { + "External id": 3285114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189265.133, "dur": 0.725, + "args": { + "External id": 3285115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595189267.367, "dur": 0.718, + "args": { + "External id": 3285116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595189288.313, "dur": 16742.877, + "args": { + "External id": 3285117,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595189311.453, "dur": 16712.873, + "args": { + "External id": 3285118,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595189331.723, "dur": 12.820, + "args": { + "External id": 3285119,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595189347.439, "dur": 16618.806, + "args": { + "External id": 3285120,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595189349.859, "dur": 16615.879, + "args": { + "External id": 3285121,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595189356.494, "dur": 4.874, + "args": { + "External id": 3285122,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595189362.933, "dur": 16599.871, + "args": { + "External id": 3285123,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595206214.066, "dur": 27.767, + "args": { + "External id": 3285124,"Sequence number": 32987115, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6427 + } + }, + { + "ph": "s", "id": 217, "pid": 1336756, "tid": 1336756, "ts": 1587595206214.066, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595206230.057, "dur": 7.306, + "args": { + "External id": 3285125,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595206233.244, "dur": 3.830, + "args": { + "External id": 3285126,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595206306.696, "dur": 77.697, + "args": { + "External id": 3285127,"Record function id": 0, "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595206386.493, "dur": 1083.499, + "args": { + "External id": 3285128,"Record function id": 0, "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595206426.220, "dur": 1030.601, + "args": { + "External id": 3285129,"Sequence number": 32987116, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6432 + } + }, + { + "ph": "s", "id": 216, "pid": 1336756, "tid": 1336756, "ts": 1587595206426.220, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595206489.377, "dur": 39.629, + "args": { + "External id": 3285130,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595206540.764, "dur": 102.745, + "args": { + "External id": 3285131,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595206651.901, "dur": 39.037, + "args": { + "External id": 3285132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595206700.001, "dur": 32.583, + "args": { + "External id": 3285133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595206757.718, "dur": 27.960, + "args": { + "External id": 3285134,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595206802.989, "dur": 17.697, + "args": { + "External id": 3285135,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595206836.906, "dur": 192.092, + "args": { + "External id": 3285136,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595206904.927, "dur": 13.636, + "args": { + "External id": 3285137,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595206910.095, "dur": 7.525, + "args": { + "External id": 3285138,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595206921.439, "dur": 4.114, + "args": { + "External id": 3285139,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595206927.110, "dur": 1.142, + "args": { + "External id": 3285140,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595206936.734, "dur": 3.160, + "args": { + "External id": 3285141,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595207041.777, "dur": 55.298, + "args": { + "External id": 3285142,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595207131.801, "dur": 33.574, + "args": { + "External id": 3285143,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595207173.597, "dur": 42.210, + "args": { + "External id": 3285144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595207224.437, "dur": 34.802, + "args": { + "External id": 3285145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595207282.027, "dur": 27.511, + "args": { + "External id": 3285146,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595207315.271, "dur": 33.669, + "args": { + "External id": 3285147,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595207370.715, "dur": 18.590, + "args": { + "External id": 3285148,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 1336756, "tid": 1336756, + "ts": 1587595207535.046, "dur": 72.545, + "args": { + "External id": 3285149,"Record function id": 0, "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595207679.386, "dur": 47.256, + "args": { + "External id": 3285150,"Record function id": 0, "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 1336756, "tid": 1336756, + "ts": 1587595207735.527, "dur": 17993.121, + "args": { + "External id": 3285151,"Record function id": 0, "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 1336756, "tid": 1336756, + "ts": 1587595207743.355, "dur": 936.358, + "args": { + "External id": 3285152,"Record function id": 0, "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595207816.930, "dur": 7.066, + "args": { + "External id": 3285153,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595207837.108, "dur": 59.770, + "args": { + "External id": 3285154,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207842.283, "dur": 2.266, + "args": { + "External id": 3285155,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207850.088, "dur": 0.551, + "args": { + "External id": 3285156,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207852.268, "dur": 0.362, + "args": { + "External id": 3285157,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207854.412, "dur": 0.382, + "args": { + "External id": 3285158,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207858.851, "dur": 0.574, + "args": { + "External id": 3285159,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207860.953, "dur": 0.554, + "args": { + "External id": 3285160,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207863.065, "dur": 3.493, + "args": { + "External id": 3285161,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207868.660, "dur": 0.336, + "args": { + "External id": 3285162,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595207870.454, "dur": 0.174, + "args": { + "External id": 3285163,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595207909.844, "dur": 40.389, + "args": { + "External id": 3285164,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595208022.163, "dur": 124.677, + "args": { + "External id": 3285165,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595208036.416, "dur": 5.874, + "args": { + "External id": 3285166,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595208047.622, "dur": 10.480, + "args": { + "External id": 3285167,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595208051.734, "dur": 5.975, + "args": { + "External id": 3285168,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208055.502, "dur": 0.588, + "args": { + "External id": 3285169,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595208065.623, "dur": 34.756, + "args": { + "External id": 3285170,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208067.816, "dur": 2.476, + "args": { + "External id": 3285171,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208072.440, "dur": 0.289, + "args": { + "External id": 3285172,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208074.169, "dur": 0.396, + "args": { + "External id": 3285173,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208078.667, "dur": 1.171, + "args": { + "External id": 3285174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208081.218, "dur": 0.615, + "args": { + "External id": 3285175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208083.552, "dur": 0.478, + "args": { + "External id": 3285176,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208087.476, "dur": 0.169, + "args": { + "External id": 3285177,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208089.320, "dur": 0.196, + "args": { + "External id": 3285178,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208091.171, "dur": 2.699, + "args": { + "External id": 3285179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595208111.393, "dur": 26.515, + "args": { + "External id": 3285180,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595208202.687, "dur": 385.665, + "args": { + "External id": 3285181,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595208237.020, "dur": 346.424, + "args": { + "External id": 3285182,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6485, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595208249.148, "dur": 328.746, + "args": { + "External id": 3285183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595208613.568, "dur": 2.381, + "args": { + "External id": 3285184,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6487, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 1336756, "tid": 1336756, + "ts": 1587595208698.653, "dur": 16849.036, + "args": { + "External id": 3285185,"Record function id": 0, "Ev Idx": 6488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208787.732, "dur": 5.235, + "args": { + "External id": 3285186,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208796.653, "dur": 0.947, + "args": { + "External id": 3285187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208799.533, "dur": 1.878, + "args": { + "External id": 3285188,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208803.317, "dur": 1.058, + "args": { + "External id": 3285189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208805.810, "dur": 0.890, + "args": { + "External id": 3285190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208808.430, "dur": 0.927, + "args": { + "External id": 3285191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208812.941, "dur": 0.926, + "args": { + "External id": 3285192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208815.610, "dur": 2.714, + "args": { + "External id": 3285193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208819.845, "dur": 1.102, + "args": { + "External id": 3285194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595208822.644, "dur": 0.645, + "args": { + "External id": 3285195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595208842.389, "dur": 16661.269, + "args": { + "External id": 3285196,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595208857.672, "dur": 16639.495, + "args": { + "External id": 3285197,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595208897.673, "dur": 13.539, + "args": { + "External id": 3285198,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595208914.404, "dur": 16549.223, + "args": { + "External id": 3285199,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595208917.092, "dur": 16546.063, + "args": { + "External id": 3285200,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595208923.036, "dur": 5.604, + "args": { + "External id": 3285201,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595208930.528, "dur": 16529.749, + "args": { + "External id": 3285202,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595225676.770, "dur": 27.770, + "args": { + "External id": 3285203,"Sequence number": 32987117, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6506 + } + }, + { + "ph": "s", "id": 215, "pid": 1336756, "tid": 1336756, "ts": 1587595225676.770, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595225692.664, "dur": 7.235, + "args": { + "External id": 3285204,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595225695.675, "dur": 3.971, + "args": { + "External id": 3285205,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595225768.109, "dur": 77.116, + "args": { + "External id": 3285206,"Record function id": 0, "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595225847.133, "dur": 1070.762, + "args": { + "External id": 3285207,"Record function id": 0, "Ev Idx": 6510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595225903.353, "dur": 1000.753, + "args": { + "External id": 3285208,"Sequence number": 32987118, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6511 + } + }, + { + "ph": "s", "id": 214, "pid": 1336756, "tid": 1336756, "ts": 1587595225903.353, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595225969.989, "dur": 72.653, + "args": { + "External id": 3285209,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595226057.419, "dur": 100.314, + "args": { + "External id": 3285210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595226166.498, "dur": 38.671, + "args": { + "External id": 3285211,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595226214.914, "dur": 31.827, + "args": { + "External id": 3285212,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595226272.425, "dur": 27.931, + "args": { + "External id": 3285213,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595226317.535, "dur": 15.521, + "args": { + "External id": 3285214,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595226351.073, "dur": 126.795, + "args": { + "External id": 3285215,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595226399.254, "dur": 10.690, + "args": { + "External id": 3285216,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595226404.186, "dur": 5.008, + "args": { + "External id": 3285217,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595226412.828, "dur": 6.761, + "args": { + "External id": 3285218,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595226421.009, "dur": 1.135, + "args": { + "External id": 3285219,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595226424.415, "dur": 3.659, + "args": { + "External id": 3285220,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595226487.770, "dur": 45.814, + "args": { + "External id": 3285221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595226564.425, "dur": 32.203, + "args": { + "External id": 3285222,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595226604.669, "dur": 40.315, + "args": { + "External id": 3285223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595226652.250, "dur": 35.068, + "args": { + "External id": 3285224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595226711.377, "dur": 25.458, + "args": { + "External id": 3285225,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595226742.000, "dur": 34.677, + "args": { + "External id": 3285226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595226796.332, "dur": 19.016, + "args": { + "External id": 3285227,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 1336756, "tid": 1336756, + "ts": 1587595227033.639, "dur": 78.121, + "args": { + "External id": 3285228,"Record function id": 0, "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595227188.157, "dur": 49.779, + "args": { + "External id": 3285229,"Record function id": 0, "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 1336756, "tid": 1336756, + "ts": 1587595227247.082, "dur": 18292.936, + "args": { + "External id": 3285230,"Record function id": 0, "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 1336756, "tid": 1336756, + "ts": 1587595227255.277, "dur": 951.860, + "args": { + "External id": 3285231,"Record function id": 0, "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595227332.907, "dur": 8.856, + "args": { + "External id": 3285232,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595227354.935, "dur": 41.949, + "args": { + "External id": 3285233,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227360.286, "dur": 2.604, + "args": { + "External id": 3285234,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227367.915, "dur": 0.614, + "args": { + "External id": 3285235,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227370.287, "dur": 0.306, + "args": { + "External id": 3285236,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227372.335, "dur": 0.354, + "args": { + "External id": 3285237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227376.468, "dur": 0.435, + "args": { + "External id": 3285238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227378.708, "dur": 0.331, + "args": { + "External id": 3285239,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227380.674, "dur": 3.761, + "args": { + "External id": 3285240,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227386.068, "dur": 0.353, + "args": { + "External id": 3285241,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227388.753, "dur": 0.374, + "args": { + "External id": 3285242,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595227407.023, "dur": 41.885, + "args": { + "External id": 3285243,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595227480.472, "dur": 116.569, + "args": { + "External id": 3285244,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595227492.781, "dur": 3.398, + "args": { + "External id": 3285245,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595227501.076, "dur": 10.580, + "args": { + "External id": 3285246,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595227505.423, "dur": 5.820, + "args": { + "External id": 3285247,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227509.542, "dur": 0.462, + "args": { + "External id": 3285248,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595227517.961, "dur": 35.415, + "args": { + "External id": 3285249,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227520.417, "dur": 2.573, + "args": { + "External id": 3285250,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227525.254, "dur": 0.238, + "args": { + "External id": 3285251,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227527.509, "dur": 0.716, + "args": { + "External id": 3285252,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227532.707, "dur": 1.431, + "args": { + "External id": 3285253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227535.590, "dur": 0.339, + "args": { + "External id": 3285254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227537.400, "dur": 0.303, + "args": { + "External id": 3285255,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227541.619, "dur": 0.266, + "args": { + "External id": 3285256,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227543.422, "dur": 0.470, + "args": { + "External id": 3285257,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595227545.293, "dur": 2.524, + "args": { + "External id": 3285258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595227565.141, "dur": 24.311, + "args": { + "External id": 3285259,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595227647.805, "dur": 457.247, + "args": { + "External id": 3285260,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595227679.692, "dur": 419.862, + "args": { + "External id": 3285261,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6564, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595227690.766, "dur": 400.563, + "args": { + "External id": 3285262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595228134.620, "dur": 2.465, + "args": { + "External id": 3285263,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6566, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 1336756, "tid": 1336756, + "ts": 1587595228228.095, "dur": 17127.083, + "args": { + "External id": 3285264,"Record function id": 0, "Ev Idx": 6567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228323.272, "dur": 6.274, + "args": { + "External id": 3285265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228333.307, "dur": 0.992, + "args": { + "External id": 3285266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228336.149, "dur": 2.783, + "args": { + "External id": 3285267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228348.775, "dur": 0.812, + "args": { + "External id": 3285268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228351.185, "dur": 0.671, + "args": { + "External id": 3285269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228353.517, "dur": 0.872, + "args": { + "External id": 3285270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228358.030, "dur": 0.770, + "args": { + "External id": 3285271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228360.029, "dur": 1.987, + "args": { + "External id": 3285272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228363.595, "dur": 0.881, + "args": { + "External id": 3285273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595228366.270, "dur": 0.621, + "args": { + "External id": 3285274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595228390.696, "dur": 16917.853, + "args": { + "External id": 3285275,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595228406.678, "dur": 16895.059, + "args": { + "External id": 3285276,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595228425.645, "dur": 14.778, + "args": { + "External id": 3285277,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595228443.532, "dur": 16825.284, + "args": { + "External id": 3285278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595228445.830, "dur": 16822.382, + "args": { + "External id": 3285279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595228452.378, "dur": 4.663, + "args": { + "External id": 3285280,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595228459.030, "dur": 16806.159, + "args": { + "External id": 3285281,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595245485.752, "dur": 29.376, + "args": { + "External id": 3285282,"Sequence number": 32987119, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6585 + } + }, + { + "ph": "s", "id": 213, "pid": 1336756, "tid": 1336756, "ts": 1587595245485.752, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595245502.926, "dur": 7.596, + "args": { + "External id": 3285283,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595245505.914, "dur": 4.380, + "args": { + "External id": 3285284,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595245578.741, "dur": 75.589, + "args": { + "External id": 3285285,"Record function id": 0, "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595245656.267, "dur": 1068.254, + "args": { + "External id": 3285286,"Record function id": 0, "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595245697.802, "dur": 1014.231, + "args": { + "External id": 3285287,"Sequence number": 32987120, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6590 + } + }, + { + "ph": "s", "id": 212, "pid": 1336756, "tid": 1336756, "ts": 1587595245697.802, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595245764.387, "dur": 40.571, + "args": { + "External id": 3285288,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595245816.430, "dur": 120.994, + "args": { + "External id": 3285289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595245950.576, "dur": 72.977, + "args": { + "External id": 3285290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595246037.234, "dur": 35.469, + "args": { + "External id": 3285291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595246100.437, "dur": 26.369, + "args": { + "External id": 3285292,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595246146.851, "dur": 16.510, + "args": { + "External id": 3285293,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595246183.493, "dur": 127.267, + "args": { + "External id": 3285294,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595246232.575, "dur": 10.625, + "args": { + "External id": 3285295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595246237.569, "dur": 4.930, + "args": { + "External id": 3285296,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595246246.004, "dur": 6.949, + "args": { + "External id": 3285297,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595246254.466, "dur": 0.979, + "args": { + "External id": 3285298,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595246257.813, "dur": 3.341, + "args": { + "External id": 3285299,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595246321.932, "dur": 44.247, + "args": { + "External id": 3285300,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595246392.843, "dur": 26.526, + "args": { + "External id": 3285301,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595246427.464, "dur": 40.413, + "args": { + "External id": 3285302,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595246475.654, "dur": 34.398, + "args": { + "External id": 3285303,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595246539.315, "dur": 28.906, + "args": { + "External id": 3285304,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595246576.819, "dur": 34.419, + "args": { + "External id": 3285305,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595246628.351, "dur": 17.321, + "args": { + "External id": 3285306,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 1336756, "tid": 1336756, + "ts": 1587595246787.571, "dur": 72.397, + "args": { + "External id": 3285307,"Record function id": 0, "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595246954.794, "dur": 85.376, + "args": { + "External id": 3285308,"Record function id": 0, "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 1336756, "tid": 1336756, + "ts": 1587595247051.312, "dur": 18375.947, + "args": { + "External id": 3285309,"Record function id": 0, "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 1336756, "tid": 1336756, + "ts": 1587595247060.227, "dur": 881.313, + "args": { + "External id": 3285310,"Record function id": 0, "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595247137.153, "dur": 8.345, + "args": { + "External id": 3285311,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595247158.506, "dur": 39.955, + "args": { + "External id": 3285312,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247163.839, "dur": 2.265, + "args": { + "External id": 3285313,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247171.233, "dur": 0.480, + "args": { + "External id": 3285314,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247173.311, "dur": 0.382, + "args": { + "External id": 3285315,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247175.291, "dur": 0.475, + "args": { + "External id": 3285316,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247179.664, "dur": 0.531, + "args": { + "External id": 3285317,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247182.052, "dur": 0.583, + "args": { + "External id": 3285318,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247184.094, "dur": 4.227, + "args": { + "External id": 3285319,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247189.361, "dur": 0.499, + "args": { + "External id": 3285320,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247191.045, "dur": 0.306, + "args": { + "External id": 3285321,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595247209.376, "dur": 41.767, + "args": { + "External id": 3285322,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595247283.665, "dur": 114.381, + "args": { + "External id": 3285323,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595247296.396, "dur": 3.947, + "args": { + "External id": 3285324,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595247305.407, "dur": 10.289, + "args": { + "External id": 3285325,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595247309.584, "dur": 5.688, + "args": { + "External id": 3285326,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247313.357, "dur": 0.572, + "args": { + "External id": 3285327,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595247322.011, "dur": 33.314, + "args": { + "External id": 3285328,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247324.055, "dur": 2.772, + "args": { + "External id": 3285329,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247328.461, "dur": 0.230, + "args": { + "External id": 3285330,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247330.303, "dur": 0.356, + "args": { + "External id": 3285331,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247334.449, "dur": 1.752, + "args": { + "External id": 3285332,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247337.649, "dur": 0.518, + "args": { + "External id": 3285333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247340.019, "dur": 0.147, + "args": { + "External id": 3285334,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247344.333, "dur": 0.340, + "args": { + "External id": 3285335,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247345.891, "dur": 0.184, + "args": { + "External id": 3285336,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595247347.325, "dur": 2.337, + "args": { + "External id": 3285337,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595247366.182, "dur": 24.103, + "args": { + "External id": 3285338,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595247450.058, "dur": 379.150, + "args": { + "External id": 3285339,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595247482.228, "dur": 341.973, + "args": { + "External id": 3285340,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6643, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595247493.267, "dur": 325.487, + "args": { + "External id": 3285341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595247853.248, "dur": 2.237, + "args": { + "External id": 3285342,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6645, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 1336756, "tid": 1336756, + "ts": 1587595247963.162, "dur": 17284.738, + "args": { + "External id": 3285343,"Record function id": 0, "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248095.909, "dur": 6.424, + "args": { + "External id": 3285344,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248106.790, "dur": 0.990, + "args": { + "External id": 3285345,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248109.792, "dur": 2.588, + "args": { + "External id": 3285346,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248114.210, "dur": 1.117, + "args": { + "External id": 3285347,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248116.787, "dur": 1.169, + "args": { + "External id": 3285348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248119.553, "dur": 0.868, + "args": { + "External id": 3285349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248123.929, "dur": 1.268, + "args": { + "External id": 3285350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248126.661, "dur": 1.781, + "args": { + "External id": 3285351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248129.770, "dur": 0.797, + "args": { + "External id": 3285352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595248131.917, "dur": 0.721, + "args": { + "External id": 3285353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595248153.877, "dur": 17049.747, + "args": { + "External id": 3285354,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595248170.089, "dur": 17027.221, + "args": { + "External id": 3285355,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595248192.956, "dur": 13.148, + "args": { + "External id": 3285356,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595248209.231, "dur": 16953.147, + "args": { + "External id": 3285357,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595248211.707, "dur": 16950.227, + "args": { + "External id": 3285358,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595248218.069, "dur": 4.472, + "args": { + "External id": 3285359,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595248224.117, "dur": 16935.094, + "args": { + "External id": 3285360,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595265375.228, "dur": 27.674, + "args": { + "External id": 3285361,"Sequence number": 32987121, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6664 + } + }, + { + "ph": "s", "id": 211, "pid": 1336756, "tid": 1336756, "ts": 1587595265375.228, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595265391.230, "dur": 7.163, + "args": { + "External id": 3285362,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595265394.288, "dur": 3.864, + "args": { + "External id": 3285363,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595265466.741, "dur": 73.036, + "args": { + "External id": 3285364,"Record function id": 0, "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595265541.318, "dur": 1087.487, + "args": { + "External id": 3285365,"Record function id": 0, "Ev Idx": 6668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595265581.000, "dur": 1034.968, + "args": { + "External id": 3285366,"Sequence number": 32987122, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6669 + } + }, + { + "ph": "s", "id": 210, "pid": 1336756, "tid": 1336756, "ts": 1587595265581.000, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595265648.733, "dur": 40.461, + "args": { + "External id": 3285367,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595265705.435, "dur": 106.002, + "args": { + "External id": 3285368,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595265821.229, "dur": 39.739, + "args": { + "External id": 3285369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595265867.267, "dur": 52.014, + "args": { + "External id": 3285370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595265954.662, "dur": 64.961, + "args": { + "External id": 3285371,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595266039.955, "dur": 20.084, + "args": { + "External id": 3285372,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595266076.734, "dur": 133.224, + "args": { + "External id": 3285373,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595266127.134, "dur": 11.988, + "args": { + "External id": 3285374,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595266131.744, "dur": 6.325, + "args": { + "External id": 3285375,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595266141.758, "dur": 4.974, + "args": { + "External id": 3285376,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595266148.138, "dur": 3.102, + "args": { + "External id": 3285377,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595266153.706, "dur": 4.637, + "args": { + "External id": 3285378,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595266221.315, "dur": 51.223, + "args": { + "External id": 3285379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595266302.668, "dur": 28.572, + "args": { + "External id": 3285380,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595266341.198, "dur": 40.362, + "args": { + "External id": 3285381,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595266389.565, "dur": 34.721, + "args": { + "External id": 3285382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595266448.793, "dur": 26.288, + "args": { + "External id": 3285383,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595266480.398, "dur": 35.013, + "args": { + "External id": 3285384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595266533.004, "dur": 21.537, + "args": { + "External id": 3285385,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 1336756, "tid": 1336756, + "ts": 1587595266691.864, "dur": 72.205, + "args": { + "External id": 3285386,"Record function id": 0, "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595266835.421, "dur": 66.180, + "args": { + "External id": 3285387,"Record function id": 0, "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 1336756, "tid": 1336756, + "ts": 1587595266913.109, "dur": 18289.970, + "args": { + "External id": 3285388,"Record function id": 0, "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 1336756, "tid": 1336756, + "ts": 1587595266921.976, "dur": 1018.763, + "args": { + "External id": 3285389,"Record function id": 0, "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595267039.851, "dur": 9.066, + "args": { + "External id": 3285390,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595267062.927, "dur": 34.187, + "args": { + "External id": 3285391,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267068.151, "dur": 2.271, + "args": { + "External id": 3285392,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267074.821, "dur": 0.616, + "args": { + "External id": 3285393,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267076.873, "dur": 0.549, + "args": { + "External id": 3285394,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267078.798, "dur": 0.376, + "args": { + "External id": 3285395,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267082.314, "dur": 0.259, + "args": { + "External id": 3285396,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267084.017, "dur": 0.243, + "args": { + "External id": 3285397,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267085.085, "dur": 2.531, + "args": { + "External id": 3285398,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267088.860, "dur": 0.195, + "args": { + "External id": 3285399,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267090.223, "dur": 0.143, + "args": { + "External id": 3285400,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595267107.754, "dur": 42.160, + "args": { + "External id": 3285401,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595267184.048, "dur": 123.498, + "args": { + "External id": 3285402,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595267197.334, "dur": 3.528, + "args": { + "External id": 3285403,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595267205.583, "dur": 10.185, + "args": { + "External id": 3285404,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595267209.952, "dur": 5.442, + "args": { + "External id": 3285405,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267213.724, "dur": 0.561, + "args": { + "External id": 3285406,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595267222.119, "dur": 33.732, + "args": { + "External id": 3285407,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267223.646, "dur": 2.581, + "args": { + "External id": 3285408,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267227.572, "dur": 0.361, + "args": { + "External id": 3285409,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267229.209, "dur": 0.317, + "args": { + "External id": 3285410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267233.780, "dur": 1.472, + "args": { + "External id": 3285411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267240.535, "dur": 0.225, + "args": { + "External id": 3285412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267241.753, "dur": 0.366, + "args": { + "External id": 3285413,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267245.611, "dur": 0.158, + "args": { + "External id": 3285414,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267246.591, "dur": 0.157, + "args": { + "External id": 3285415,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595267247.965, "dur": 2.718, + "args": { + "External id": 3285416,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595267275.570, "dur": 23.581, + "args": { + "External id": 3285417,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595267362.602, "dur": 456.427, + "args": { + "External id": 3285418,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595267398.254, "dur": 415.152, + "args": { + "External id": 3285419,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6722, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595267409.812, "dur": 398.190, + "args": { + "External id": 3285420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595267845.023, "dur": 2.417, + "args": { + "External id": 3285421,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6724, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 1336756, "tid": 1336756, + "ts": 1587595267962.921, "dur": 17056.686, + "args": { + "External id": 3285422,"Record function id": 0, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268110.667, "dur": 6.721, + "args": { + "External id": 3285423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268121.556, "dur": 1.200, + "args": { + "External id": 3285424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268124.675, "dur": 2.247, + "args": { + "External id": 3285425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268128.991, "dur": 1.010, + "args": { + "External id": 3285426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268131.424, "dur": 0.837, + "args": { + "External id": 3285427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268133.401, "dur": 0.846, + "args": { + "External id": 3285428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268138.548, "dur": 0.707, + "args": { + "External id": 3285429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268140.827, "dur": 2.507, + "args": { + "External id": 3285430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268144.608, "dur": 0.999, + "args": { + "External id": 3285431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595268146.962, "dur": 0.896, + "args": { + "External id": 3285432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595268168.851, "dur": 16783.806, + "args": { + "External id": 3285433,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595268184.948, "dur": 16761.124, + "args": { + "External id": 3285434,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595268206.631, "dur": 14.036, + "args": { + "External id": 3285435,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595268223.585, "dur": 16689.806, + "args": { + "External id": 3285436,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595268226.441, "dur": 16686.470, + "args": { + "External id": 3285437,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595268232.866, "dur": 6.298, + "args": { + "External id": 3285438,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595268241.071, "dur": 16668.745, + "args": { + "External id": 3285439,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595285151.111, "dur": 28.368, + "args": { + "External id": 3285440,"Sequence number": 32987123, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6743 + } + }, + { + "ph": "s", "id": 209, "pid": 1336756, "tid": 1336756, "ts": 1587595285151.111, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595285166.922, "dur": 8.055, + "args": { + "External id": 3285441,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595285170.526, "dur": 4.184, + "args": { + "External id": 3285442,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595285240.436, "dur": 76.133, + "args": { + "External id": 3285443,"Record function id": 0, "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595285318.130, "dur": 1065.942, + "args": { + "External id": 3285444,"Record function id": 0, "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595285357.746, "dur": 1013.457, + "args": { + "External id": 3285445,"Sequence number": 32987124, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6748 + } + }, + { + "ph": "s", "id": 208, "pid": 1336756, "tid": 1336756, "ts": 1587595285357.746, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595285422.162, "dur": 40.931, + "args": { + "External id": 3285446,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595285475.037, "dur": 103.802, + "args": { + "External id": 3285447,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595285587.086, "dur": 38.747, + "args": { + "External id": 3285448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595285633.942, "dur": 30.138, + "args": { + "External id": 3285449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595285690.559, "dur": 26.592, + "args": { + "External id": 3285450,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595285733.946, "dur": 15.902, + "args": { + "External id": 3285451,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595285767.130, "dur": 144.489, + "args": { + "External id": 3285452,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595285812.731, "dur": 10.228, + "args": { + "External id": 3285453,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595285817.395, "dur": 4.668, + "args": { + "External id": 3285454,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595285825.663, "dur": 5.831, + "args": { + "External id": 3285455,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595285833.052, "dur": 1.078, + "args": { + "External id": 3285456,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595285836.523, "dur": 5.204, + "args": { + "External id": 3285457,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595285923.974, "dur": 49.995, + "args": { + "External id": 3285458,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595286042.796, "dur": 30.782, + "args": { + "External id": 3285459,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595286083.657, "dur": 46.490, + "args": { + "External id": 3285460,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595286138.407, "dur": 34.854, + "args": { + "External id": 3285461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595286196.485, "dur": 29.052, + "args": { + "External id": 3285462,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595286231.196, "dur": 33.397, + "args": { + "External id": 3285463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595286284.363, "dur": 17.826, + "args": { + "External id": 3285464,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 1336756, "tid": 1336756, + "ts": 1587595286447.169, "dur": 73.662, + "args": { + "External id": 3285465,"Record function id": 0, "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595286591.773, "dur": 48.039, + "args": { + "External id": 3285466,"Record function id": 0, "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 1336756, "tid": 1336756, + "ts": 1587595286649.413, "dur": 18492.259, + "args": { + "External id": 3285467,"Record function id": 0, "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 1336756, "tid": 1336756, + "ts": 1587595286658.687, "dur": 890.172, + "args": { + "External id": 3285468,"Record function id": 0, "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595286735.887, "dur": 6.768, + "args": { + "External id": 3285469,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595286755.345, "dur": 39.879, + "args": { + "External id": 3285470,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286762.272, "dur": 2.477, + "args": { + "External id": 3285471,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286768.818, "dur": 0.676, + "args": { + "External id": 3285472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286770.328, "dur": 0.270, + "args": { + "External id": 3285473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286771.632, "dur": 2.426, + "args": { + "External id": 3285474,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286775.757, "dur": 0.365, + "args": { + "External id": 3285475,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286777.570, "dur": 0.395, + "args": { + "External id": 3285476,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286782.154, "dur": 2.080, + "args": { + "External id": 3285477,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286786.001, "dur": 0.156, + "args": { + "External id": 3285478,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286787.990, "dur": 0.316, + "args": { + "External id": 3285479,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595286807.013, "dur": 36.432, + "args": { + "External id": 3285480,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595286893.386, "dur": 162.858, + "args": { + "External id": 3285481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595286908.307, "dur": 5.158, + "args": { + "External id": 3285482,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595286919.019, "dur": 13.472, + "args": { + "External id": 3285483,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595286923.245, "dur": 8.799, + "args": { + "External id": 3285484,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286927.414, "dur": 3.002, + "args": { + "External id": 3285485,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595286939.740, "dur": 32.268, + "args": { + "External id": 3285486,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286942.029, "dur": 0.711, + "args": { + "External id": 3285487,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286944.636, "dur": 0.422, + "args": { + "External id": 3285488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286946.636, "dur": 0.397, + "args": { + "External id": 3285489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286950.902, "dur": 1.826, + "args": { + "External id": 3285490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286954.229, "dur": 0.506, + "args": { + "External id": 3285491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286956.201, "dur": 2.436, + "args": { + "External id": 3285492,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286960.174, "dur": 0.515, + "args": { + "External id": 3285493,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286962.170, "dur": 0.264, + "args": { + "External id": 3285494,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595286966.383, "dur": 0.375, + "args": { + "External id": 3285495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595287017.971, "dur": 28.655, + "args": { + "External id": 3285496,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595287112.631, "dur": 345.331, + "args": { + "External id": 3285497,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595287146.237, "dur": 307.381, + "args": { + "External id": 3285498,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6801, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595287156.598, "dur": 291.627, + "args": { + "External id": 3285499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595287482.873, "dur": 2.260, + "args": { + "External id": 3285500,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6803, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 1336756, "tid": 1336756, + "ts": 1587595287568.010, "dur": 17347.588, + "args": { + "External id": 3285501,"Record function id": 0, "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287658.380, "dur": 5.471, + "args": { + "External id": 3285502,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287667.008, "dur": 1.122, + "args": { + "External id": 3285503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287669.795, "dur": 1.845, + "args": { + "External id": 3285504,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287697.975, "dur": 0.826, + "args": { + "External id": 3285505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287700.444, "dur": 0.700, + "args": { + "External id": 3285506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287702.751, "dur": 0.826, + "args": { + "External id": 3285507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287707.365, "dur": 0.892, + "args": { + "External id": 3285508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287709.639, "dur": 1.643, + "args": { + "External id": 3285509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287712.759, "dur": 0.624, + "args": { + "External id": 3285510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595287715.013, "dur": 0.906, + "args": { + "External id": 3285511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595287738.723, "dur": 17122.553, + "args": { + "External id": 3285512,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595287754.639, "dur": 17099.648, + "args": { + "External id": 3285513,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595287767.959, "dur": 12.732, + "args": { + "External id": 3285514,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595287783.643, "dur": 17037.302, + "args": { + "External id": 3285515,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595287785.958, "dur": 17034.405, + "args": { + "External id": 3285516,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595287792.092, "dur": 7.033, + "args": { + "External id": 3285517,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595287800.668, "dur": 17016.775, + "args": { + "External id": 3285518,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595305078.654, "dur": 37.187, + "args": { + "External id": 3285519,"Sequence number": 32987125, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6822 + } + }, + { + "ph": "s", "id": 207, "pid": 1336756, "tid": 1336756, "ts": 1587595305078.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595305102.423, "dur": 8.707, + "args": { + "External id": 3285520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595305106.074, "dur": 4.687, + "args": { + "External id": 3285521,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595305183.018, "dur": 73.985, + "args": { + "External id": 3285522,"Record function id": 0, "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595305258.543, "dur": 1078.605, + "args": { + "External id": 3285523,"Record function id": 0, "Ev Idx": 6826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595305297.993, "dur": 1026.447, + "args": { + "External id": 3285524,"Sequence number": 32987126, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6827 + } + }, + { + "ph": "s", "id": 206, "pid": 1336756, "tid": 1336756, "ts": 1587595305297.993, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595305361.691, "dur": 41.724, + "args": { + "External id": 3285525,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595305416.162, "dur": 100.457, + "args": { + "External id": 3285526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595305525.244, "dur": 43.229, + "args": { + "External id": 3285527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595305576.661, "dur": 30.383, + "args": { + "External id": 3285528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595305631.681, "dur": 25.209, + "args": { + "External id": 3285529,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595305673.953, "dur": 14.230, + "args": { + "External id": 3285530,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595305705.063, "dur": 126.631, + "args": { + "External id": 3285531,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595305754.873, "dur": 10.093, + "args": { + "External id": 3285532,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595305759.536, "dur": 4.626, + "args": { + "External id": 3285533,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595305767.891, "dur": 5.119, + "args": { + "External id": 3285534,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595305774.396, "dur": 0.915, + "args": { + "External id": 3285535,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595305777.741, "dur": 4.907, + "args": { + "External id": 3285536,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595305841.954, "dur": 63.347, + "args": { + "External id": 3285537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595305939.359, "dur": 30.263, + "args": { + "External id": 3285538,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595305980.010, "dur": 96.152, + "args": { + "External id": 3285539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595306088.039, "dur": 34.962, + "args": { + "External id": 3285540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595306146.918, "dur": 29.185, + "args": { + "External id": 3285541,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595306182.352, "dur": 34.188, + "args": { + "External id": 3285542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595306237.268, "dur": 18.090, + "args": { + "External id": 3285543,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 1336756, "tid": 1336756, + "ts": 1587595306400.109, "dur": 69.371, + "args": { + "External id": 3285544,"Record function id": 0, "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595306541.630, "dur": 45.718, + "args": { + "External id": 3285545,"Record function id": 0, "Ev Idx": 6848 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 1336756, "tid": 1336756, + "ts": 1587595306595.771, "dur": 18638.061, + "args": { + "External id": 3285546,"Record function id": 0, "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 1336756, "tid": 1336756, + "ts": 1587595306604.529, "dur": 902.205, + "args": { + "External id": 3285547,"Record function id": 0, "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595306680.082, "dur": 7.542, + "args": { + "External id": 3285548,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595306700.682, "dur": 40.193, + "args": { + "External id": 3285549,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306705.830, "dur": 2.024, + "args": { + "External id": 3285550,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306713.014, "dur": 0.370, + "args": { + "External id": 3285551,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306714.926, "dur": 0.622, + "args": { + "External id": 3285552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306717.532, "dur": 0.294, + "args": { + "External id": 3285553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306721.819, "dur": 0.580, + "args": { + "External id": 3285554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306723.904, "dur": 0.406, + "args": { + "External id": 3285555,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306725.849, "dur": 3.971, + "args": { + "External id": 3285556,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306731.802, "dur": 0.241, + "args": { + "External id": 3285557,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306734.194, "dur": 0.343, + "args": { + "External id": 3285558,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595306755.274, "dur": 37.713, + "args": { + "External id": 3285559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595306824.448, "dur": 144.215, + "args": { + "External id": 3285560,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595306838.268, "dur": 5.238, + "args": { + "External id": 3285561,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595306848.352, "dur": 10.727, + "args": { + "External id": 3285562,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595306852.353, "dur": 6.335, + "args": { + "External id": 3285563,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306856.819, "dur": 0.657, + "args": { + "External id": 3285564,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595306866.388, "dur": 50.944, + "args": { + "External id": 3285565,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306869.115, "dur": 0.654, + "args": { + "External id": 3285566,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306871.479, "dur": 19.320, + "args": { + "External id": 3285567,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306893.792, "dur": 0.471, + "args": { + "External id": 3285568,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306896.155, "dur": 1.744, + "args": { + "External id": 3285569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306901.668, "dur": 0.380, + "args": { + "External id": 3285570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306903.338, "dur": 0.183, + "args": { + "External id": 3285571,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306904.974, "dur": 0.473, + "args": { + "External id": 3285572,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306909.093, "dur": 0.537, + "args": { + "External id": 3285573,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595306911.065, "dur": 0.608, + "args": { + "External id": 3285574,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595306931.874, "dur": 28.445, + "args": { + "External id": 3285575,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595307058.634, "dur": 354.566, + "args": { + "External id": 3285576,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595307094.657, "dur": 314.027, + "args": { + "External id": 3285577,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6880, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595307104.532, "dur": 297.716, + "args": { + "External id": 3285578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595307438.264, "dur": 2.571, + "args": { + "External id": 3285579,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6882, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 1336756, "tid": 1336756, + "ts": 1587595307526.574, "dur": 17512.146, + "args": { + "External id": 3285580,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307620.347, "dur": 6.186, + "args": { + "External id": 3285581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307630.003, "dur": 0.945, + "args": { + "External id": 3285582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307632.568, "dur": 1.905, + "args": { + "External id": 3285583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307636.005, "dur": 0.854, + "args": { + "External id": 3285584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307638.048, "dur": 1.240, + "args": { + "External id": 3285585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307640.472, "dur": 0.984, + "args": { + "External id": 3285586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307645.077, "dur": 0.765, + "args": { + "External id": 3285587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307647.432, "dur": 1.953, + "args": { + "External id": 3285588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307650.956, "dur": 0.959, + "args": { + "External id": 3285589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595307653.479, "dur": 0.691, + "args": { + "External id": 3285590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595307675.156, "dur": 17300.341, + "args": { + "External id": 3285591,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595307691.219, "dur": 17277.345, + "args": { + "External id": 3285592,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595307714.735, "dur": 13.737, + "args": { + "External id": 3285593,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595307731.217, "dur": 17203.968, + "args": { + "External id": 3285594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595307733.725, "dur": 17200.932, + "args": { + "External id": 3285595,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595307740.145, "dur": 6.109, + "args": { + "External id": 3285596,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595307747.856, "dur": 17183.545, + "args": { + "External id": 3285597,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595325171.647, "dur": 37.489, + "args": { + "External id": 3285598,"Sequence number": 32987127, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6901 + } + }, + { + "ph": "s", "id": 205, "pid": 1336756, "tid": 1336756, "ts": 1587595325171.647, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595325196.291, "dur": 8.130, + "args": { + "External id": 3285599,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595325199.785, "dur": 4.374, + "args": { + "External id": 3285600,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595325272.584, "dur": 76.703, + "args": { + "External id": 3285601,"Record function id": 0, "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595325350.967, "dur": 1083.939, + "args": { + "External id": 3285602,"Record function id": 0, "Ev Idx": 6905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595325391.937, "dur": 1030.651, + "args": { + "External id": 3285603,"Sequence number": 32987128, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6906 + } + }, + { + "ph": "s", "id": 204, "pid": 1336756, "tid": 1336756, "ts": 1587595325391.937, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595325457.550, "dur": 41.776, + "args": { + "External id": 3285604,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595325511.743, "dur": 103.567, + "args": { + "External id": 3285605,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595325624.346, "dur": 38.503, + "args": { + "External id": 3285606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595325670.694, "dur": 31.116, + "args": { + "External id": 3285607,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595325727.798, "dur": 26.020, + "args": { + "External id": 3285608,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595325771.626, "dur": 16.555, + "args": { + "External id": 3285609,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595325806.053, "dur": 152.414, + "args": { + "External id": 3285610,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595325854.246, "dur": 10.516, + "args": { + "External id": 3285611,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595325859.024, "dur": 5.014, + "args": { + "External id": 3285612,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595325867.679, "dur": 24.492, + "args": { + "External id": 3285613,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595325895.032, "dur": 1.314, + "args": { + "External id": 3285614,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595325899.152, "dur": 6.436, + "args": { + "External id": 3285615,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595325969.769, "dur": 87.363, + "args": { + "External id": 3285616,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595326092.712, "dur": 33.262, + "args": { + "External id": 3285617,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595326136.834, "dur": 44.466, + "args": { + "External id": 3285618,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595326189.994, "dur": 33.839, + "args": { + "External id": 3285619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595326246.403, "dur": 30.069, + "args": { + "External id": 3285620,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595326281.801, "dur": 33.016, + "args": { + "External id": 3285621,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595326335.451, "dur": 18.678, + "args": { + "External id": 3285622,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 1336756, "tid": 1336756, + "ts": 1587595326497.407, "dur": 73.449, + "args": { + "External id": 3285623,"Record function id": 0, "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595326644.431, "dur": 44.802, + "args": { + "External id": 3285624,"Record function id": 0, "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 1336756, "tid": 1336756, + "ts": 1587595326698.648, "dur": 18431.196, + "args": { + "External id": 3285625,"Record function id": 0, "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 1336756, "tid": 1336756, + "ts": 1587595326707.604, "dur": 918.084, + "args": { + "External id": 3285626,"Record function id": 0, "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595326783.657, "dur": 7.204, + "args": { + "External id": 3285627,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595326804.383, "dur": 42.680, + "args": { + "External id": 3285628,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326809.951, "dur": 2.359, + "args": { + "External id": 3285629,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326817.044, "dur": 0.477, + "args": { + "External id": 3285630,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326819.412, "dur": 0.571, + "args": { + "External id": 3285631,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326822.135, "dur": 0.645, + "args": { + "External id": 3285632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326826.451, "dur": 0.292, + "args": { + "External id": 3285633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326828.537, "dur": 0.912, + "args": { + "External id": 3285634,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326830.885, "dur": 3.814, + "args": { + "External id": 3285635,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326838.601, "dur": 0.200, + "args": { + "External id": 3285636,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595326840.057, "dur": 0.338, + "args": { + "External id": 3285637,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595326859.297, "dur": 59.574, + "args": { + "External id": 3285638,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595326954.731, "dur": 183.176, + "args": { + "External id": 3285639,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595326968.362, "dur": 4.923, + "args": { + "External id": 3285640,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595326978.215, "dur": 61.174, + "args": { + "External id": 3285641,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595327029.045, "dur": 9.799, + "args": { + "External id": 3285642,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327033.801, "dur": 2.919, + "args": { + "External id": 3285643,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595327050.071, "dur": 33.223, + "args": { + "External id": 3285644,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327053.157, "dur": 0.785, + "args": { + "External id": 3285645,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327055.609, "dur": 0.515, + "args": { + "External id": 3285646,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327057.814, "dur": 0.391, + "args": { + "External id": 3285647,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327061.578, "dur": 2.221, + "args": { + "External id": 3285648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327065.532, "dur": 0.505, + "args": { + "External id": 3285649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327067.702, "dur": 2.753, + "args": { + "External id": 3285650,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327072.079, "dur": 0.352, + "args": { + "External id": 3285651,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327074.071, "dur": 0.697, + "args": { + "External id": 3285652,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327078.494, "dur": 0.195, + "args": { + "External id": 3285653,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595327097.186, "dur": 31.766, + "args": { + "External id": 3285654,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595327195.457, "dur": 343.180, + "args": { + "External id": 3285655,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595327229.389, "dur": 304.810, + "args": { + "External id": 3285656,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6959, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595327239.903, "dur": 288.722, + "args": { + "External id": 3285657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595327563.008, "dur": 2.345, + "args": { + "External id": 3285658,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6961, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 1336756, "tid": 1336756, + "ts": 1587595327646.544, "dur": 17265.215, + "args": { + "External id": 3285659,"Record function id": 0, "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327734.860, "dur": 5.478, + "args": { + "External id": 3285660,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327743.251, "dur": 0.897, + "args": { + "External id": 3285661,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327745.870, "dur": 2.632, + "args": { + "External id": 3285662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327750.098, "dur": 0.924, + "args": { + "External id": 3285663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327752.431, "dur": 0.866, + "args": { + "External id": 3285664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327754.523, "dur": 1.279, + "args": { + "External id": 3285665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327759.707, "dur": 0.998, + "args": { + "External id": 3285666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327762.063, "dur": 2.458, + "args": { + "External id": 3285667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327766.188, "dur": 0.631, + "args": { + "External id": 3285668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595327768.229, "dur": 0.810, + "args": { + "External id": 3285669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595327791.227, "dur": 17070.238, + "args": { + "External id": 3285670,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595327806.906, "dur": 17047.797, + "args": { + "External id": 3285671,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595327831.012, "dur": 12.382, + "args": { + "External id": 3285672,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595327846.368, "dur": 16975.498, + "args": { + "External id": 3285673,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595327848.523, "dur": 16972.669, + "args": { + "External id": 3285674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595327854.527, "dur": 5.854, + "args": { + "External id": 3285675,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595327862.142, "dur": 16956.146, + "args": { + "External id": 3285676,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595345067.994, "dur": 37.206, + "args": { + "External id": 3285677,"Sequence number": 32987129, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6980 + } + }, + { + "ph": "s", "id": 203, "pid": 1336756, "tid": 1336756, "ts": 1587595345067.994, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595345092.442, "dur": 8.343, + "args": { + "External id": 3285678,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595345096.250, "dur": 4.154, + "args": { + "External id": 3285679,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595345170.283, "dur": 76.018, + "args": { + "External id": 3285680,"Record function id": 0, "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595345247.921, "dur": 1072.952, + "args": { + "External id": 3285681,"Record function id": 0, "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595345286.295, "dur": 1022.298, + "args": { + "External id": 3285682,"Sequence number": 32987130, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6985 + } + }, + { + "ph": "s", "id": 202, "pid": 1336756, "tid": 1336756, "ts": 1587595345286.295, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595345350.916, "dur": 43.306, + "args": { + "External id": 3285683,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595345406.587, "dur": 103.063, + "args": { + "External id": 3285684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595345518.307, "dur": 40.186, + "args": { + "External id": 3285685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595345566.646, "dur": 31.156, + "args": { + "External id": 3285686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595345623.336, "dur": 27.868, + "args": { + "External id": 3285687,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595345671.510, "dur": 16.479, + "args": { + "External id": 3285688,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595345704.826, "dur": 122.495, + "args": { + "External id": 3285689,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595345751.493, "dur": 10.636, + "args": { + "External id": 3285690,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595345756.845, "dur": 4.544, + "args": { + "External id": 3285691,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595345764.847, "dur": 4.790, + "args": { + "External id": 3285692,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595345771.165, "dur": 1.035, + "args": { + "External id": 3285693,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595345774.747, "dur": 4.516, + "args": { + "External id": 3285694,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595345837.926, "dur": 64.913, + "args": { + "External id": 3285695,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595345935.310, "dur": 29.130, + "args": { + "External id": 3285696,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595345974.646, "dur": 82.117, + "args": { + "External id": 3285697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595346069.025, "dur": 36.913, + "args": { + "External id": 3285698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595346135.297, "dur": 27.798, + "args": { + "External id": 3285699,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595346171.366, "dur": 34.934, + "args": { + "External id": 3285700,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595346224.423, "dur": 18.902, + "args": { + "External id": 3285701,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 1336756, "tid": 1336756, + "ts": 1587595346383.882, "dur": 72.930, + "args": { + "External id": 3285702,"Record function id": 0, "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595346528.263, "dur": 48.447, + "args": { + "External id": 3285703,"Record function id": 0, "Ev Idx": 7006 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 1336756, "tid": 1336756, + "ts": 1587595346586.116, "dur": 18210.897, + "args": { + "External id": 3285704,"Record function id": 0, "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 1336756, "tid": 1336756, + "ts": 1587595346594.503, "dur": 893.157, + "args": { + "External id": 3285705,"Record function id": 0, "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595346669.790, "dur": 7.757, + "args": { + "External id": 3285706,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595346690.536, "dur": 39.006, + "args": { + "External id": 3285707,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346696.111, "dur": 2.452, + "args": { + "External id": 3285708,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346703.646, "dur": 0.296, + "args": { + "External id": 3285709,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346705.731, "dur": 0.689, + "args": { + "External id": 3285710,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346707.958, "dur": 0.461, + "args": { + "External id": 3285711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346711.992, "dur": 0.817, + "args": { + "External id": 3285712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346714.436, "dur": 0.582, + "args": { + "External id": 3285713,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346716.420, "dur": 3.739, + "args": { + "External id": 3285714,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346721.543, "dur": 0.180, + "args": { + "External id": 3285715,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346723.281, "dur": 0.494, + "args": { + "External id": 3285716,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595346741.098, "dur": 39.003, + "args": { + "External id": 3285717,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595346811.764, "dur": 140.265, + "args": { + "External id": 3285718,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595346824.521, "dur": 3.292, + "args": { + "External id": 3285719,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595346832.386, "dur": 10.438, + "args": { + "External id": 3285720,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595346836.791, "dur": 5.617, + "args": { + "External id": 3285721,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346840.357, "dur": 0.553, + "args": { + "External id": 3285722,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595346849.399, "dur": 52.053, + "args": { + "External id": 3285723,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346852.069, "dur": 2.376, + "args": { + "External id": 3285724,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346856.275, "dur": 0.354, + "args": { + "External id": 3285725,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346858.241, "dur": 0.598, + "args": { + "External id": 3285726,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346862.558, "dur": 1.469, + "args": { + "External id": 3285727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346865.632, "dur": 0.603, + "args": { + "External id": 3285728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346867.901, "dur": 0.182, + "args": { + "External id": 3285729,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346871.763, "dur": 16.930, + "args": { + "External id": 3285730,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346891.606, "dur": 0.380, + "args": { + "External id": 3285731,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595346893.836, "dur": 2.611, + "args": { + "External id": 3285732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595346915.598, "dur": 28.290, + "args": { + "External id": 3285733,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595347045.551, "dur": 351.038, + "args": { + "External id": 3285734,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595347081.300, "dur": 310.773, + "args": { + "External id": 3285735,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7038, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595347092.271, "dur": 294.533, + "args": { + "External id": 3285736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595347420.992, "dur": 2.655, + "args": { + "External id": 3285737,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7040, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 1336756, "tid": 1336756, + "ts": 1587595347507.633, "dur": 17107.582, + "args": { + "External id": 3285738,"Record function id": 0, "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347598.117, "dur": 6.159, + "args": { + "External id": 3285739,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347607.252, "dur": 1.227, + "args": { + "External id": 3285740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347610.094, "dur": 2.223, + "args": { + "External id": 3285741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347613.767, "dur": 1.117, + "args": { + "External id": 3285742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347616.047, "dur": 1.107, + "args": { + "External id": 3285743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347618.383, "dur": 1.251, + "args": { + "External id": 3285744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347623.826, "dur": 0.686, + "args": { + "External id": 3285745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347626.170, "dur": 2.160, + "args": { + "External id": 3285746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347629.756, "dur": 0.842, + "args": { + "External id": 3285747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595347632.025, "dur": 0.789, + "args": { + "External id": 3285748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595347652.658, "dur": 16925.090, + "args": { + "External id": 3285749,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595347667.843, "dur": 16902.859, + "args": { + "External id": 3285750,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595347690.751, "dur": 15.351, + "args": { + "External id": 3285751,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595347709.107, "dur": 16828.545, + "args": { + "External id": 3285752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595347711.389, "dur": 16825.724, + "args": { + "External id": 3285753,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595347717.789, "dur": 4.890, + "args": { + "External id": 3285754,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595347724.442, "dur": 16809.751, + "args": { + "External id": 3285755,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595364739.139, "dur": 34.463, + "args": { + "External id": 3285756,"Sequence number": 32987131, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7059 + } + }, + { + "ph": "s", "id": 201, "pid": 1336756, "tid": 1336756, "ts": 1587595364739.139, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595364761.937, "dur": 7.338, + "args": { + "External id": 3285757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595364765.183, "dur": 3.879, + "args": { + "External id": 3285758,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595364836.920, "dur": 87.286, + "args": { + "External id": 3285759,"Record function id": 0, "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595364926.935, "dur": 1048.388, + "args": { + "External id": 3285760,"Record function id": 0, "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595364967.989, "dur": 995.023, + "args": { + "External id": 3285761,"Sequence number": 32987132, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7064 + } + }, + { + "ph": "s", "id": 200, "pid": 1336756, "tid": 1336756, "ts": 1587595364967.989, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595365075.566, "dur": 43.755, + "args": { + "External id": 3285762,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595365131.638, "dur": 101.815, + "args": { + "External id": 3285763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595365243.718, "dur": 38.301, + "args": { + "External id": 3285764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595365289.880, "dur": 30.791, + "args": { + "External id": 3285765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595365344.096, "dur": 23.901, + "args": { + "External id": 3285766,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595365386.770, "dur": 16.436, + "args": { + "External id": 3285767,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595365422.237, "dur": 128.520, + "args": { + "External id": 3285768,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595365472.830, "dur": 10.252, + "args": { + "External id": 3285769,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595365477.974, "dur": 4.416, + "args": { + "External id": 3285770,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595365486.255, "dur": 4.745, + "args": { + "External id": 3285771,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595365492.382, "dur": 0.865, + "args": { + "External id": 3285772,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595365495.671, "dur": 4.030, + "args": { + "External id": 3285773,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595365561.738, "dur": 42.759, + "args": { + "External id": 3285774,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595365634.384, "dur": 25.948, + "args": { + "External id": 3285775,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595365668.916, "dur": 39.898, + "args": { + "External id": 3285776,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595365716.635, "dur": 35.447, + "args": { + "External id": 3285777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595365774.479, "dur": 27.079, + "args": { + "External id": 3285778,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595365806.967, "dur": 33.882, + "args": { + "External id": 3285779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595365860.917, "dur": 33.141, + "args": { + "External id": 3285780,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 1336756, "tid": 1336756, + "ts": 1587595366073.478, "dur": 75.284, + "args": { + "External id": 3285781,"Record function id": 0, "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595366224.308, "dur": 48.037, + "args": { + "External id": 3285782,"Record function id": 0, "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 1336756, "tid": 1336756, + "ts": 1587595366282.054, "dur": 18207.971, + "args": { + "External id": 3285783,"Record function id": 0, "Ev Idx": 7086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 1336756, "tid": 1336756, + "ts": 1587595366290.877, "dur": 910.706, + "args": { + "External id": 3285784,"Record function id": 0, "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595366368.884, "dur": 8.424, + "args": { + "External id": 3285785,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595366390.120, "dur": 46.007, + "args": { + "External id": 3285786,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366395.661, "dur": 2.566, + "args": { + "External id": 3285787,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366403.340, "dur": 0.326, + "args": { + "External id": 3285788,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366404.969, "dur": 0.386, + "args": { + "External id": 3285789,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366407.422, "dur": 0.268, + "args": { + "External id": 3285790,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366411.878, "dur": 0.453, + "args": { + "External id": 3285791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366413.679, "dur": 0.384, + "args": { + "External id": 3285792,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366420.738, "dur": 4.402, + "args": { + "External id": 3285793,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366427.040, "dur": 0.383, + "args": { + "External id": 3285794,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366428.822, "dur": 0.523, + "args": { + "External id": 3285795,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595366450.344, "dur": 41.593, + "args": { + "External id": 3285796,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595366523.024, "dur": 114.711, + "args": { + "External id": 3285797,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595366535.964, "dur": 5.262, + "args": { + "External id": 3285798,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595366546.211, "dur": 10.814, + "args": { + "External id": 3285799,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595366550.202, "dur": 6.415, + "args": { + "External id": 3285800,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366554.350, "dur": 1.069, + "args": { + "External id": 3285801,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595366564.097, "dur": 30.255, + "args": { + "External id": 3285802,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366567.130, "dur": 0.391, + "args": { + "External id": 3285803,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366569.173, "dur": 2.495, + "args": { + "External id": 3285804,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366573.277, "dur": 0.490, + "args": { + "External id": 3285805,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366575.228, "dur": 1.470, + "args": { + "External id": 3285806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366580.252, "dur": 0.187, + "args": { + "External id": 3285807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366581.791, "dur": 0.397, + "args": { + "External id": 3285808,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366583.714, "dur": 0.367, + "args": { + "External id": 3285809,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366587.464, "dur": 0.584, + "args": { + "External id": 3285810,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595366589.768, "dur": 0.347, + "args": { + "External id": 3285811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595366605.368, "dur": 25.153, + "args": { + "External id": 3285812,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595366689.707, "dur": 410.415, + "args": { + "External id": 3285813,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595366723.448, "dur": 370.965, + "args": { + "External id": 3285814,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7117, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595366733.186, "dur": 354.778, + "args": { + "External id": 3285815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595367129.091, "dur": 2.916, + "args": { + "External id": 3285816,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7119, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 1336756, "tid": 1336756, + "ts": 1587595367222.421, "dur": 17074.623, + "args": { + "External id": 3285817,"Record function id": 0, "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367317.619, "dur": 6.198, + "args": { + "External id": 3285818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367327.242, "dur": 1.079, + "args": { + "External id": 3285819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367329.963, "dur": 2.407, + "args": { + "External id": 3285820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367333.925, "dur": 0.956, + "args": { + "External id": 3285821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367336.272, "dur": 0.982, + "args": { + "External id": 3285822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367338.776, "dur": 0.883, + "args": { + "External id": 3285823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367343.215, "dur": 0.899, + "args": { + "External id": 3285824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367345.588, "dur": 2.460, + "args": { + "External id": 3285825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367349.608, "dur": 0.752, + "args": { + "External id": 3285826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595367351.729, "dur": 0.731, + "args": { + "External id": 3285827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595367372.801, "dur": 16885.046, + "args": { + "External id": 3285828,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595367388.611, "dur": 16862.591, + "args": { + "External id": 3285829,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595367412.378, "dur": 13.905, + "args": { + "External id": 3285830,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595367429.286, "dur": 16786.858, + "args": { + "External id": 3285831,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595367431.784, "dur": 16783.818, + "args": { + "External id": 3285832,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595367437.559, "dur": 6.054, + "args": { + "External id": 3285833,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595367445.392, "dur": 16767.335, + "args": { + "External id": 3285834,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595384430.466, "dur": 34.993, + "args": { + "External id": 3285835,"Sequence number": 32987133, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7138 + } + }, + { + "ph": "s", "id": 199, "pid": 1336756, "tid": 1336756, "ts": 1587595384430.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595384453.242, "dur": 7.829, + "args": { + "External id": 3285836,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595384456.695, "dur": 4.175, + "args": { + "External id": 3285837,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595384528.839, "dur": 74.168, + "args": { + "External id": 3285838,"Record function id": 0, "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595384604.657, "dur": 1090.521, + "args": { + "External id": 3285839,"Record function id": 0, "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595384644.197, "dur": 1038.759, + "args": { + "External id": 3285840,"Sequence number": 32987134, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7143 + } + }, + { + "ph": "s", "id": 198, "pid": 1336756, "tid": 1336756, "ts": 1587595384644.197, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595384709.214, "dur": 39.553, + "args": { + "External id": 3285841,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595384760.228, "dur": 102.922, + "args": { + "External id": 3285842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595384888.346, "dur": 44.826, + "args": { + "External id": 3285843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595384944.926, "dur": 30.801, + "args": { + "External id": 3285844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595385038.731, "dur": 30.519, + "args": { + "External id": 3285845,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595385088.860, "dur": 15.127, + "args": { + "External id": 3285846,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595385122.592, "dur": 131.368, + "args": { + "External id": 3285847,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595385173.214, "dur": 12.282, + "args": { + "External id": 3285848,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595385178.508, "dur": 6.074, + "args": { + "External id": 3285849,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595385188.144, "dur": 5.901, + "args": { + "External id": 3285850,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595385195.652, "dur": 1.213, + "args": { + "External id": 3285851,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595385199.409, "dur": 4.699, + "args": { + "External id": 3285852,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595385272.329, "dur": 51.220, + "args": { + "External id": 3285853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595385360.161, "dur": 28.931, + "args": { + "External id": 3285854,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595385398.945, "dur": 40.357, + "args": { + "External id": 3285855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595385448.452, "dur": 35.120, + "args": { + "External id": 3285856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595385504.861, "dur": 25.932, + "args": { + "External id": 3285857,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595385536.460, "dur": 34.814, + "args": { + "External id": 3285858,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595385593.432, "dur": 20.208, + "args": { + "External id": 3285859,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 1336756, "tid": 1336756, + "ts": 1587595385757.169, "dur": 72.082, + "args": { + "External id": 3285860,"Record function id": 0, "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595385917.744, "dur": 46.809, + "args": { + "External id": 3285861,"Record function id": 0, "Ev Idx": 7164 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 1336756, "tid": 1336756, + "ts": 1587595385974.072, "dur": 18137.937, + "args": { + "External id": 3285862,"Record function id": 0, "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 1336756, "tid": 1336756, + "ts": 1587595386015.356, "dur": 1044.069, + "args": { + "External id": 3285863,"Record function id": 0, "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595386095.110, "dur": 9.338, + "args": { + "External id": 3285864,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595386118.394, "dur": 39.941, + "args": { + "External id": 3285865,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386123.665, "dur": 2.312, + "args": { + "External id": 3285866,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386130.949, "dur": 0.471, + "args": { + "External id": 3285867,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386133.065, "dur": 0.376, + "args": { + "External id": 3285868,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386135.374, "dur": 0.607, + "args": { + "External id": 3285869,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386139.425, "dur": 0.639, + "args": { + "External id": 3285870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386141.582, "dur": 0.630, + "args": { + "External id": 3285871,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386143.820, "dur": 4.139, + "args": { + "External id": 3285872,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386149.695, "dur": 0.401, + "args": { + "External id": 3285873,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386151.517, "dur": 0.375, + "args": { + "External id": 3285874,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595386171.900, "dur": 41.901, + "args": { + "External id": 3285875,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595386247.396, "dur": 128.229, + "args": { + "External id": 3285876,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595386258.171, "dur": 4.088, + "args": { + "External id": 3285877,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595386267.437, "dur": 10.368, + "args": { + "External id": 3285878,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595386271.830, "dur": 5.562, + "args": { + "External id": 3285879,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386275.562, "dur": 0.642, + "args": { + "External id": 3285880,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595386284.872, "dur": 34.934, + "args": { + "External id": 3285881,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386288.117, "dur": 2.878, + "args": { + "External id": 3285882,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386292.667, "dur": 0.352, + "args": { + "External id": 3285883,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386294.718, "dur": 0.357, + "args": { + "External id": 3285884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386299.210, "dur": 1.649, + "args": { + "External id": 3285885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386302.399, "dur": 0.398, + "args": { + "External id": 3285886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386304.207, "dur": 0.311, + "args": { + "External id": 3285887,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386308.061, "dur": 0.558, + "args": { + "External id": 3285888,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386310.015, "dur": 0.425, + "args": { + "External id": 3285889,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595386311.895, "dur": 2.669, + "args": { + "External id": 3285890,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595386342.265, "dur": 25.611, + "args": { + "External id": 3285891,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595386427.769, "dur": 486.882, + "args": { + "External id": 3285892,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595386461.120, "dur": 447.616, + "args": { + "External id": 3285893,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7196, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595386473.120, "dur": 426.151, + "args": { + "External id": 3285894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595386944.729, "dur": 3.216, + "args": { + "External id": 3285895,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7198, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 1336756, "tid": 1336756, + "ts": 1587595387082.794, "dur": 16805.894, + "args": { + "External id": 3285896,"Record function id": 0, "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387187.397, "dur": 7.368, + "args": { + "External id": 3285897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387198.316, "dur": 1.306, + "args": { + "External id": 3285898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387201.345, "dur": 2.437, + "args": { + "External id": 3285899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387205.576, "dur": 1.030, + "args": { + "External id": 3285900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387208.032, "dur": 1.059, + "args": { + "External id": 3285901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387210.354, "dur": 0.688, + "args": { + "External id": 3285902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387215.010, "dur": 1.258, + "args": { + "External id": 3285903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387217.834, "dur": 1.822, + "args": { + "External id": 3285904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387221.361, "dur": 1.064, + "args": { + "External id": 3285905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595387224.020, "dur": 0.923, + "args": { + "External id": 3285906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595387247.232, "dur": 16593.633, + "args": { + "External id": 3285907,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595387263.924, "dur": 16570.137, + "args": { + "External id": 3285908,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595387286.021, "dur": 14.156, + "args": { + "External id": 3285909,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595387303.213, "dur": 16497.676, + "args": { + "External id": 3285910,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595387305.736, "dur": 16494.661, + "args": { + "External id": 3285911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595387311.671, "dur": 5.521, + "args": { + "External id": 3285912,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595387319.024, "dur": 16478.398, + "args": { + "External id": 3285913,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595404048.584, "dur": 37.814, + "args": { + "External id": 3285914,"Sequence number": 32987135, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7217 + } + }, + { + "ph": "s", "id": 197, "pid": 1336756, "tid": 1336756, "ts": 1587595404048.584, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595404073.917, "dur": 7.730, + "args": { + "External id": 3285915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595404076.962, "dur": 4.357, + "args": { + "External id": 3285916,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595404153.519, "dur": 73.626, + "args": { + "External id": 3285917,"Record function id": 0, "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595404229.088, "dur": 1097.693, + "args": { + "External id": 3285918,"Record function id": 0, "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595404271.926, "dur": 1041.277, + "args": { + "External id": 3285919,"Sequence number": 32987136, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7222 + } + }, + { + "ph": "s", "id": 196, "pid": 1336756, "tid": 1336756, "ts": 1587595404271.926, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595404340.970, "dur": 42.280, + "args": { + "External id": 3285920,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595404396.179, "dur": 105.899, + "args": { + "External id": 3285921,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595404512.774, "dur": 38.701, + "args": { + "External id": 3285922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595404560.657, "dur": 32.378, + "args": { + "External id": 3285923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595404618.763, "dur": 26.267, + "args": { + "External id": 3285924,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595404663.915, "dur": 16.647, + "args": { + "External id": 3285925,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595404699.010, "dur": 126.790, + "args": { + "External id": 3285926,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595404745.761, "dur": 11.534, + "args": { + "External id": 3285927,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595404751.256, "dur": 5.047, + "args": { + "External id": 3285928,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595404760.179, "dur": 6.174, + "args": { + "External id": 3285929,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595404767.874, "dur": 1.493, + "args": { + "External id": 3285930,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595404772.021, "dur": 4.173, + "args": { + "External id": 3285931,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595404836.695, "dur": 60.904, + "args": { + "External id": 3285932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595404932.463, "dur": 30.957, + "args": { + "External id": 3285933,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595404973.525, "dur": 80.875, + "args": { + "External id": 3285934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595405067.713, "dur": 36.650, + "args": { + "External id": 3285935,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595405130.439, "dur": 28.850, + "args": { + "External id": 3285936,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595405165.251, "dur": 34.534, + "args": { + "External id": 3285937,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595405220.759, "dur": 18.182, + "args": { + "External id": 3285938,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 1336756, "tid": 1336756, + "ts": 1587595405391.041, "dur": 71.207, + "args": { + "External id": 3285939,"Record function id": 0, "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595405534.329, "dur": 47.730, + "args": { + "External id": 3285940,"Record function id": 0, "Ev Idx": 7243 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 1336756, "tid": 1336756, + "ts": 1587595405591.879, "dur": 18139.144, + "args": { + "External id": 3285941,"Record function id": 0, "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 1336756, "tid": 1336756, + "ts": 1587595405601.445, "dur": 884.391, + "args": { + "External id": 3285942,"Record function id": 0, "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595405678.478, "dur": 7.798, + "args": { + "External id": 3285943,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595405699.069, "dur": 37.754, + "args": { + "External id": 3285944,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405704.065, "dur": 2.359, + "args": { + "External id": 3285945,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405711.429, "dur": 0.272, + "args": { + "External id": 3285946,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405713.098, "dur": 0.163, + "args": { + "External id": 3285947,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405714.918, "dur": 0.560, + "args": { + "External id": 3285948,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405719.094, "dur": 0.294, + "args": { + "External id": 3285949,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405721.144, "dur": 0.385, + "args": { + "External id": 3285950,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405722.858, "dur": 3.517, + "args": { + "External id": 3285951,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405728.323, "dur": 0.390, + "args": { + "External id": 3285952,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405730.180, "dur": 0.295, + "args": { + "External id": 3285953,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595405749.080, "dur": 38.091, + "args": { + "External id": 3285954,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595405817.846, "dur": 141.947, + "args": { + "External id": 3285955,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595405827.850, "dur": 4.016, + "args": { + "External id": 3285956,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595405836.911, "dur": 14.562, + "args": { + "External id": 3285957,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595405845.258, "dur": 5.801, + "args": { + "External id": 3285958,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405849.470, "dur": 0.456, + "args": { + "External id": 3285959,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595405858.644, "dur": 51.596, + "args": { + "External id": 3285960,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405860.878, "dur": 2.213, + "args": { + "External id": 3285961,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405865.118, "dur": 0.443, + "args": { + "External id": 3285962,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405866.881, "dur": 0.250, + "args": { + "External id": 3285963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405871.339, "dur": 18.083, + "args": { + "External id": 3285964,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405892.198, "dur": 0.261, + "args": { + "External id": 3285965,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405894.540, "dur": 0.825, + "args": { + "External id": 3285966,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405898.876, "dur": 0.406, + "args": { + "External id": 3285967,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405900.895, "dur": 0.492, + "args": { + "External id": 3285968,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595405902.814, "dur": 2.195, + "args": { + "External id": 3285969,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595405923.020, "dur": 28.569, + "args": { + "External id": 3285970,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595406051.757, "dur": 345.790, + "args": { + "External id": 3285971,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595406086.489, "dur": 306.362, + "args": { + "External id": 3285972,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7275, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595406097.117, "dur": 290.201, + "args": { + "External id": 3285973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595406421.643, "dur": 2.367, + "args": { + "External id": 3285974,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7277, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 1336756, "tid": 1336756, + "ts": 1587595406505.594, "dur": 17035.318, + "args": { + "External id": 3285975,"Record function id": 0, "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406601.327, "dur": 6.051, + "args": { + "External id": 3285976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406610.414, "dur": 1.006, + "args": { + "External id": 3285977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406613.427, "dur": 2.659, + "args": { + "External id": 3285978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406618.035, "dur": 1.027, + "args": { + "External id": 3285979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406620.446, "dur": 1.363, + "args": { + "External id": 3285980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406623.095, "dur": 1.061, + "args": { + "External id": 3285981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406628.308, "dur": 1.025, + "args": { + "External id": 3285982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406631.000, "dur": 1.772, + "args": { + "External id": 3285983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406634.393, "dur": 1.048, + "args": { + "External id": 3285984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595406636.780, "dur": 0.960, + "args": { + "External id": 3285985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595406658.907, "dur": 16846.068, + "args": { + "External id": 3285986,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595406674.771, "dur": 16823.592, + "args": { + "External id": 3285987,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595406696.385, "dur": 13.936, + "args": { + "External id": 3285988,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595406713.161, "dur": 16752.161, + "args": { + "External id": 3285989,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595406715.747, "dur": 16749.032, + "args": { + "External id": 3285990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595406721.932, "dur": 5.518, + "args": { + "External id": 3285991,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595406729.235, "dur": 16732.507, + "args": { + "External id": 3285992,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595423670.593, "dur": 35.503, + "args": { + "External id": 3285993,"Sequence number": 32987137, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7296 + } + }, + { + "ph": "s", "id": 195, "pid": 1336756, "tid": 1336756, "ts": 1587595423670.593, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595423694.124, "dur": 7.268, + "args": { + "External id": 3285994,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595423697.261, "dur": 3.837, + "args": { + "External id": 3285995,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595423769.969, "dur": 74.533, + "args": { + "External id": 3285996,"Record function id": 0, "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595423845.984, "dur": 1127.005, + "args": { + "External id": 3285997,"Record function id": 0, "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595423901.738, "dur": 1057.960, + "args": { + "External id": 3285998,"Sequence number": 32987138, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7301 + } + }, + { + "ph": "s", "id": 194, "pid": 1336756, "tid": 1336756, "ts": 1587595423901.738, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595423971.240, "dur": 73.940, + "args": { + "External id": 3285999,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595424062.006, "dur": 105.138, + "args": { + "External id": 3286000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595424177.453, "dur": 39.251, + "args": { + "External id": 3286001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595424226.136, "dur": 30.787, + "args": { + "External id": 3286002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595424290.943, "dur": 30.724, + "args": { + "External id": 3286003,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595424338.690, "dur": 14.640, + "args": { + "External id": 3286004,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595424371.720, "dur": 128.540, + "args": { + "External id": 3286005,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595424419.755, "dur": 11.376, + "args": { + "External id": 3286006,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595424425.236, "dur": 5.208, + "args": { + "External id": 3286007,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595424434.180, "dur": 5.675, + "args": { + "External id": 3286008,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595424441.150, "dur": 1.215, + "args": { + "External id": 3286009,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595424444.977, "dur": 5.786, + "args": { + "External id": 3286010,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595424510.259, "dur": 44.765, + "args": { + "External id": 3286011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595424586.956, "dur": 29.502, + "args": { + "External id": 3286012,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595424626.395, "dur": 40.259, + "args": { + "External id": 3286013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595424673.334, "dur": 35.849, + "args": { + "External id": 3286014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595424733.573, "dur": 28.682, + "args": { + "External id": 3286015,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595424769.319, "dur": 36.843, + "args": { + "External id": 3286016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595424852.532, "dur": 37.147, + "args": { + "External id": 3286017,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 1336756, "tid": 1336756, + "ts": 1587595425072.788, "dur": 74.636, + "args": { + "External id": 3286018,"Record function id": 0, "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595425219.627, "dur": 46.681, + "args": { + "External id": 3286019,"Record function id": 0, "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 1336756, "tid": 1336756, + "ts": 1587595425275.985, "dur": 18179.697, + "args": { + "External id": 3286020,"Record function id": 0, "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 1336756, "tid": 1336756, + "ts": 1587595425283.389, "dur": 892.794, + "args": { + "External id": 3286021,"Record function id": 0, "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595425362.572, "dur": 9.438, + "args": { + "External id": 3286022,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595425385.836, "dur": 41.726, + "args": { + "External id": 3286023,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425391.442, "dur": 2.363, + "args": { + "External id": 3286024,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425398.491, "dur": 0.392, + "args": { + "External id": 3286025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425400.722, "dur": 0.608, + "args": { + "External id": 3286026,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425402.991, "dur": 0.255, + "args": { + "External id": 3286027,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425408.923, "dur": 0.464, + "args": { + "External id": 3286028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425410.751, "dur": 0.634, + "args": { + "External id": 3286029,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425413.337, "dur": 3.978, + "args": { + "External id": 3286030,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425418.782, "dur": 0.196, + "args": { + "External id": 3286031,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425420.364, "dur": 0.484, + "args": { + "External id": 3286032,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595425438.760, "dur": 41.158, + "args": { + "External id": 3286033,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595425511.221, "dur": 114.975, + "args": { + "External id": 3286034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595425521.277, "dur": 3.830, + "args": { + "External id": 3286035,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595425530.014, "dur": 10.638, + "args": { + "External id": 3286036,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595425534.518, "dur": 5.720, + "args": { + "External id": 3286037,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425538.424, "dur": 0.540, + "args": { + "External id": 3286038,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595425547.610, "dur": 32.982, + "args": { + "External id": 3286039,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425550.103, "dur": 2.814, + "args": { + "External id": 3286040,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425554.574, "dur": 0.202, + "args": { + "External id": 3286041,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425556.413, "dur": 0.382, + "args": { + "External id": 3286042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425560.832, "dur": 1.582, + "args": { + "External id": 3286043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425564.265, "dur": 0.368, + "args": { + "External id": 3286044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425566.145, "dur": 0.214, + "args": { + "External id": 3286045,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425570.259, "dur": 0.195, + "args": { + "External id": 3286046,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425572.099, "dur": 0.521, + "args": { + "External id": 3286047,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595425574.016, "dur": 2.248, + "args": { + "External id": 3286048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595425593.373, "dur": 25.394, + "args": { + "External id": 3286049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595425677.504, "dur": 399.763, + "args": { + "External id": 3286050,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595425710.651, "dur": 361.176, + "args": { + "External id": 3286051,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7354, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595425720.304, "dur": 345.225, + "args": { + "External id": 3286052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595426105.119, "dur": 2.917, + "args": { + "External id": 3286053,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7356, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 1336756, "tid": 1336756, + "ts": 1587595426197.066, "dur": 17069.614, + "args": { + "External id": 3286054,"Record function id": 0, "Ev Idx": 7357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426297.911, "dur": 6.311, + "args": { + "External id": 3286055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426307.588, "dur": 1.195, + "args": { + "External id": 3286056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426310.475, "dur": 2.586, + "args": { + "External id": 3286057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426314.822, "dur": 0.585, + "args": { + "External id": 3286058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426316.661, "dur": 0.770, + "args": { + "External id": 3286059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426318.824, "dur": 0.721, + "args": { + "External id": 3286060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426323.584, "dur": 1.265, + "args": { + "External id": 3286061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426326.348, "dur": 1.901, + "args": { + "External id": 3286062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426329.709, "dur": 0.947, + "args": { + "External id": 3286063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595426332.211, "dur": 0.591, + "args": { + "External id": 3286064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595426354.227, "dur": 16875.323, + "args": { + "External id": 3286065,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595426370.413, "dur": 16852.313, + "args": { + "External id": 3286066,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595426395.389, "dur": 14.510, + "args": { + "External id": 3286067,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595426412.674, "dur": 16777.708, + "args": { + "External id": 3286068,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595426415.220, "dur": 16774.686, + "args": { + "External id": 3286069,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595426422.076, "dur": 5.892, + "args": { + "External id": 3286070,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595426429.550, "dur": 16757.072, + "args": { + "External id": 3286071,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595443394.700, "dur": 37.149, + "args": { + "External id": 3286072,"Sequence number": 32987139, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7375 + } + }, + { + "ph": "s", "id": 193, "pid": 1336756, "tid": 1336756, "ts": 1587595443394.700, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595443419.718, "dur": 7.795, + "args": { + "External id": 3286073,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595443423.043, "dur": 4.229, + "args": { + "External id": 3286074,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595443494.028, "dur": 75.004, + "args": { + "External id": 3286075,"Record function id": 0, "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595443570.749, "dur": 1075.077, + "args": { + "External id": 3286076,"Record function id": 0, "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595443609.443, "dur": 1023.835, + "args": { + "External id": 3286077,"Sequence number": 32987140, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7380 + } + }, + { + "ph": "s", "id": 192, "pid": 1336756, "tid": 1336756, "ts": 1587595443609.443, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595443672.924, "dur": 36.081, + "args": { + "External id": 3286078,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595443721.136, "dur": 100.029, + "args": { + "External id": 3286079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595443831.819, "dur": 36.978, + "args": { + "External id": 3286080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595443897.648, "dur": 38.212, + "args": { + "External id": 3286081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595443963.643, "dur": 59.473, + "args": { + "External id": 3286082,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595444045.549, "dur": 17.990, + "args": { + "External id": 3286083,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595444083.052, "dur": 129.725, + "args": { + "External id": 3286084,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595444131.617, "dur": 12.064, + "args": { + "External id": 3286085,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595444137.089, "dur": 5.830, + "args": { + "External id": 3286086,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595444146.836, "dur": 6.049, + "args": { + "External id": 3286087,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595444154.064, "dur": 1.459, + "args": { + "External id": 3286088,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595444158.175, "dur": 4.232, + "args": { + "External id": 3286089,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595444223.472, "dur": 50.513, + "args": { + "External id": 3286090,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595444305.619, "dur": 28.321, + "args": { + "External id": 3286091,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595444343.096, "dur": 41.161, + "args": { + "External id": 3286092,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595444392.506, "dur": 34.064, + "args": { + "External id": 3286093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595444450.355, "dur": 31.289, + "args": { + "External id": 3286094,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595444487.361, "dur": 34.731, + "args": { + "External id": 3286095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595444542.535, "dur": 17.786, + "args": { + "External id": 3286096,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 1336756, "tid": 1336756, + "ts": 1587595444708.859, "dur": 73.806, + "args": { + "External id": 3286097,"Record function id": 0, "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595444856.387, "dur": 66.371, + "args": { + "External id": 3286098,"Record function id": 0, "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 1336756, "tid": 1336756, + "ts": 1587595444933.574, "dur": 18322.314, + "args": { + "External id": 3286099,"Record function id": 0, "Ev Idx": 7402 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 1336756, "tid": 1336756, + "ts": 1587595444944.628, "dur": 874.766, + "args": { + "External id": 3286100,"Record function id": 0, "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595445063.216, "dur": 9.292, + "args": { + "External id": 3286101,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595445086.759, "dur": 39.024, + "args": { + "External id": 3286102,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445092.183, "dur": 2.445, + "args": { + "External id": 3286103,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445099.574, "dur": 0.565, + "args": { + "External id": 3286104,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445102.246, "dur": 0.384, + "args": { + "External id": 3286105,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445104.114, "dur": 0.410, + "args": { + "External id": 3286106,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445107.785, "dur": 0.561, + "args": { + "External id": 3286107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445109.889, "dur": 0.385, + "args": { + "External id": 3286108,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445111.549, "dur": 3.353, + "args": { + "External id": 3286109,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445116.656, "dur": 0.376, + "args": { + "External id": 3286110,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445118.890, "dur": 0.144, + "args": { + "External id": 3286111,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595445137.753, "dur": 42.115, + "args": { + "External id": 3286112,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595445212.623, "dur": 121.808, + "args": { + "External id": 3286113,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595445222.809, "dur": 4.732, + "args": { + "External id": 3286114,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595445232.299, "dur": 10.622, + "args": { + "External id": 3286115,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595445236.679, "dur": 5.824, + "args": { + "External id": 3286116,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445240.665, "dur": 0.614, + "args": { + "External id": 3286117,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595445250.160, "dur": 39.461, + "args": { + "External id": 3286118,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445252.479, "dur": 2.513, + "args": { + "External id": 3286119,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445256.774, "dur": 0.556, + "args": { + "External id": 3286120,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445258.810, "dur": 0.197, + "args": { + "External id": 3286121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445266.812, "dur": 2.513, + "args": { + "External id": 3286122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445270.788, "dur": 0.295, + "args": { + "External id": 3286123,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445272.766, "dur": 0.475, + "args": { + "External id": 3286124,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445277.177, "dur": 0.521, + "args": { + "External id": 3286125,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445279.227, "dur": 0.410, + "args": { + "External id": 3286126,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595445281.218, "dur": 2.425, + "args": { + "External id": 3286127,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595445300.762, "dur": 25.663, + "args": { + "External id": 3286128,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595445386.135, "dur": 346.519, + "args": { + "External id": 3286129,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595445420.119, "dur": 307.570, + "args": { + "External id": 3286130,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7433, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595445430.085, "dur": 292.193, + "args": { + "External id": 3286131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595445755.886, "dur": 1.998, + "args": { + "External id": 3286132,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7435, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 1336756, "tid": 1336756, + "ts": 1587595445838.351, "dur": 17227.735, + "args": { + "External id": 3286133,"Record function id": 0, "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595445955.020, "dur": 6.519, + "args": { + "External id": 3286134,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595445965.346, "dur": 1.216, + "args": { + "External id": 3286135,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595445968.397, "dur": 2.143, + "args": { + "External id": 3286136,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595445972.172, "dur": 0.995, + "args": { + "External id": 3286137,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595445974.728, "dur": 1.107, + "args": { + "External id": 3286138,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595445977.226, "dur": 0.668, + "args": { + "External id": 3286139,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595446015.031, "dur": 1.827, + "args": { + "External id": 3286140,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595446020.153, "dur": 2.427, + "args": { + "External id": 3286141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595446024.480, "dur": 0.866, + "args": { + "External id": 3286142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595446027.127, "dur": 0.594, + "args": { + "External id": 3286143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595446050.126, "dur": 16977.283, + "args": { + "External id": 3286144,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595446066.600, "dur": 16953.995, + "args": { + "External id": 3286145,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595446090.577, "dur": 13.493, + "args": { + "External id": 3286146,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595446106.966, "dur": 16858.072, + "args": { + "External id": 3286147,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595446109.384, "dur": 16855.204, + "args": { + "External id": 3286148,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595446115.761, "dur": 6.363, + "args": { + "External id": 3286149,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595446123.781, "dur": 16837.833, + "args": { + "External id": 3286150,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595463196.949, "dur": 34.331, + "args": { + "External id": 3286151,"Sequence number": 32987141, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7454 + } + }, + { + "ph": "s", "id": 191, "pid": 1336756, "tid": 1336756, "ts": 1587595463196.949, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595463219.088, "dur": 7.332, + "args": { + "External id": 3286152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595463222.503, "dur": 3.627, + "args": { + "External id": 3286153,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595463295.354, "dur": 75.075, + "args": { + "External id": 3286154,"Record function id": 0, "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595463372.074, "dur": 1076.256, + "args": { + "External id": 3286155,"Record function id": 0, "Ev Idx": 7458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595463414.533, "dur": 1021.423, + "args": { + "External id": 3286156,"Sequence number": 32987142, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7459 + } + }, + { + "ph": "s", "id": 190, "pid": 1336756, "tid": 1336756, "ts": 1587595463414.533, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595463484.649, "dur": 40.016, + "args": { + "External id": 3286157,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595463538.262, "dur": 104.012, + "args": { + "External id": 3286158,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595463652.167, "dur": 39.774, + "args": { + "External id": 3286159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595463698.892, "dur": 31.347, + "args": { + "External id": 3286160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595463757.234, "dur": 26.066, + "args": { + "External id": 3286161,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595463799.321, "dur": 18.656, + "args": { + "External id": 3286162,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595463832.814, "dur": 180.957, + "args": { + "External id": 3286163,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595463897.130, "dur": 12.366, + "args": { + "External id": 3286164,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595463902.133, "dur": 6.301, + "args": { + "External id": 3286165,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595463912.522, "dur": 5.806, + "args": { + "External id": 3286166,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595463919.991, "dur": 3.198, + "args": { + "External id": 3286167,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595463925.569, "dur": 3.902, + "args": { + "External id": 3286168,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595464028.188, "dur": 54.473, + "args": { + "External id": 3286169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595464115.890, "dur": 29.467, + "args": { + "External id": 3286170,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595464154.515, "dur": 40.660, + "args": { + "External id": 3286171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595464201.974, "dur": 35.667, + "args": { + "External id": 3286172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595464263.560, "dur": 26.576, + "args": { + "External id": 3286173,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595464295.765, "dur": 34.668, + "args": { + "External id": 3286174,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595464348.560, "dur": 21.483, + "args": { + "External id": 3286175,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 1336756, "tid": 1336756, + "ts": 1587595464511.403, "dur": 70.011, + "args": { + "External id": 3286176,"Record function id": 0, "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595464652.845, "dur": 46.806, + "args": { + "External id": 3286177,"Record function id": 0, "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 1336756, "tid": 1336756, + "ts": 1587595464708.823, "dur": 18324.982, + "args": { + "External id": 3286178,"Record function id": 0, "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 1336756, "tid": 1336756, + "ts": 1587595464717.398, "dur": 883.829, + "args": { + "External id": 3286179,"Record function id": 0, "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595464791.511, "dur": 7.002, + "args": { + "External id": 3286180,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595464811.369, "dur": 38.851, + "args": { + "External id": 3286181,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464817.183, "dur": 2.102, + "args": { + "External id": 3286182,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464824.083, "dur": 0.350, + "args": { + "External id": 3286183,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464825.933, "dur": 0.222, + "args": { + "External id": 3286184,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464827.842, "dur": 0.331, + "args": { + "External id": 3286185,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464831.735, "dur": 0.227, + "args": { + "External id": 3286186,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464834.323, "dur": 0.148, + "args": { + "External id": 3286187,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464836.293, "dur": 3.213, + "args": { + "External id": 3286188,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464841.128, "dur": 0.233, + "args": { + "External id": 3286189,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464843.209, "dur": 0.145, + "args": { + "External id": 3286190,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595464861.526, "dur": 55.513, + "args": { + "External id": 3286191,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595464951.517, "dur": 161.165, + "args": { + "External id": 3286192,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595464962.669, "dur": 4.515, + "args": { + "External id": 3286193,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595464972.219, "dur": 45.777, + "args": { + "External id": 3286194,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595464976.322, "dur": 41.245, + "args": { + "External id": 3286195,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595464980.552, "dur": 34.572, + "args": { + "External id": 3286196,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595465028.247, "dur": 32.893, + "args": { + "External id": 3286197,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465031.367, "dur": 2.882, + "args": { + "External id": 3286198,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465035.976, "dur": 0.780, + "args": { + "External id": 3286199,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465038.181, "dur": 0.481, + "args": { + "External id": 3286200,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465042.407, "dur": 1.620, + "args": { + "External id": 3286201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465045.665, "dur": 0.138, + "args": { + "External id": 3286202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465047.615, "dur": 0.479, + "args": { + "External id": 3286203,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465050.214, "dur": 0.189, + "args": { + "External id": 3286204,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465051.679, "dur": 0.185, + "args": { + "External id": 3286205,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465053.974, "dur": 2.260, + "args": { + "External id": 3286206,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595465075.622, "dur": 28.906, + "args": { + "External id": 3286207,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595465168.132, "dur": 343.572, + "args": { + "External id": 3286208,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595465201.640, "dur": 305.400, + "args": { + "External id": 3286209,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7512, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595465211.806, "dur": 290.272, + "args": { + "External id": 3286210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595465536.134, "dur": 2.254, + "args": { + "External id": 3286211,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7514, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 1336756, "tid": 1336756, + "ts": 1587595465621.448, "dur": 17168.202, + "args": { + "External id": 3286212,"Record function id": 0, "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465719.020, "dur": 5.202, + "args": { + "External id": 3286213,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465727.563, "dur": 0.778, + "args": { + "External id": 3286214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465730.034, "dur": 2.500, + "args": { + "External id": 3286215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465734.030, "dur": 0.607, + "args": { + "External id": 3286216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465736.391, "dur": 0.685, + "args": { + "External id": 3286217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465738.318, "dur": 0.982, + "args": { + "External id": 3286218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465743.709, "dur": 1.043, + "args": { + "External id": 3286219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465746.215, "dur": 2.067, + "args": { + "External id": 3286220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465750.272, "dur": 1.150, + "args": { + "External id": 3286221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595465753.078, "dur": 0.907, + "args": { + "External id": 3286222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595465774.203, "dur": 16978.268, + "args": { + "External id": 3286223,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595465789.599, "dur": 16956.348, + "args": { + "External id": 3286224,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595465815.125, "dur": 13.267, + "args": { + "External id": 3286225,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595465831.366, "dur": 16881.929, + "args": { + "External id": 3286226,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595465837.594, "dur": 16875.122, + "args": { + "External id": 3286227,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595465846.920, "dur": 5.678, + "args": { + "External id": 3286228,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595465854.372, "dur": 16855.242, + "args": { + "External id": 3286229,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595482930.231, "dur": 38.537, + "args": { + "External id": 3286230,"Sequence number": 32987143, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7533 + } + }, + { + "ph": "s", "id": 189, "pid": 1336756, "tid": 1336756, "ts": 1587595482930.231, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595482955.381, "dur": 8.100, + "args": { + "External id": 3286231,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595482958.926, "dur": 4.195, + "args": { + "External id": 3286232,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595483074.796, "dur": 73.908, + "args": { + "External id": 3286233,"Record function id": 0, "Ev Idx": 7536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595483150.936, "dur": 1088.431, + "args": { + "External id": 3286234,"Record function id": 0, "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595483194.002, "dur": 1031.879, + "args": { + "External id": 3286235,"Sequence number": 32987144, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7538 + } + }, + { + "ph": "s", "id": 188, "pid": 1336756, "tid": 1336756, "ts": 1587595483194.002, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595483263.825, "dur": 41.155, + "args": { + "External id": 3286236,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595483317.603, "dur": 103.263, + "args": { + "External id": 3286237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595483430.610, "dur": 39.333, + "args": { + "External id": 3286238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595483478.093, "dur": 31.649, + "args": { + "External id": 3286239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595483534.749, "dur": 27.701, + "args": { + "External id": 3286240,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595483581.128, "dur": 16.387, + "args": { + "External id": 3286241,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595483615.407, "dur": 125.198, + "args": { + "External id": 3286242,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595483661.961, "dur": 10.942, + "args": { + "External id": 3286243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595483666.785, "dur": 5.168, + "args": { + "External id": 3286244,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595483675.600, "dur": 5.463, + "args": { + "External id": 3286245,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595483682.519, "dur": 1.446, + "args": { + "External id": 3286246,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595483686.470, "dur": 4.506, + "args": { + "External id": 3286247,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595483751.088, "dur": 42.835, + "args": { + "External id": 3286248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595483824.883, "dur": 29.720, + "args": { + "External id": 3286249,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595483863.599, "dur": 60.971, + "args": { + "External id": 3286250,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595483936.975, "dur": 36.796, + "args": { + "External id": 3286251,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595484033.598, "dur": 32.538, + "args": { + "External id": 3286252,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595484073.255, "dur": 38.273, + "args": { + "External id": 3286253,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595484134.161, "dur": 18.802, + "args": { + "External id": 3286254,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 1336756, "tid": 1336756, + "ts": 1587595484303.773, "dur": 74.940, + "args": { + "External id": 3286255,"Record function id": 0, "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595484450.948, "dur": 46.866, + "args": { + "External id": 3286256,"Record function id": 0, "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 1336756, "tid": 1336756, + "ts": 1587595484507.886, "dur": 18097.139, + "args": { + "External id": 3286257,"Record function id": 0, "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 1336756, "tid": 1336756, + "ts": 1587595484515.357, "dur": 944.240, + "args": { + "External id": 3286258,"Record function id": 0, "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595484593.675, "dur": 8.187, + "args": { + "External id": 3286259,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595484615.108, "dur": 39.051, + "args": { + "External id": 3286260,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484620.165, "dur": 2.074, + "args": { + "External id": 3286261,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484626.863, "dur": 0.475, + "args": { + "External id": 3286262,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484629.043, "dur": 0.430, + "args": { + "External id": 3286263,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484631.103, "dur": 0.435, + "args": { + "External id": 3286264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484635.256, "dur": 0.403, + "args": { + "External id": 3286265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484637.078, "dur": 0.370, + "args": { + "External id": 3286266,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484639.054, "dur": 4.014, + "args": { + "External id": 3286267,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484644.591, "dur": 0.548, + "args": { + "External id": 3286268,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484647.039, "dur": 0.381, + "args": { + "External id": 3286269,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595484666.789, "dur": 39.980, + "args": { + "External id": 3286270,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595484738.040, "dur": 118.201, + "args": { + "External id": 3286271,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595484748.710, "dur": 4.140, + "args": { + "External id": 3286272,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595484757.481, "dur": 9.916, + "args": { + "External id": 3286273,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595484761.504, "dur": 5.504, + "args": { + "External id": 3286274,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484765.061, "dur": 0.653, + "args": { + "External id": 3286275,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595484774.518, "dur": 34.257, + "args": { + "External id": 3286276,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484777.452, "dur": 2.817, + "args": { + "External id": 3286277,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484781.806, "dur": 0.707, + "args": { + "External id": 3286278,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484784.091, "dur": 0.542, + "args": { + "External id": 3286279,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484788.484, "dur": 2.147, + "args": { + "External id": 3286280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484792.352, "dur": 0.175, + "args": { + "External id": 3286281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484794.129, "dur": 0.196, + "args": { + "External id": 3286282,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484797.598, "dur": 0.376, + "args": { + "External id": 3286283,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484799.459, "dur": 0.167, + "args": { + "External id": 3286284,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595484800.856, "dur": 2.630, + "args": { + "External id": 3286285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595484823.111, "dur": 25.645, + "args": { + "External id": 3286286,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595484930.200, "dur": 428.688, + "args": { + "External id": 3286287,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595484965.755, "dur": 387.449, + "args": { + "External id": 3286288,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7591, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595484978.286, "dur": 369.233, + "args": { + "External id": 3286289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595485388.043, "dur": 2.337, + "args": { + "External id": 3286290,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7593, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 1336756, "tid": 1336756, + "ts": 1587595485479.797, "dur": 16932.186, + "args": { + "External id": 3286291,"Record function id": 0, "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485577.527, "dur": 6.330, + "args": { + "External id": 3286292,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485586.761, "dur": 2.029, + "args": { + "External id": 3286293,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485590.470, "dur": 2.494, + "args": { + "External id": 3286294,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485594.854, "dur": 1.157, + "args": { + "External id": 3286295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485597.358, "dur": 0.711, + "args": { + "External id": 3286296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485599.306, "dur": 0.810, + "args": { + "External id": 3286297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485604.112, "dur": 1.224, + "args": { + "External id": 3286298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485606.942, "dur": 1.948, + "args": { + "External id": 3286299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485610.702, "dur": 0.753, + "args": { + "External id": 3286300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595485613.043, "dur": 0.647, + "args": { + "External id": 3286301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595485633.702, "dur": 16740.589, + "args": { + "External id": 3286302,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595485650.227, "dur": 16717.208, + "args": { + "External id": 3286303,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595485672.805, "dur": 13.815, + "args": { + "External id": 3286304,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595485689.725, "dur": 16644.700, + "args": { + "External id": 3286305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595485692.157, "dur": 16641.701, + "args": { + "External id": 3286306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595485698.518, "dur": 5.210, + "args": { + "External id": 3286307,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595485705.578, "dur": 16625.170, + "args": { + "External id": 3286308,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595502541.733, "dur": 38.908, + "args": { + "External id": 3286309,"Sequence number": 32987145, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7612 + } + }, + { + "ph": "s", "id": 187, "pid": 1336756, "tid": 1336756, "ts": 1587595502541.733, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595502564.657, "dur": 11.091, + "args": { + "External id": 3286310,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595502571.246, "dur": 4.249, + "args": { + "External id": 3286311,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595502642.279, "dur": 74.718, + "args": { + "External id": 3286312,"Record function id": 0, "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595502718.529, "dur": 1070.983, + "args": { + "External id": 3286313,"Record function id": 0, "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595502758.374, "dur": 1018.469, + "args": { + "External id": 3286314,"Sequence number": 32987146, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7617 + } + }, + { + "ph": "s", "id": 186, "pid": 1336756, "tid": 1336756, "ts": 1587595502758.374, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595502822.478, "dur": 41.964, + "args": { + "External id": 3286315,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595502892.961, "dur": 125.910, + "args": { + "External id": 3286316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595503036.303, "dur": 45.254, + "args": { + "External id": 3286317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595503088.719, "dur": 31.007, + "args": { + "External id": 3286318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595503150.758, "dur": 30.363, + "args": { + "External id": 3286319,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595503198.232, "dur": 16.467, + "args": { + "External id": 3286320,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595503232.033, "dur": 134.874, + "args": { + "External id": 3286321,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595503283.297, "dur": 11.817, + "args": { + "External id": 3286322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595503288.838, "dur": 5.544, + "args": { + "External id": 3286323,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595503298.064, "dur": 5.850, + "args": { + "External id": 3286324,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595503305.230, "dur": 1.564, + "args": { + "External id": 3286325,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595503311.449, "dur": 4.519, + "args": { + "External id": 3286326,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595503378.294, "dur": 48.341, + "args": { + "External id": 3286327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595503458.278, "dur": 28.808, + "args": { + "External id": 3286328,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595503497.139, "dur": 41.075, + "args": { + "External id": 3286329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595503544.837, "dur": 35.925, + "args": { + "External id": 3286330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595503604.349, "dur": 27.983, + "args": { + "External id": 3286331,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595503637.708, "dur": 33.681, + "args": { + "External id": 3286332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595503689.639, "dur": 17.781, + "args": { + "External id": 3286333,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 1336756, "tid": 1336756, + "ts": 1587595503852.446, "dur": 92.727, + "args": { + "External id": 3286334,"Record function id": 0, "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595504060.655, "dur": 50.749, + "args": { + "External id": 3286335,"Record function id": 0, "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 1336756, "tid": 1336756, + "ts": 1587595504121.553, "dur": 18338.687, + "args": { + "External id": 3286336,"Record function id": 0, "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 1336756, "tid": 1336756, + "ts": 1587595504130.014, "dur": 914.032, + "args": { + "External id": 3286337,"Record function id": 0, "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595504210.538, "dur": 9.383, + "args": { + "External id": 3286338,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595504233.007, "dur": 39.379, + "args": { + "External id": 3286339,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504238.872, "dur": 2.103, + "args": { + "External id": 3286340,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504245.967, "dur": 0.389, + "args": { + "External id": 3286341,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504247.787, "dur": 0.221, + "args": { + "External id": 3286342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504249.728, "dur": 0.773, + "args": { + "External id": 3286343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504254.274, "dur": 0.285, + "args": { + "External id": 3286344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504256.470, "dur": 0.378, + "args": { + "External id": 3286345,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504258.269, "dur": 3.633, + "args": { + "External id": 3286346,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504263.212, "dur": 0.222, + "args": { + "External id": 3286347,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504264.885, "dur": 0.344, + "args": { + "External id": 3286348,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595504284.673, "dur": 42.578, + "args": { + "External id": 3286349,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595504358.281, "dur": 115.965, + "args": { + "External id": 3286350,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595504368.710, "dur": 4.058, + "args": { + "External id": 3286351,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595504377.676, "dur": 10.007, + "args": { + "External id": 3286352,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595504381.621, "dur": 5.649, + "args": { + "External id": 3286353,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504385.430, "dur": 0.651, + "args": { + "External id": 3286354,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595504394.859, "dur": 33.132, + "args": { + "External id": 3286355,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504397.608, "dur": 2.544, + "args": { + "External id": 3286356,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504401.742, "dur": 0.358, + "args": { + "External id": 3286357,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504403.976, "dur": 0.372, + "args": { + "External id": 3286358,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504408.493, "dur": 1.572, + "args": { + "External id": 3286359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504411.573, "dur": 0.588, + "args": { + "External id": 3286360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504413.604, "dur": 0.290, + "args": { + "External id": 3286361,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504417.431, "dur": 0.156, + "args": { + "External id": 3286362,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504418.864, "dur": 0.321, + "args": { + "External id": 3286363,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595504420.454, "dur": 2.364, + "args": { + "External id": 3286364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595504441.815, "dur": 24.645, + "args": { + "External id": 3286365,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595504525.646, "dur": 381.613, + "args": { + "External id": 3286366,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595504558.510, "dur": 343.084, + "args": { + "External id": 3286367,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7670, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595504577.370, "dur": 316.938, + "args": { + "External id": 3286368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595504935.363, "dur": 3.051, + "args": { + "External id": 3286369,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7672, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 1336756, "tid": 1336756, + "ts": 1587595505068.519, "dur": 17195.160, + "args": { + "External id": 3286370,"Record function id": 0, "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505171.574, "dur": 6.416, + "args": { + "External id": 3286371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505181.498, "dur": 0.848, + "args": { + "External id": 3286372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505184.292, "dur": 2.588, + "args": { + "External id": 3286373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505188.625, "dur": 0.968, + "args": { + "External id": 3286374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505190.897, "dur": 1.053, + "args": { + "External id": 3286375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505193.641, "dur": 0.942, + "args": { + "External id": 3286376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505198.636, "dur": 0.756, + "args": { + "External id": 3286377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505201.003, "dur": 2.000, + "args": { + "External id": 3286378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505204.459, "dur": 0.639, + "args": { + "External id": 3286379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595505206.901, "dur": 0.637, + "args": { + "External id": 3286380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595505228.115, "dur": 16996.852, + "args": { + "External id": 3286381,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595505244.874, "dur": 16973.270, + "args": { + "External id": 3286382,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595505265.930, "dur": 13.194, + "args": { + "External id": 3286383,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595505282.205, "dur": 16902.520, + "args": { + "External id": 3286384,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595505284.890, "dur": 16899.246, + "args": { + "External id": 3286385,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595505291.090, "dur": 4.662, + "args": { + "External id": 3286386,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595505297.338, "dur": 16883.440, + "args": { + "External id": 3286387,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595522400.710, "dur": 35.661, + "args": { + "External id": 3286388,"Sequence number": 32987147, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7691 + } + }, + { + "ph": "s", "id": 185, "pid": 1336756, "tid": 1336756, "ts": 1587595522400.710, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595522423.625, "dur": 7.831, + "args": { + "External id": 3286389,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595522426.833, "dur": 4.424, + "args": { + "External id": 3286390,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595522497.845, "dur": 75.239, + "args": { + "External id": 3286391,"Record function id": 0, "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595522574.801, "dur": 1087.714, + "args": { + "External id": 3286392,"Record function id": 0, "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595522616.748, "dur": 1032.952, + "args": { + "External id": 3286393,"Sequence number": 32987148, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7696 + } + }, + { + "ph": "s", "id": 184, "pid": 1336756, "tid": 1336756, "ts": 1587595522616.748, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595522682.886, "dur": 39.439, + "args": { + "External id": 3286394,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595522734.286, "dur": 102.827, + "args": { + "External id": 3286395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595522846.765, "dur": 56.603, + "args": { + "External id": 3286396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595522916.093, "dur": 34.897, + "args": { + "External id": 3286397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595522978.881, "dur": 63.182, + "args": { + "External id": 3286398,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595523064.115, "dur": 16.891, + "args": { + "External id": 3286399,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595523099.537, "dur": 134.512, + "args": { + "External id": 3286400,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595523150.518, "dur": 13.596, + "args": { + "External id": 3286401,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595523156.801, "dur": 6.278, + "args": { + "External id": 3286402,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595523167.112, "dur": 5.145, + "args": { + "External id": 3286403,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595523173.690, "dur": 1.448, + "args": { + "External id": 3286404,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595523177.818, "dur": 4.674, + "args": { + "External id": 3286405,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595523244.415, "dur": 49.940, + "args": { + "External id": 3286406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595523325.065, "dur": 30.962, + "args": { + "External id": 3286407,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595523364.482, "dur": 41.378, + "args": { + "External id": 3286408,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595523415.002, "dur": 34.107, + "args": { + "External id": 3286409,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595523472.469, "dur": 28.299, + "args": { + "External id": 3286410,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595523507.208, "dur": 33.698, + "args": { + "External id": 3286411,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595523560.482, "dur": 18.297, + "args": { + "External id": 3286412,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 1336756, "tid": 1336756, + "ts": 1587595523725.286, "dur": 71.341, + "args": { + "External id": 3286413,"Record function id": 0, "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595523866.912, "dur": 63.945, + "args": { + "External id": 3286414,"Record function id": 0, "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 1336756, "tid": 1336756, + "ts": 1587595523941.716, "dur": 18302.099, + "args": { + "External id": 3286415,"Record function id": 0, "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 1336756, "tid": 1336756, + "ts": 1587595523952.163, "dur": 809.989, + "args": { + "External id": 3286416,"Record function id": 0, "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595524068.057, "dur": 9.522, + "args": { + "External id": 3286417,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595524091.550, "dur": 39.998, + "args": { + "External id": 3286418,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524097.147, "dur": 2.221, + "args": { + "External id": 3286419,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524104.820, "dur": 0.366, + "args": { + "External id": 3286420,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524107.210, "dur": 0.391, + "args": { + "External id": 3286421,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524108.963, "dur": 0.620, + "args": { + "External id": 3286422,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524113.349, "dur": 0.376, + "args": { + "External id": 3286423,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524115.094, "dur": 0.569, + "args": { + "External id": 3286424,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524117.870, "dur": 3.952, + "args": { + "External id": 3286425,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524123.292, "dur": 0.376, + "args": { + "External id": 3286426,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524125.229, "dur": 0.173, + "args": { + "External id": 3286427,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595524143.788, "dur": 42.408, + "args": { + "External id": 3286428,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595524218.756, "dur": 113.558, + "args": { + "External id": 3286429,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595524229.010, "dur": 4.584, + "args": { + "External id": 3286430,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595524238.859, "dur": 9.865, + "args": { + "External id": 3286431,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595524243.077, "dur": 5.262, + "args": { + "External id": 3286432,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524246.708, "dur": 0.409, + "args": { + "External id": 3286433,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595524255.681, "dur": 33.911, + "args": { + "External id": 3286434,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524258.075, "dur": 2.847, + "args": { + "External id": 3286435,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524262.611, "dur": 0.608, + "args": { + "External id": 3286436,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524264.592, "dur": 0.358, + "args": { + "External id": 3286437,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524268.426, "dur": 1.929, + "args": { + "External id": 3286438,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524272.017, "dur": 0.403, + "args": { + "External id": 3286439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524274.070, "dur": 0.773, + "args": { + "External id": 3286440,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524278.218, "dur": 0.321, + "args": { + "External id": 3286441,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524280.368, "dur": 0.334, + "args": { + "External id": 3286442,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595524282.543, "dur": 2.142, + "args": { + "External id": 3286443,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595524300.513, "dur": 24.226, + "args": { + "External id": 3286444,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595524384.743, "dur": 291.880, + "args": { + "External id": 3286445,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595524425.366, "dur": 246.782, + "args": { + "External id": 3286446,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7749, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595524435.352, "dur": 231.419, + "args": { + "External id": 3286447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595524698.261, "dur": 2.267, + "args": { + "External id": 3286448,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7751, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 1336756, "tid": 1336756, + "ts": 1587595524782.049, "dur": 17263.579, + "args": { + "External id": 3286449,"Record function id": 0, "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524895.776, "dur": 6.113, + "args": { + "External id": 3286450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524905.908, "dur": 1.234, + "args": { + "External id": 3286451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524908.898, "dur": 2.026, + "args": { + "External id": 3286452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524912.263, "dur": 1.010, + "args": { + "External id": 3286453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524914.600, "dur": 1.092, + "args": { + "External id": 3286454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524919.038, "dur": 0.953, + "args": { + "External id": 3286455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524921.788, "dur": 1.046, + "args": { + "External id": 3286456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524924.401, "dur": 1.485, + "args": { + "External id": 3286457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524927.458, "dur": 0.935, + "args": { + "External id": 3286458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595524932.209, "dur": 0.860, + "args": { + "External id": 3286459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595524952.010, "dur": 17053.206, + "args": { + "External id": 3286460,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595524973.117, "dur": 17002.594, + "args": { + "External id": 3286461,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595525024.925, "dur": 14.984, + "args": { + "External id": 3286462,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595525042.997, "dur": 16899.059, + "args": { + "External id": 3286463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595525045.436, "dur": 16896.136, + "args": { + "External id": 3286464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595525051.986, "dur": 6.849, + "args": { + "External id": 3286465,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595525060.367, "dur": 16878.370, + "args": { + "External id": 3286466,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595542179.837, "dur": 37.405, + "args": { + "External id": 3286467,"Sequence number": 32987149, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7770 + } + }, + { + "ph": "s", "id": 183, "pid": 1336756, "tid": 1336756, "ts": 1587595542179.837, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595542205.105, "dur": 7.832, + "args": { + "External id": 3286468,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595542208.556, "dur": 4.187, + "args": { + "External id": 3286469,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595542282.409, "dur": 75.023, + "args": { + "External id": 3286470,"Record function id": 0, "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595542359.102, "dur": 1092.270, + "args": { + "External id": 3286471,"Record function id": 0, "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595542397.640, "dur": 1040.921, + "args": { + "External id": 3286472,"Sequence number": 32987150, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7775 + } + }, + { + "ph": "s", "id": 182, "pid": 1336756, "tid": 1336756, "ts": 1587595542397.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595542464.860, "dur": 40.552, + "args": { + "External id": 3286473,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595542517.372, "dur": 103.579, + "args": { + "External id": 3286474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595542630.555, "dur": 39.764, + "args": { + "External id": 3286475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595542678.627, "dur": 31.127, + "args": { + "External id": 3286476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595542732.535, "dur": 28.025, + "args": { + "External id": 3286477,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595542780.163, "dur": 15.906, + "args": { + "External id": 3286478,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595542813.822, "dur": 151.718, + "args": { + "External id": 3286479,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595542862.689, "dur": 29.164, + "args": { + "External id": 3286480,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595542867.695, "dur": 23.128, + "args": { + "External id": 3286481,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595542895.805, "dur": 6.068, + "args": { + "External id": 3286482,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595542903.594, "dur": 1.396, + "args": { + "External id": 3286483,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595542907.681, "dur": 4.527, + "args": { + "External id": 3286484,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595542977.009, "dur": 90.416, + "args": { + "External id": 3286485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595543101.216, "dur": 32.512, + "args": { + "External id": 3286486,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595543143.534, "dur": 42.594, + "args": { + "External id": 3286487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595543195.012, "dur": 35.533, + "args": { + "External id": 3286488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595543255.284, "dur": 29.094, + "args": { + "External id": 3286489,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595543289.954, "dur": 33.609, + "args": { + "External id": 3286490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595543346.504, "dur": 19.966, + "args": { + "External id": 3286491,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7794 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 1336756, "tid": 1336756, + "ts": 1587595543515.702, "dur": 72.748, + "args": { + "External id": 3286492,"Record function id": 0, "Ev Idx": 7795 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595543660.284, "dur": 47.346, + "args": { + "External id": 3286493,"Record function id": 0, "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 1336756, "tid": 1336756, + "ts": 1587595543717.673, "dur": 18224.173, + "args": { + "External id": 3286494,"Record function id": 0, "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 1336756, "tid": 1336756, + "ts": 1587595543727.289, "dur": 886.064, + "args": { + "External id": 3286495,"Record function id": 0, "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595543802.729, "dur": 7.597, + "args": { + "External id": 3286496,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595543823.712, "dur": 36.827, + "args": { + "External id": 3286497,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543828.632, "dur": 2.342, + "args": { + "External id": 3286498,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543835.716, "dur": 0.307, + "args": { + "External id": 3286499,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543837.492, "dur": 0.170, + "args": { + "External id": 3286500,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543839.355, "dur": 0.279, + "args": { + "External id": 3286501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543842.930, "dur": 0.567, + "args": { + "External id": 3286502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543845.332, "dur": 0.684, + "args": { + "External id": 3286503,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543847.652, "dur": 4.007, + "args": { + "External id": 3286504,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543853.408, "dur": 0.175, + "args": { + "External id": 3286505,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595543854.678, "dur": 0.222, + "args": { + "External id": 3286506,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595543888.673, "dur": 42.962, + "args": { + "External id": 3286507,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595543965.981, "dur": 160.026, + "args": { + "External id": 3286508,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595543977.909, "dur": 41.521, + "args": { + "External id": 3286509,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595544026.552, "dur": 11.555, + "args": { + "External id": 3286510,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595544031.064, "dur": 6.628, + "args": { + "External id": 3286511,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544034.940, "dur": 1.009, + "args": { + "External id": 3286512,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595544046.003, "dur": 30.418, + "args": { + "External id": 3286513,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544048.238, "dur": 2.647, + "args": { + "External id": 3286514,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544052.125, "dur": 0.531, + "args": { + "External id": 3286515,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544054.099, "dur": 0.550, + "args": { + "External id": 3286516,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544058.193, "dur": 1.757, + "args": { + "External id": 3286517,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544060.974, "dur": 0.181, + "args": { + "External id": 3286518,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544062.197, "dur": 0.559, + "args": { + "External id": 3286519,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544066.528, "dur": 0.372, + "args": { + "External id": 3286520,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544068.133, "dur": 0.144, + "args": { + "External id": 3286521,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544069.116, "dur": 2.725, + "args": { + "External id": 3286522,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595544090.341, "dur": 27.357, + "args": { + "External id": 3286523,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595544181.745, "dur": 345.476, + "args": { + "External id": 3286524,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595544214.609, "dur": 308.197, + "args": { + "External id": 3286525,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7828, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595544224.621, "dur": 290.649, + "args": { + "External id": 3286526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595544549.294, "dur": 2.437, + "args": { + "External id": 3286527,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7830, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 1336756, "tid": 1336756, + "ts": 1587595544633.195, "dur": 17103.201, + "args": { + "External id": 3286528,"Record function id": 0, "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544756.148, "dur": 5.203, + "args": { + "External id": 3286529,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544764.739, "dur": 1.037, + "args": { + "External id": 3286530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544767.303, "dur": 1.713, + "args": { + "External id": 3286531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544772.881, "dur": 0.861, + "args": { + "External id": 3286532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544775.104, "dur": 0.731, + "args": { + "External id": 3286533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544777.031, "dur": 0.520, + "args": { + "External id": 3286534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544779.148, "dur": 0.516, + "args": { + "External id": 3286535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544782.920, "dur": 1.479, + "args": { + "External id": 3286536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544785.709, "dur": 0.629, + "args": { + "External id": 3286537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595544787.582, "dur": 0.604, + "args": { + "External id": 3286538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595544807.093, "dur": 16890.539, + "args": { + "External id": 3286539,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595544822.067, "dur": 16868.696, + "args": { + "External id": 3286540,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595544834.688, "dur": 13.263, + "args": { + "External id": 3286541,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595544853.475, "dur": 16804.952, + "args": { + "External id": 3286542,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595544855.810, "dur": 16801.941, + "args": { + "External id": 3286543,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595544861.481, "dur": 4.975, + "args": { + "External id": 3286544,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595544867.947, "dur": 16786.689, + "args": { + "External id": 3286545,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595561864.241, "dur": 51.077, + "args": { + "External id": 3286546,"Sequence number": 32987151, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7849 + } + }, + { + "ph": "s", "id": 181, "pid": 1336756, "tid": 1336756, "ts": 1587595561864.241, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595561901.804, "dur": 8.120, + "args": { + "External id": 3286547,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595561904.881, "dur": 4.662, + "args": { + "External id": 3286548,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595561980.793, "dur": 100.252, + "args": { + "External id": 3286549,"Record function id": 0, "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595562083.861, "dur": 1102.854, + "args": { + "External id": 3286550,"Record function id": 0, "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595562128.484, "dur": 1044.770, + "args": { + "External id": 3286551,"Sequence number": 32987152, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7854 + } + }, + { + "ph": "s", "id": 180, "pid": 1336756, "tid": 1336756, "ts": 1587595562128.484, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595562201.674, "dur": 43.669, + "args": { + "External id": 3286552,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595562258.383, "dur": 103.776, + "args": { + "External id": 3286553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595562371.386, "dur": 39.429, + "args": { + "External id": 3286554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595562419.841, "dur": 31.007, + "args": { + "External id": 3286555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595562477.200, "dur": 27.834, + "args": { + "External id": 3286556,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595562523.530, "dur": 17.901, + "args": { + "External id": 3286557,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595562559.707, "dur": 129.082, + "args": { + "External id": 3286558,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595562609.532, "dur": 11.276, + "args": { + "External id": 3286559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595562614.479, "dur": 5.577, + "args": { + "External id": 3286560,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595562623.219, "dur": 5.673, + "args": { + "External id": 3286561,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595562630.094, "dur": 1.289, + "args": { + "External id": 3286562,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595562633.649, "dur": 4.474, + "args": { + "External id": 3286563,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595562698.854, "dur": 46.569, + "args": { + "External id": 3286564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595562776.610, "dur": 29.540, + "args": { + "External id": 3286565,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595562815.034, "dur": 43.047, + "args": { + "External id": 3286566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595562867.173, "dur": 55.300, + "args": { + "External id": 3286567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595562946.695, "dur": 28.071, + "args": { + "External id": 3286568,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595562980.998, "dur": 73.416, + "args": { + "External id": 3286569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595563082.164, "dur": 20.085, + "args": { + "External id": 3286570,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 1336756, "tid": 1336756, + "ts": 1587595563251.525, "dur": 75.400, + "args": { + "External id": 3286571,"Record function id": 0, "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595563399.165, "dur": 48.319, + "args": { + "External id": 3286572,"Record function id": 0, "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 1336756, "tid": 1336756, + "ts": 1587595563457.079, "dur": 18559.780, + "args": { + "External id": 3286573,"Record function id": 0, "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 1336756, "tid": 1336756, + "ts": 1587595563465.626, "dur": 879.173, + "args": { + "External id": 3286574,"Record function id": 0, "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595563543.431, "dur": 8.172, + "args": { + "External id": 3286575,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595563564.741, "dur": 35.332, + "args": { + "External id": 3286576,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563569.595, "dur": 2.194, + "args": { + "External id": 3286577,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563576.263, "dur": 0.513, + "args": { + "External id": 3286578,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563577.809, "dur": 0.561, + "args": { + "External id": 3286579,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563579.500, "dur": 0.364, + "args": { + "External id": 3286580,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563583.116, "dur": 0.383, + "args": { + "External id": 3286581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563584.750, "dur": 0.237, + "args": { + "External id": 3286582,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563586.354, "dur": 3.732, + "args": { + "External id": 3286583,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563591.741, "dur": 0.580, + "args": { + "External id": 3286584,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563593.644, "dur": 0.285, + "args": { + "External id": 3286585,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595563611.797, "dur": 37.920, + "args": { + "External id": 3286586,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595563680.223, "dur": 108.735, + "args": { + "External id": 3286587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595563689.995, "dur": 4.317, + "args": { + "External id": 3286588,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595563699.335, "dur": 10.340, + "args": { + "External id": 3286589,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595563703.976, "dur": 5.296, + "args": { + "External id": 3286590,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563707.445, "dur": 0.520, + "args": { + "External id": 3286591,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595563716.689, "dur": 29.468, + "args": { + "External id": 3286592,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563718.998, "dur": 3.105, + "args": { + "External id": 3286593,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563723.543, "dur": 0.239, + "args": { + "External id": 3286594,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563724.873, "dur": 0.363, + "args": { + "External id": 3286595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563728.541, "dur": 1.184, + "args": { + "External id": 3286596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563730.621, "dur": 0.160, + "args": { + "External id": 3286597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563731.985, "dur": 0.293, + "args": { + "External id": 3286598,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563735.620, "dur": 0.183, + "args": { + "External id": 3286599,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563737.401, "dur": 0.280, + "args": { + "External id": 3286600,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595563739.125, "dur": 2.702, + "args": { + "External id": 3286601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595563756.955, "dur": 24.555, + "args": { + "External id": 3286602,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595563840.553, "dur": 407.978, + "args": { + "External id": 3286603,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595563890.235, "dur": 352.718, + "args": { + "External id": 3286604,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7907, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595563902.355, "dur": 334.588, + "args": { + "External id": 3286605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595564275.509, "dur": 2.165, + "args": { + "External id": 3286606,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7909, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 1336756, "tid": 1336756, + "ts": 1587595564365.199, "dur": 17418.590, + "args": { + "External id": 3286607,"Record function id": 0, "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564463.379, "dur": 6.339, + "args": { + "External id": 3286608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564473.073, "dur": 1.663, + "args": { + "External id": 3286609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564476.460, "dur": 2.540, + "args": { + "External id": 3286610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564480.642, "dur": 0.807, + "args": { + "External id": 3286611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564491.680, "dur": 0.800, + "args": { + "External id": 3286612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564495.475, "dur": 0.665, + "args": { + "External id": 3286613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564500.317, "dur": 0.757, + "args": { + "External id": 3286614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564502.575, "dur": 1.907, + "args": { + "External id": 3286615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564505.754, "dur": 0.791, + "args": { + "External id": 3286616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595564508.060, "dur": 0.615, + "args": { + "External id": 3286617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595564530.726, "dur": 17215.893, + "args": { + "External id": 3286618,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595564546.368, "dur": 17193.506, + "args": { + "External id": 3286619,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595564564.150, "dur": 13.194, + "args": { + "External id": 3286620,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595564580.463, "dur": 17126.642, + "args": { + "External id": 3286621,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595564582.745, "dur": 17123.855, + "args": { + "External id": 3286622,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595564588.543, "dur": 5.910, + "args": { + "External id": 3286623,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595564596.002, "dur": 17107.964, + "args": { + "External id": 3286624,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595581928.149, "dur": 36.420, + "args": { + "External id": 3286625,"Sequence number": 32987153, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7928 + } + }, + { + "ph": "s", "id": 179, "pid": 1336756, "tid": 1336756, "ts": 1587595581928.149, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595581951.366, "dur": 8.000, + "args": { + "External id": 3286626,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595581954.681, "dur": 4.327, + "args": { + "External id": 3286627,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595582057.917, "dur": 76.245, + "args": { + "External id": 3286628,"Record function id": 0, "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595582136.105, "dur": 1082.680, + "args": { + "External id": 3286629,"Record function id": 0, "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595582178.552, "dur": 1027.220, + "args": { + "External id": 3286630,"Sequence number": 32987154, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7933 + } + }, + { + "ph": "s", "id": 178, "pid": 1336756, "tid": 1336756, "ts": 1587595582178.552, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595582246.748, "dur": 42.867, + "args": { + "External id": 3286631,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595582302.660, "dur": 104.630, + "args": { + "External id": 3286632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595582417.016, "dur": 38.932, + "args": { + "External id": 3286633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595582465.717, "dur": 30.342, + "args": { + "External id": 3286634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595582521.713, "dur": 27.914, + "args": { + "External id": 3286635,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595582568.016, "dur": 15.132, + "args": { + "External id": 3286636,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595582600.464, "dur": 125.494, + "args": { + "External id": 3286637,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595582648.390, "dur": 10.878, + "args": { + "External id": 3286638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595582653.110, "dur": 5.331, + "args": { + "External id": 3286639,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595582661.741, "dur": 5.718, + "args": { + "External id": 3286640,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595582668.843, "dur": 1.311, + "args": { + "External id": 3286641,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595582672.412, "dur": 3.867, + "args": { + "External id": 3286642,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595582736.836, "dur": 44.812, + "args": { + "External id": 3286643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595582811.953, "dur": 27.243, + "args": { + "External id": 3286644,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595582848.137, "dur": 58.122, + "args": { + "External id": 3286645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595582918.295, "dur": 38.197, + "args": { + "External id": 3286646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595583014.916, "dur": 31.779, + "args": { + "External id": 3286647,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595583054.472, "dur": 40.894, + "args": { + "External id": 3286648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595583117.509, "dur": 18.782, + "args": { + "External id": 3286649,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 1336756, "tid": 1336756, + "ts": 1587595583281.642, "dur": 72.742, + "args": { + "External id": 3286650,"Record function id": 0, "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595583422.600, "dur": 44.698, + "args": { + "External id": 3286651,"Record function id": 0, "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 1336756, "tid": 1336756, + "ts": 1587595583476.871, "dur": 18590.412, + "args": { + "External id": 3286652,"Record function id": 0, "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 1336756, "tid": 1336756, + "ts": 1587595583485.081, "dur": 932.346, + "args": { + "External id": 3286653,"Record function id": 0, "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595583563.241, "dur": 7.856, + "args": { + "External id": 3286654,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595583584.007, "dur": 40.713, + "args": { + "External id": 3286655,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583588.996, "dur": 2.314, + "args": { + "External id": 3286656,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583595.495, "dur": 0.270, + "args": { + "External id": 3286657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583596.915, "dur": 0.616, + "args": { + "External id": 3286658,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583603.993, "dur": 0.188, + "args": { + "External id": 3286659,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583609.591, "dur": 0.321, + "args": { + "External id": 3286660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583610.974, "dur": 0.180, + "args": { + "External id": 3286661,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583611.874, "dur": 3.296, + "args": { + "External id": 3286662,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583616.461, "dur": 0.514, + "args": { + "External id": 3286663,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583618.529, "dur": 0.307, + "args": { + "External id": 3286664,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595583636.378, "dur": 37.572, + "args": { + "External id": 3286665,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595583704.780, "dur": 109.938, + "args": { + "External id": 3286666,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595583715.701, "dur": 3.716, + "args": { + "External id": 3286667,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595583724.090, "dur": 9.657, + "args": { + "External id": 3286668,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595583728.341, "dur": 4.983, + "args": { + "External id": 3286669,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583731.895, "dur": 0.342, + "args": { + "External id": 3286670,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595583740.608, "dur": 28.683, + "args": { + "External id": 3286671,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583742.445, "dur": 2.714, + "args": { + "External id": 3286672,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583746.863, "dur": 0.423, + "args": { + "External id": 3286673,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583748.302, "dur": 0.211, + "args": { + "External id": 3286674,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583752.324, "dur": 1.119, + "args": { + "External id": 3286675,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583755.030, "dur": 0.181, + "args": { + "External id": 3286676,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583756.423, "dur": 0.307, + "args": { + "External id": 3286677,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583759.716, "dur": 0.309, + "args": { + "External id": 3286678,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583761.244, "dur": 0.329, + "args": { + "External id": 3286679,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595583762.981, "dur": 2.416, + "args": { + "External id": 3286680,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595583782.401, "dur": 24.859, + "args": { + "External id": 3286681,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595583864.520, "dur": 450.907, + "args": { + "External id": 3286682,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595583922.101, "dur": 387.467, + "args": { + "External id": 3286683,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7986, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595583932.452, "dur": 371.213, + "args": { + "External id": 3286684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595584340.389, "dur": 2.780, + "args": { + "External id": 3286685,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7988, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 1336756, "tid": 1336756, + "ts": 1587595584439.250, "dur": 17386.037, + "args": { + "External id": 3286686,"Record function id": 0, "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584542.872, "dur": 6.662, + "args": { + "External id": 3286687,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584553.155, "dur": 1.145, + "args": { + "External id": 3286688,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584556.225, "dur": 2.170, + "args": { + "External id": 3286689,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584560.078, "dur": 0.743, + "args": { + "External id": 3286690,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584562.365, "dur": 0.842, + "args": { + "External id": 3286691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584564.428, "dur": 1.074, + "args": { + "External id": 3286692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584568.920, "dur": 0.806, + "args": { + "External id": 3286693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584571.089, "dur": 1.804, + "args": { + "External id": 3286694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584574.444, "dur": 0.818, + "args": { + "External id": 3286695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595584576.922, "dur": 0.734, + "args": { + "External id": 3286696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595584599.309, "dur": 17184.942, + "args": { + "External id": 3286697,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595584615.865, "dur": 17161.526, + "args": { + "External id": 3286698,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595584638.890, "dur": 13.342, + "args": { + "External id": 3286699,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595584655.085, "dur": 17089.671, + "args": { + "External id": 3286700,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595584657.604, "dur": 17086.604, + "args": { + "External id": 3286701,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595584663.440, "dur": 5.661, + "args": { + "External id": 3286702,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595584670.864, "dur": 17070.030, + "args": { + "External id": 3286703,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595601977.481, "dur": 63.001, + "args": { + "External id": 3286704,"Sequence number": 32987155, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 8007 + } + }, + { + "ph": "s", "id": 177, "pid": 1336756, "tid": 1336756, "ts": 1587595601977.481, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595602026.148, "dur": 9.046, + "args": { + "External id": 3286705,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595602029.570, "dur": 5.198, + "args": { + "External id": 3286706,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595602106.049, "dur": 76.812, + "args": { + "External id": 3286707,"Record function id": 0, "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595602184.654, "dur": 1101.200, + "args": { + "External id": 3286708,"Record function id": 0, "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595602227.281, "dur": 1045.795, + "args": { + "External id": 3286709,"Sequence number": 32987156, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 8012 + } + }, + { + "ph": "s", "id": 176, "pid": 1336756, "tid": 1336756, "ts": 1587595602227.281, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595602297.421, "dur": 43.651, + "args": { + "External id": 3286710,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595602354.071, "dur": 101.605, + "args": { + "External id": 3286711,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595602465.140, "dur": 41.167, + "args": { + "External id": 3286712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595602515.810, "dur": 31.474, + "args": { + "External id": 3286713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595602573.538, "dur": 27.431, + "args": { + "External id": 3286714,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595602619.639, "dur": 16.532, + "args": { + "External id": 3286715,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595602652.098, "dur": 126.491, + "args": { + "External id": 3286716,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595602700.998, "dur": 10.759, + "args": { + "External id": 3286717,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595602705.671, "dur": 5.240, + "args": { + "External id": 3286718,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595602714.617, "dur": 6.018, + "args": { + "External id": 3286719,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595602721.944, "dur": 1.821, + "args": { + "External id": 3286720,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595602726.136, "dur": 3.177, + "args": { + "External id": 3286721,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595602789.401, "dur": 50.845, + "args": { + "External id": 3286722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595602887.583, "dur": 32.689, + "args": { + "External id": 3286723,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595602931.095, "dur": 46.637, + "args": { + "External id": 3286724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595603022.498, "dur": 40.634, + "args": { + "External id": 3286725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595603090.422, "dur": 28.635, + "args": { + "External id": 3286726,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595603126.121, "dur": 33.946, + "args": { + "External id": 3286727,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595603181.567, "dur": 18.341, + "args": { + "External id": 3286728,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 1336756, "tid": 1336756, + "ts": 1587595603350.451, "dur": 74.470, + "args": { + "External id": 3286729,"Record function id": 0, "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595603496.795, "dur": 45.943, + "args": { + "External id": 3286730,"Record function id": 0, "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.26)", "pid": 1336756, "tid": 1336756, + "ts": 1587595603551.802, "dur": 18295.484, + "args": { + "External id": 3286731,"Record function id": 0, "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 1336756, "tid": 1336756, + "ts": 1587595603559.803, "dur": 884.006, + "args": { + "External id": 3286732,"Record function id": 0, "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595603639.488, "dur": 7.892, + "args": { + "External id": 3286733,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595603659.735, "dur": 34.812, + "args": { + "External id": 3286734,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603664.994, "dur": 2.377, + "args": { + "External id": 3286735,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603671.736, "dur": 0.674, + "args": { + "External id": 3286736,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603673.820, "dur": 0.797, + "args": { + "External id": 3286737,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603675.692, "dur": 0.834, + "args": { + "External id": 3286738,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603679.743, "dur": 0.629, + "args": { + "External id": 3286739,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603681.301, "dur": 0.542, + "args": { + "External id": 3286740,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603682.750, "dur": 2.640, + "args": { + "External id": 3286741,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603686.527, "dur": 0.652, + "args": { + "External id": 3286742,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603688.238, "dur": 0.414, + "args": { + "External id": 3286743,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595603706.190, "dur": 38.718, + "args": { + "External id": 3286744,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595603775.685, "dur": 135.131, + "args": { + "External id": 3286745,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595603786.316, "dur": 4.795, + "args": { + "External id": 3286746,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595603796.342, "dur": 10.027, + "args": { + "External id": 3286747,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595603800.431, "dur": 5.536, + "args": { + "External id": 3286748,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603804.050, "dur": 0.771, + "args": { + "External id": 3286749,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595603812.812, "dur": 33.206, + "args": { + "External id": 3286750,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603814.637, "dur": 2.655, + "args": { + "External id": 3286751,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603818.987, "dur": 0.968, + "args": { + "External id": 3286752,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603821.036, "dur": 0.710, + "args": { + "External id": 3286753,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603824.966, "dur": 0.402, + "args": { + "External id": 3286754,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603826.461, "dur": 0.433, + "args": { + "External id": 3286755,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603832.119, "dur": 0.711, + "args": { + "External id": 3286756,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603835.963, "dur": 0.431, + "args": { + "External id": 3286757,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603837.705, "dur": 0.439, + "args": { + "External id": 3286758,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595603839.218, "dur": 2.659, + "args": { + "External id": 3286759,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595603856.736, "dur": 44.211, + "args": { + "External id": 3286760,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595603964.689, "dur": 387.089, + "args": { + "External id": 3286761,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595604033.481, "dur": 313.488, + "args": { + "External id": 3286762,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8065, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595604045.289, "dur": 296.063, + "args": { + "External id": 3286763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595604375.269, "dur": 2.169, + "args": { + "External id": 3286764,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8067, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 1336756, "tid": 1336756, + "ts": 1587595604464.271, "dur": 17194.207, + "args": { + "External id": 3286765,"Record function id": 0, "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604562.077, "dur": 6.085, + "args": { + "External id": 3286766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604571.343, "dur": 1.114, + "args": { + "External id": 3286767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604574.251, "dur": 1.122, + "args": { + "External id": 3286768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604577.029, "dur": 0.999, + "args": { + "External id": 3286769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604579.515, "dur": 1.176, + "args": { + "External id": 3286770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604581.901, "dur": 1.477, + "args": { + "External id": 3286771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604587.191, "dur": 1.287, + "args": { + "External id": 3286772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604590.111, "dur": 2.262, + "args": { + "External id": 3286773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604593.753, "dur": 1.079, + "args": { + "External id": 3286774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595604596.646, "dur": 1.179, + "args": { + "External id": 3286775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595604617.420, "dur": 17005.063, + "args": { + "External id": 3286776,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595604639.616, "dur": 16975.894, + "args": { + "External id": 3286777,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595604656.452, "dur": 13.136, + "args": { + "External id": 3286778,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595604672.613, "dur": 16909.037, + "args": { + "External id": 3286779,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595604674.799, "dur": 16906.097, + "args": { + "External id": 3286780,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595604680.166, "dur": 5.478, + "args": { + "External id": 3286781,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595604687.254, "dur": 16890.267, + "args": { + "External id": 3286782,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595621787.922, "dur": 35.680, + "args": { + "External id": 3286783,"Sequence number": 32987157, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 8086 + } + }, + { + "ph": "s", "id": 175, "pid": 1336756, "tid": 1336756, "ts": 1587595621787.922, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595621810.585, "dur": 8.355, + "args": { + "External id": 3286784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595621814.144, "dur": 4.453, + "args": { + "External id": 3286785,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595621897.824, "dur": 76.727, + "args": { + "External id": 3286786,"Record function id": 0, "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595621976.385, "dur": 1124.212, + "args": { + "External id": 3286787,"Record function id": 0, "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595622062.679, "dur": 1024.505, + "args": { + "External id": 3286788,"Sequence number": 32987158, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 8091 + } + }, + { + "ph": "s", "id": 174, "pid": 1336756, "tid": 1336756, "ts": 1587595622062.679, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595622132.062, "dur": 45.418, + "args": { + "External id": 3286789,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595622190.464, "dur": 102.175, + "args": { + "External id": 3286790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595622301.493, "dur": 40.348, + "args": { + "External id": 3286791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595622350.729, "dur": 31.267, + "args": { + "External id": 3286792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595622407.940, "dur": 29.188, + "args": { + "External id": 3286793,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595622454.923, "dur": 15.526, + "args": { + "External id": 3286794,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595622487.725, "dur": 124.001, + "args": { + "External id": 3286795,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595622536.349, "dur": 11.221, + "args": { + "External id": 3286796,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595622541.089, "dur": 5.655, + "args": { + "External id": 3286797,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595622550.191, "dur": 5.717, + "args": { + "External id": 3286798,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595622557.075, "dur": 1.632, + "args": { + "External id": 3286799,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595622561.163, "dur": 2.728, + "args": { + "External id": 3286800,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595622623.629, "dur": 45.251, + "args": { + "External id": 3286801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595622700.849, "dur": 29.751, + "args": { + "External id": 3286802,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595622739.429, "dur": 39.641, + "args": { + "External id": 3286803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595622787.901, "dur": 35.055, + "args": { + "External id": 3286804,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595622846.380, "dur": 44.890, + "args": { + "External id": 3286805,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595622899.364, "dur": 39.301, + "args": { + "External id": 3286806,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595622960.772, "dur": 19.076, + "args": { + "External id": 3286807,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.26)", "pid": 1336756, "tid": 1336756, + "ts": 1587595623164.648, "dur": 75.403, + "args": { + "External id": 3286808,"Record function id": 0, "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 1336756, "tid": 1336756, + "ts": 1587595623312.750, "dur": 48.237, + "args": { + "External id": 3286809,"Record function id": 0, "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.27)", "pid": 1336756, "tid": 1336756, + "ts": 1587595623369.748, "dur": 18268.342, + "args": { + "External id": 3286810,"Record function id": 0, "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 1336756, "tid": 1336756, + "ts": 1587595623380.183, "dur": 885.535, + "args": { + "External id": 3286811,"Record function id": 0, "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595623462.969, "dur": 8.717, + "args": { + "External id": 3286812,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595623483.964, "dur": 32.743, + "args": { + "External id": 3286813,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623488.694, "dur": 2.385, + "args": { + "External id": 3286814,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623495.552, "dur": 0.716, + "args": { + "External id": 3286815,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623497.299, "dur": 0.811, + "args": { + "External id": 3286816,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623499.259, "dur": 0.833, + "args": { + "External id": 3286817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623502.876, "dur": 0.594, + "args": { + "External id": 3286818,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623504.661, "dur": 0.709, + "args": { + "External id": 3286819,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623506.101, "dur": 2.225, + "args": { + "External id": 3286820,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623509.267, "dur": 0.485, + "args": { + "External id": 3286821,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623510.491, "dur": 0.533, + "args": { + "External id": 3286822,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595623529.299, "dur": 41.957, + "args": { + "External id": 3286823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 1336756, "tid": 1336756, + "ts": 1587595623601.698, "dur": 109.154, + "args": { + "External id": 3286824,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "3", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595623611.930, "dur": 4.835, + "args": { + "External id": 3286825,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 1336756, "tid": 1336756, + "ts": 1587595623621.493, "dur": 10.016, + "args": { + "External id": 3286826,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595623625.800, "dur": 5.281, + "args": { + "External id": 3286827,"Record function id": 0, "Concrete Inputs": ["", "0", "19269120", "25692160", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 8130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623629.155, "dur": 0.720, + "args": { + "External id": 3286828,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 8131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 1336756, "tid": 1336756, + "ts": 1587595623638.018, "dur": 29.864, + "args": { + "External id": 3286829,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623639.938, "dur": 3.158, + "args": { + "External id": 3286830,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "19269120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623644.532, "dur": 0.430, + "args": { + "External id": 3286831,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19269376"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623646.562, "dur": 0.541, + "args": { + "External id": 3286832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19793664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623650.709, "dur": 0.460, + "args": { + "External id": 3286833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20317952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623652.632, "dur": 0.462, + "args": { + "External id": 3286834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "20842240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623654.852, "dur": 0.435, + "args": { + "External id": 3286835,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "21366528"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623657.990, "dur": 0.522, + "args": { + "External id": 3286836,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "21366784"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623659.944, "dur": 0.516, + "args": { + "External id": 3286837,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "22808576"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595623661.653, "dur": 2.350, + "args": { + "External id": 3286838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "24250368"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595623680.266, "dur": 22.971, + "args": { + "External id": 3286839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 1336756, "tid": 1336756, + "ts": 1587595623762.899, "dur": 405.845, + "args": { + "External id": 3286840,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595623795.296, "dur": 367.993, + "args": { + "External id": 3286841,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 3, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8144, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 1336756, "tid": 1336756, + "ts": 1587595623804.911, "dur": 352.389, + "args": { + "External id": 3286842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587595624194.564, "dur": 2.360, + "args": { + "External id": 3286843,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8146, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 1336756, "tid": 1336756, + "ts": 1587595624285.357, "dur": 17151.464, + "args": { + "External id": 3286844,"Record function id": 0, "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624387.709, "dur": 6.495, + "args": { + "External id": 3286845,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624397.435, "dur": 1.139, + "args": { + "External id": 3286846,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624400.450, "dur": 1.310, + "args": { + "External id": 3286847,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624415.942, "dur": 1.062, + "args": { + "External id": 3286848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624418.630, "dur": 0.967, + "args": { + "External id": 3286849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624421.192, "dur": 1.112, + "args": { + "External id": 3286850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624426.057, "dur": 0.947, + "args": { + "External id": 3286851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624428.373, "dur": 2.014, + "args": { + "External id": 3286852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624431.825, "dur": 0.958, + "args": { + "External id": 3286853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595624434.111, "dur": 0.836, + "args": { + "External id": 3286854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595624456.812, "dur": 16938.940, + "args": { + "External id": 3286855,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595624472.487, "dur": 16915.986, + "args": { + "External id": 3286856,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595624490.237, "dur": 13.434, + "args": { + "External id": 3286857,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595624506.567, "dur": 16847.434, + "args": { + "External id": 3286858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595624509.067, "dur": 16844.377, + "args": { + "External id": 3286859,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595624515.334, "dur": 5.895, + "args": { + "External id": 3286860,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595624522.909, "dur": 16827.252, + "args": { + "External id": 3286861,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595641572.855, "dur": 40.242, + "args": { + "External id": 3286862,"Sequence number": 32987159, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 8165 + } + }, + { + "ph": "s", "id": 173, "pid": 1336756, "tid": 1336756, "ts": 1587595641572.855, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595641600.098, "dur": 8.090, + "args": { + "External id": 3286863,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595641603.104, "dur": 4.868, + "args": { + "External id": 3286864,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595641675.863, "dur": 71.163, + "args": { + "External id": 3286865,"Record function id": 0, "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 1336756, "tid": 1336756, + "ts": 1587595641748.748, "dur": 1077.325, + "args": { + "External id": 3286866,"Record function id": 0, "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595641789.183, "dur": 1023.789, + "args": { + "External id": 3286867,"Sequence number": 32987160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 8170 + } + }, + { + "ph": "s", "id": 172, "pid": 1336756, "tid": 1336756, "ts": 1587595641789.183, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595641854.073, "dur": 56.418, + "args": { + "External id": 3286868,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595641926.577, "dur": 127.693, + "args": { + "External id": 3286869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595642068.448, "dur": 41.350, + "args": { + "External id": 3286870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595642119.710, "dur": 32.058, + "args": { + "External id": 3286871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595642180.512, "dur": 27.653, + "args": { + "External id": 3286872,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595642229.630, "dur": 17.730, + "args": { + "External id": 3286873,"kernel_hash": "cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/ah/cahe5odluo4swiyj4n2lohwh57qg5lym5qsijyjsjca7pvto5oka.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595642264.882, "dur": 135.786, + "args": { + "External id": 3286874,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595642315.546, "dur": 11.694, + "args": { + "External id": 3286875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595642320.112, "dur": 6.347, + "args": { + "External id": 3286876,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595642330.156, "dur": 4.880, + "args": { + "External id": 3286877,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595642336.592, "dur": 1.781, + "args": { + "External id": 3286878,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595642340.767, "dur": 3.629, + "args": { + "External id": 3286879,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595642411.713, "dur": 44.687, + "args": { + "External id": 3286880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 1336756, "tid": 1336756, + "ts": 1587595642487.302, "dur": 29.437, + "args": { + "External id": 3286881,"kernel_hash": "ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kb/ckbsutlugbs4qoncxuwq6kkwome3xojo3ke4p7z74lcl5h4q62to.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595642525.299, "dur": 40.227, + "args": { + "External id": 3286882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595642574.577, "dur": 34.714, + "args": { + "External id": 3286883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595642632.230, "dur": 28.714, + "args": { + "External id": 3286884,"kernel_hash": "cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/gh/cgh2wg2b6li2ipjwrhdr326ycb37fu4v576nwebfoyhwx5kppapn.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595642667.341, "dur": 34.120, + "args": { + "External id": 3286885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 1336756, "tid": 1336756, + "ts": 1587595642723.450, "dur": 17.990, + "args": { + "External id": 3286886,"kernel_hash": "cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/ta/cta7vcczabyyuqarwmn4dcnazkxudhfwrvnick7ql5cmsd5lz4rz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.27)", "pid": 1336756, "tid": 1336756, + "ts": 1587595642905.009, "dur": 32.435, + "args": { + "External id": 3286887,"Record function id": 0, "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595643113.449, "dur": 291.148, + "args": { + "External id": 3286888,"Sequence number": 32987161, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8191 + } + }, + { + "ph": "s", "id": 171, "pid": 1336756, "tid": 1336756, "ts": 1587595643113.449, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643144.541, "dur": 8.940, + "args": { + "External id": 3286889,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643146.901, "dur": 6.181, + "args": { + "External id": 3286890,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595643162.965, "dur": 12.833, + "args": { + "External id": 3286891,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595643166.062, "dur": 9.120, + "args": { + "External id": 3286892,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595643185.744, "dur": 5.340, + "args": { + "External id": 3286893,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643384.262, "dur": 6.974, + "args": { + "External id": 3286894,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643388.406, "dur": 2.583, + "args": { + "External id": 3286895,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595643436.680, "dur": 129.844, + "args": { + "External id": 3286896,"Sequence number": 32987162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595643439.290, "dur": 13.867, + "args": { + "External id": 3286897,"Sequence number": 32987162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8200 + } + }, + { + "ph": "s", "id": 170, "pid": 1336756, "tid": 1336756, "ts": 1587595643439.290, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595643444.302, "dur": 7.317, + "args": { + "External id": 3286898,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595643449.241, "dur": 2.044, + "args": { + "External id": 3286899,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595643455.785, "dur": 110.413, + "args": { + "External id": 3286900,"Sequence number": 32987163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643458.554, "dur": 6.973, + "args": { + "External id": 3286901,"Sequence number": 32987163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643459.329, "dur": 6.063, + "args": { + "External id": 3286902,"Sequence number": 32987163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8205 + } + }, + { + "ph": "s", "id": 169, "pid": 1336756, "tid": 1336756, "ts": 1587595643459.329, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595643470.029, "dur": 86.840, + "args": { + "External id": 3286903,"Sequence number": 32987164, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8206 + } + }, + { + "ph": "s", "id": 168, "pid": 1336756, "tid": 1336756, "ts": 1587595643470.029, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643560.493, "dur": 4.782, + "args": { + "External id": 3286904,"Sequence number": 32987165, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8207 + } + }, + { + "ph": "s", "id": 167, "pid": 1336756, "tid": 1336756, "ts": 1587595643560.493, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595643577.740, "dur": 70.147, + "args": { + "External id": 3286905,"Sequence number": 32987166, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595643578.687, "dur": 10.805, + "args": { + "External id": 3286906,"Sequence number": 32987166, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8209 + } + }, + { + "ph": "s", "id": 166, "pid": 1336756, "tid": 1336756, "ts": 1587595643578.687, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595643580.538, "dur": 7.819, + "args": { + "External id": 3286907,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595643587.060, "dur": 1.093, + "args": { + "External id": 3286908,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595643590.422, "dur": 57.165, + "args": { + "External id": 3286909,"Sequence number": 32987167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643591.564, "dur": 3.653, + "args": { + "External id": 3286910,"Sequence number": 32987167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643592.567, "dur": 2.503, + "args": { + "External id": 3286911,"Sequence number": 32987167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8214 + } + }, + { + "ph": "s", "id": 165, "pid": 1336756, "tid": 1336756, "ts": 1587595643592.567, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595643595.899, "dur": 44.666, + "args": { + "External id": 3286912,"Sequence number": 32987168, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8215 + } + }, + { + "ph": "s", "id": 164, "pid": 1336756, "tid": 1336756, "ts": 1587595643595.899, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643642.365, "dur": 4.938, + "args": { + "External id": 3286913,"Sequence number": 32987169, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8216 + } + }, + { + "ph": "s", "id": 163, "pid": 1336756, "tid": 1336756, "ts": 1587595643642.365, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595643655.233, "dur": 63.227, + "args": { + "External id": 3286914,"Sequence number": 32987170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595643656.053, "dur": 5.658, + "args": { + "External id": 3286915,"Sequence number": 32987170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8218 + } + }, + { + "ph": "s", "id": 162, "pid": 1336756, "tid": 1336756, "ts": 1587595643656.053, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595643658.125, "dur": 2.428, + "args": { + "External id": 3286916,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595643659.679, "dur": 0.733, + "args": { + "External id": 3286917,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595643664.517, "dur": 53.725, + "args": { + "External id": 3286918,"Sequence number": 32987171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643665.544, "dur": 4.658, + "args": { + "External id": 3286919,"Sequence number": 32987171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643666.385, "dur": 3.688, + "args": { + "External id": 3286920,"Sequence number": 32987171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8223 + } + }, + { + "ph": "s", "id": 161, "pid": 1336756, "tid": 1336756, "ts": 1587595643666.385, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595643670.652, "dur": 39.968, + "args": { + "External id": 3286921,"Sequence number": 32987172, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8224 + } + }, + { + "ph": "s", "id": 160, "pid": 1336756, "tid": 1336756, "ts": 1587595643670.652, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643712.030, "dur": 5.858, + "args": { + "External id": 3286922,"Sequence number": 32987173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8225 + } + }, + { + "ph": "s", "id": 159, "pid": 1336756, "tid": 1336756, "ts": 1587595643712.030, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643740.089, "dur": 3.999, + "args": { + "External id": 3286923,"Sequence number": 32987174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643740.812, "dur": 3.130, + "args": { + "External id": 3286924,"Sequence number": 32987174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8227 + } + }, + { + "ph": "s", "id": 158, "pid": 1336756, "tid": 1336756, "ts": 1587595643740.812, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643750.944, "dur": 5.246, + "args": { + "External id": 3286925,"Sequence number": 32987175, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643751.742, "dur": 4.308, + "args": { + "External id": 3286926,"Sequence number": 32987175, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8229 + } + }, + { + "ph": "s", "id": 157, "pid": 1336756, "tid": 1336756, "ts": 1587595643751.742, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595643762.593, "dur": 2.991, + "args": { + "External id": 3286927,"Sequence number": 32987176, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595643763.909, "dur": 1.528, + "args": { + "External id": 3286928,"Sequence number": 32987176, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8231 + } + }, + { + "ph": "s", "id": 156, "pid": 1336756, "tid": 1336756, "ts": 1587595643763.909, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595643800.176, "dur": 233.055, + "args": { + "External id": 3286929,"Sequence number": 32987177, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8232 + } + }, + { + "ph": "s", "id": 155, "pid": 1336756, "tid": 1336756, "ts": 1587595643800.176, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595643822.216, "dur": 8.356, + "args": { + "External id": 3286930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595643825.030, "dur": 5.090, + "args": { + "External id": 3286931,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595644051.015, "dur": 124.869, + "args": { + "External id": 3286932,"Sequence number": 32987178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8235 + } + }, + { + "ph": "s", "id": 154, "pid": 1336756, "tid": 1336756, "ts": 1587595644051.015, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595644067.979, "dur": 9.550, + "args": { + "External id": 3286933,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595644070.818, "dur": 6.151, + "args": { + "External id": 3286934,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336756, "tid": 1336756, + "ts": 1587595644206.918, "dur": 189.292, + "args": { + "External id": 3286935,"Sequence number": 32987179, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8238 + } + }, + { + "ph": "s", "id": 153, "pid": 1336756, "tid": 1336756, "ts": 1587595644206.918, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595644237.204, "dur": 129.507, + "args": { + "External id": 3286936,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595644285.957, "dur": 8.003, + "args": { + "External id": 3286937,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595644288.360, "dur": 5.192, + "args": { + "External id": 3286938,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595644296.478, "dur": 4.510, + "args": { + "External id": 3286939,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595644304.710, "dur": 1.744, + "args": { + "External id": 3286940,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595644308.679, "dur": 3.702, + "args": { + "External id": 3286941,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1336756, + "ts": 1587595644381.306, "dur": 5.718, + "args": { + "External id": 3286942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644401.336, "dur": 6.740, + "args": { + "External id": 3286943,"Sequence number": 32987180, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644403.361, "dur": 4.539, + "args": { + "External id": 3286944,"Sequence number": 32987180, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8247 + } + }, + { + "ph": "s", "id": 152, "pid": 1336756, "tid": 1336756, "ts": 1587595644403.361, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595644420.632, "dur": 122.452, + "args": { + "External id": 3286945,"Sequence number": 32987181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595644424.417, "dur": 9.126, + "args": { + "External id": 3286946,"Sequence number": 32987181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8249 + } + }, + { + "ph": "s", "id": 151, "pid": 1336756, "tid": 1336756, "ts": 1587595644424.417, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595644427.424, "dur": 5.107, + "args": { + "External id": 3286947,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595644430.153, "dur": 2.101, + "args": { + "External id": 3286948,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595644434.919, "dur": 107.886, + "args": { + "External id": 3286949,"Sequence number": 32987182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644436.620, "dur": 7.927, + "args": { + "External id": 3286950,"Sequence number": 32987182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644439.729, "dur": 4.669, + "args": { + "External id": 3286951,"Sequence number": 32987182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8254 + } + }, + { + "ph": "s", "id": 150, "pid": 1336756, "tid": 1336756, "ts": 1587595644439.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595644445.714, "dur": 90.095, + "args": { + "External id": 3286952,"Sequence number": 32987183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8255 + } + }, + { + "ph": "s", "id": 149, "pid": 1336756, "tid": 1336756, "ts": 1587595644445.714, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644538.412, "dur": 3.695, + "args": { + "External id": 3286953,"Sequence number": 32987184, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8256 + } + }, + { + "ph": "s", "id": 148, "pid": 1336756, "tid": 1336756, "ts": 1587595644538.412, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595644577.985, "dur": 230.490, + "args": { + "External id": 3286954,"Sequence number": 32987185, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8257 + } + }, + { + "ph": "s", "id": 147, "pid": 1336756, "tid": 1336756, "ts": 1587595644577.985, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644598.899, "dur": 7.843, + "args": { + "External id": 3286955,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644604.731, "dur": 1.839, + "args": { + "External id": 3286956,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595644612.649, "dur": 3.484, + "args": { + "External id": 3286957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644614.064, "dur": 1.961, + "args": { + "External id": 3286958,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644614.762, "dur": 1.165, + "args": { + "External id": 3286959,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595644623.218, "dur": 8.750, + "args": { + "External id": 3286960,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595644627.448, "dur": 4.193, + "args": { + "External id": 3286961,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595644638.132, "dur": 3.789, + "args": { + "External id": 3286962,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595644647.635, "dur": 2.984, + "args": { + "External id": 3286963,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644784.540, "dur": 3.638, + "args": { + "External id": 3286964,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644785.763, "dur": 2.208, + "args": { + "External id": 3286965,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644790.655, "dur": 2.420, + "args": { + "External id": 3286966,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644791.649, "dur": 1.313, + "args": { + "External id": 3286967,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595644830.291, "dur": 118.708, + "args": { + "External id": 3286968,"Sequence number": 32987186, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595644831.537, "dur": 7.644, + "args": { + "External id": 3286969,"Sequence number": 32987186, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8272 + } + }, + { + "ph": "s", "id": 146, "pid": 1336756, "tid": 1336756, "ts": 1587595644831.537, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595644834.321, "dur": 3.765, + "args": { + "External id": 3286970,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595644836.350, "dur": 1.294, + "args": { + "External id": 3286971,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595644840.261, "dur": 108.395, + "args": { + "External id": 3286972,"Sequence number": 32987187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644843.620, "dur": 3.758, + "args": { + "External id": 3286973,"Sequence number": 32987187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644844.502, "dur": 2.737, + "args": { + "External id": 3286974,"Sequence number": 32987187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8277 + } + }, + { + "ph": "s", "id": 145, "pid": 1336756, "tid": 1336756, "ts": 1587595644844.502, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595644848.223, "dur": 90.111, + "args": { + "External id": 3286975,"Sequence number": 32987188, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8278 + } + }, + { + "ph": "s", "id": 144, "pid": 1336756, "tid": 1336756, "ts": 1587595644848.223, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644941.850, "dur": 6.076, + "args": { + "External id": 3286976,"Sequence number": 32987189, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8279 + } + }, + { + "ph": "s", "id": 143, "pid": 1336756, "tid": 1336756, "ts": 1587595644941.850, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595644959.579, "dur": 119.648, + "args": { + "External id": 3286977,"Sequence number": 32987190, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595644962.615, "dur": 6.567, + "args": { + "External id": 3286978,"Sequence number": 32987190, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8281 + } + }, + { + "ph": "s", "id": 142, "pid": 1336756, "tid": 1336756, "ts": 1587595644962.615, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595644964.516, "dur": 3.237, + "args": { + "External id": 3286979,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595644966.656, "dur": 0.855, + "args": { + "External id": 3286980,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595644969.975, "dur": 108.993, + "args": { + "External id": 3286981,"Sequence number": 32987191, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595644970.980, "dur": 7.680, + "args": { + "External id": 3286982,"Sequence number": 32987191, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595644974.783, "dur": 3.740, + "args": { + "External id": 3286983,"Sequence number": 32987191, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8286 + } + }, + { + "ph": "s", "id": 141, "pid": 1336756, "tid": 1336756, "ts": 1587595644974.783, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595644979.364, "dur": 92.142, + "args": { + "External id": 3286984,"Sequence number": 32987192, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8287 + } + }, + { + "ph": "s", "id": 140, "pid": 1336756, "tid": 1336756, "ts": 1587595644979.364, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645074.729, "dur": 3.673, + "args": { + "External id": 3286985,"Sequence number": 32987193, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8288 + } + }, + { + "ph": "s", "id": 139, "pid": 1336756, "tid": 1336756, "ts": 1587595645074.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595645106.988, "dur": 167.983, + "args": { + "External id": 3286986,"Sequence number": 32987194, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8289 + } + }, + { + "ph": "s", "id": 138, "pid": 1336756, "tid": 1336756, "ts": 1587595645106.988, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595645154.387, "dur": 4.078, + "args": { + "External id": 3286987,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595645193.081, "dur": 67.479, + "args": { + "External id": 3286988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595645194.267, "dur": 7.659, + "args": { + "External id": 3286989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595645195.559, "dur": 5.392, + "args": { + "External id": 3286990,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595645199.488, "dur": 1.301, + "args": { + "External id": 3286991,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595645202.863, "dur": 57.271, + "args": { + "External id": 3286992,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645206.947, "dur": 2.895, + "args": { + "External id": 3286993,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645208.051, "dur": 1.636, + "args": { + "External id": 3286994,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595645210.683, "dur": 45.211, + "args": { + "External id": 3286995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645258.011, "dur": 1.548, + "args": { + "External id": 3286996,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587595645284.807, "dur": 27.103, + "args": { + "External id": 3286997,"Sequence number": 32987195, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8300 + } + }, + { + "ph": "s", "id": 137, "pid": 1336756, "tid": 1336756, "ts": 1587595645284.807, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595645347.200, "dur": 183.755, + "args": { + "External id": 3286998,"Sequence number": 32987196, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8301 + } + }, + { + "ph": "s", "id": 136, "pid": 1336756, "tid": 1336756, "ts": 1587595645347.200, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645366.990, "dur": 5.854, + "args": { + "External id": 3286999,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645370.160, "dur": 2.505, + "args": { + "External id": 3287000,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595645380.357, "dur": 7.730, + "args": { + "External id": 3287001,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595645383.306, "dur": 4.382, + "args": { + "External id": 3287002,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595645393.786, "dur": 3.826, + "args": { + "External id": 3287003,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645516.258, "dur": 2.873, + "args": { + "External id": 3287004,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645517.300, "dur": 1.576, + "args": { + "External id": 3287005,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595645551.832, "dur": 82.647, + "args": { + "External id": 3287006,"Sequence number": 32987197, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595645552.736, "dur": 8.817, + "args": { + "External id": 3287007,"Sequence number": 32987197, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8310 + } + }, + { + "ph": "s", "id": 135, "pid": 1336756, "tid": 1336756, "ts": 1587595645552.736, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595645557.160, "dur": 3.463, + "args": { + "External id": 3287008,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595645558.847, "dur": 1.591, + "args": { + "External id": 3287009,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595645562.725, "dur": 71.391, + "args": { + "External id": 3287010,"Sequence number": 32987198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645564.177, "dur": 6.136, + "args": { + "External id": 3287011,"Sequence number": 32987198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645567.119, "dur": 3.013, + "args": { + "External id": 3287012,"Sequence number": 32987198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8315 + } + }, + { + "ph": "s", "id": 134, "pid": 1336756, "tid": 1336756, "ts": 1587595645567.119, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595645571.216, "dur": 55.186, + "args": { + "External id": 3287013,"Sequence number": 32987199, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8316 + } + }, + { + "ph": "s", "id": 133, "pid": 1336756, "tid": 1336756, "ts": 1587595645571.216, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645628.333, "dur": 5.277, + "args": { + "External id": 3287014,"Sequence number": 32987200, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8317 + } + }, + { + "ph": "s", "id": 132, "pid": 1336756, "tid": 1336756, "ts": 1587595645628.333, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595645642.774, "dur": 69.390, + "args": { + "External id": 3287015,"Sequence number": 32987201, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595645643.700, "dur": 10.240, + "args": { + "External id": 3287016,"Sequence number": 32987201, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8319 + } + }, + { + "ph": "s", "id": 131, "pid": 1336756, "tid": 1336756, "ts": 1587595645643.700, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595645649.572, "dur": 3.233, + "args": { + "External id": 3287017,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595645651.577, "dur": 0.852, + "args": { + "External id": 3287018,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595645654.832, "dur": 57.106, + "args": { + "External id": 3287019,"Sequence number": 32987202, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645656.308, "dur": 5.261, + "args": { + "External id": 3287020,"Sequence number": 32987202, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645657.181, "dur": 4.240, + "args": { + "External id": 3287021,"Sequence number": 32987202, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8324 + } + }, + { + "ph": "s", "id": 130, "pid": 1336756, "tid": 1336756, "ts": 1587595645657.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595645662.053, "dur": 45.731, + "args": { + "External id": 3287022,"Sequence number": 32987203, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8325 + } + }, + { + "ph": "s", "id": 129, "pid": 1336756, "tid": 1336756, "ts": 1587595645662.053, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645709.479, "dur": 2.139, + "args": { + "External id": 3287023,"Sequence number": 32987204, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8326 + } + }, + { + "ph": "s", "id": 128, "pid": 1336756, "tid": 1336756, "ts": 1587595645709.479, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595645719.032, "dur": 62.589, + "args": { + "External id": 3287024,"Sequence number": 32987205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595645719.804, "dur": 7.574, + "args": { + "External id": 3287025,"Sequence number": 32987205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8328 + } + }, + { + "ph": "s", "id": 127, "pid": 1336756, "tid": 1336756, "ts": 1587595645719.804, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595645721.595, "dur": 4.800, + "args": { + "External id": 3287026,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595645725.191, "dur": 0.894, + "args": { + "External id": 3287027,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595645727.875, "dur": 53.526, + "args": { + "External id": 3287028,"Sequence number": 32987206, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645729.033, "dur": 4.530, + "args": { + "External id": 3287029,"Sequence number": 32987206, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645729.939, "dur": 3.499, + "args": { + "External id": 3287030,"Sequence number": 32987206, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8333 + } + }, + { + "ph": "s", "id": 126, "pid": 1336756, "tid": 1336756, "ts": 1587595645729.939, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595645736.310, "dur": 40.569, + "args": { + "External id": 3287031,"Sequence number": 32987207, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8334 + } + }, + { + "ph": "s", "id": 125, "pid": 1336756, "tid": 1336756, "ts": 1587595645736.310, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645778.779, "dur": 2.311, + "args": { + "External id": 3287032,"Sequence number": 32987208, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8335 + } + }, + { + "ph": "s", "id": 124, "pid": 1336756, "tid": 1336756, "ts": 1587595645778.779, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645796.767, "dur": 4.507, + "args": { + "External id": 3287033,"Sequence number": 32987209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645797.907, "dur": 3.211, + "args": { + "External id": 3287034,"Sequence number": 32987209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8337 + } + }, + { + "ph": "s", "id": 123, "pid": 1336756, "tid": 1336756, "ts": 1587595645797.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645807.406, "dur": 9.697, + "args": { + "External id": 3287035,"Sequence number": 32987210, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645810.856, "dur": 6.090, + "args": { + "External id": 3287036,"Sequence number": 32987210, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8339 + } + }, + { + "ph": "s", "id": 122, "pid": 1336756, "tid": 1336756, "ts": 1587595645810.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595645821.405, "dur": 3.297, + "args": { + "External id": 3287037,"Sequence number": 32987211, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595645822.978, "dur": 1.574, + "args": { + "External id": 3287038,"Sequence number": 32987211, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8341 + } + }, + { + "ph": "s", "id": 121, "pid": 1336756, "tid": 1336756, "ts": 1587595645822.978, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595645851.582, "dur": 244.437, + "args": { + "External id": 3287039,"Sequence number": 32987212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8342 + } + }, + { + "ph": "s", "id": 120, "pid": 1336756, "tid": 1336756, "ts": 1587595645851.582, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595645869.970, "dur": 54.598, + "args": { + "External id": 3287040,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595645917.035, "dur": 6.933, + "args": { + "External id": 3287041,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595646114.002, "dur": 324.062, + "args": { + "External id": 3287042,"Sequence number": 32987213, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8345 + } + }, + { + "ph": "s", "id": 119, "pid": 1336756, "tid": 1336756, "ts": 1587595646114.002, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595646128.821, "dur": 8.771, + "args": { + "External id": 3287043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595646131.543, "dur": 5.609, + "args": { + "External id": 3287044,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336756, "tid": 1336756, + "ts": 1587595646473.291, "dur": 346.568, + "args": { + "External id": 3287045,"Sequence number": 32987214, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8348 + } + }, + { + "ph": "s", "id": 118, "pid": 1336756, "tid": 1336756, "ts": 1587595646473.291, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595646504.505, "dur": 142.721, + "args": { + "External id": 3287046,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595646554.741, "dur": 11.061, + "args": { + "External id": 3287047,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595646558.368, "dur": 6.890, + "args": { + "External id": 3287048,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595646568.482, "dur": 5.166, + "args": { + "External id": 3287049,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595646574.611, "dur": 1.909, + "args": { + "External id": 3287050,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595646578.904, "dur": 3.917, + "args": { + "External id": 3287051,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1336756, + "ts": 1587595646659.879, "dur": 5.204, + "args": { + "External id": 3287052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595646827.747, "dur": 9.770, + "args": { + "External id": 3287053,"Sequence number": 32987215, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595646829.481, "dur": 7.847, + "args": { + "External id": 3287054,"Sequence number": 32987215, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8357 + } + }, + { + "ph": "s", "id": 117, "pid": 1336756, "tid": 1336756, "ts": 1587595646829.481, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595646858.279, "dur": 203.289, + "args": { + "External id": 3287055,"Sequence number": 32987216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595646859.799, "dur": 9.570, + "args": { + "External id": 3287056,"Sequence number": 32987216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8359 + } + }, + { + "ph": "s", "id": 116, "pid": 1336756, "tid": 1336756, "ts": 1587595646859.799, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595646862.914, "dur": 5.292, + "args": { + "External id": 3287057,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595646865.388, "dur": 2.477, + "args": { + "External id": 3287058,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595646887.492, "dur": 173.689, + "args": { + "External id": 3287059,"Sequence number": 32987217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595646892.602, "dur": 7.152, + "args": { + "External id": 3287060,"Sequence number": 32987217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595646893.514, "dur": 5.987, + "args": { + "External id": 3287061,"Sequence number": 32987217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8364 + } + }, + { + "ph": "s", "id": 115, "pid": 1336756, "tid": 1336756, "ts": 1587595646893.514, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595646900.856, "dur": 149.044, + "args": { + "External id": 3287062,"Sequence number": 32987218, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8365 + } + }, + { + "ph": "s", "id": 114, "pid": 1336756, "tid": 1336756, "ts": 1587595646900.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647053.710, "dur": 6.411, + "args": { + "External id": 3287063,"Sequence number": 32987219, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8366 + } + }, + { + "ph": "s", "id": 113, "pid": 1336756, "tid": 1336756, "ts": 1587595647053.710, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595647106.592, "dur": 245.401, + "args": { + "External id": 3287064,"Sequence number": 32987220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8367 + } + }, + { + "ph": "s", "id": 112, "pid": 1336756, "tid": 1336756, "ts": 1587595647106.592, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647132.489, "dur": 3.469, + "args": { + "External id": 3287065,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647133.345, "dur": 2.400, + "args": { + "External id": 3287066,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595647140.259, "dur": 5.334, + "args": { + "External id": 3287067,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647141.113, "dur": 4.373, + "args": { + "External id": 3287068,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647144.119, "dur": 1.278, + "args": { + "External id": 3287069,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595647154.117, "dur": 9.636, + "args": { + "External id": 3287070,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595647157.450, "dur": 5.877, + "args": { + "External id": 3287071,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595647170.232, "dur": 4.826, + "args": { + "External id": 3287072,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595647178.784, "dur": 3.689, + "args": { + "External id": 3287073,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647326.140, "dur": 3.702, + "args": { + "External id": 3287074,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647327.651, "dur": 1.957, + "args": { + "External id": 3287075,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647334.974, "dur": 2.574, + "args": { + "External id": 3287076,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647336.069, "dur": 1.357, + "args": { + "External id": 3287077,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595647372.622, "dur": 110.559, + "args": { + "External id": 3287078,"Sequence number": 32987221, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595647373.783, "dur": 9.179, + "args": { + "External id": 3287079,"Sequence number": 32987221, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8382 + } + }, + { + "ph": "s", "id": 111, "pid": 1336756, "tid": 1336756, "ts": 1587595647373.783, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595647376.831, "dur": 4.644, + "args": { + "External id": 3287080,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595647379.121, "dur": 1.854, + "args": { + "External id": 3287081,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595647387.072, "dur": 95.768, + "args": { + "External id": 3287082,"Sequence number": 32987222, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647388.717, "dur": 3.969, + "args": { + "External id": 3287083,"Sequence number": 32987222, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647389.716, "dur": 2.838, + "args": { + "External id": 3287084,"Sequence number": 32987222, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8387 + } + }, + { + "ph": "s", "id": 110, "pid": 1336756, "tid": 1336756, "ts": 1587595647389.716, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595647393.763, "dur": 81.231, + "args": { + "External id": 3287085,"Sequence number": 32987223, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8388 + } + }, + { + "ph": "s", "id": 109, "pid": 1336756, "tid": 1336756, "ts": 1587595647393.763, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647477.566, "dur": 4.637, + "args": { + "External id": 3287086,"Sequence number": 32987224, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8389 + } + }, + { + "ph": "s", "id": 108, "pid": 1336756, "tid": 1336756, "ts": 1587595647477.566, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595647492.145, "dur": 71.377, + "args": { + "External id": 3287087,"Sequence number": 32987225, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595647492.937, "dur": 6.177, + "args": { + "External id": 3287088,"Sequence number": 32987225, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8391 + } + }, + { + "ph": "s", "id": 107, "pid": 1336756, "tid": 1336756, "ts": 1587595647492.937, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595647495.038, "dur": 3.050, + "args": { + "External id": 3287089,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595647496.524, "dur": 1.406, + "args": { + "External id": 3287090,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595647499.864, "dur": 63.351, + "args": { + "External id": 3287091,"Sequence number": 32987226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647503.277, "dur": 6.634, + "args": { + "External id": 3287092,"Sequence number": 32987226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647504.186, "dur": 5.577, + "args": { + "External id": 3287093,"Sequence number": 32987226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8396 + } + }, + { + "ph": "s", "id": 106, "pid": 1336756, "tid": 1336756, "ts": 1587595647504.186, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595647511.023, "dur": 45.944, + "args": { + "External id": 3287094,"Sequence number": 32987227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8397 + } + }, + { + "ph": "s", "id": 105, "pid": 1336756, "tid": 1336756, "ts": 1587595647511.023, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647558.596, "dur": 4.257, + "args": { + "External id": 3287095,"Sequence number": 32987228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8398 + } + }, + { + "ph": "s", "id": 104, "pid": 1336756, "tid": 1336756, "ts": 1587595647558.596, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595647587.276, "dur": 140.179, + "args": { + "External id": 3287096,"Sequence number": 32987229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8399 + } + }, + { + "ph": "s", "id": 103, "pid": 1336756, "tid": 1336756, "ts": 1587595647587.276, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595647622.705, "dur": 4.111, + "args": { + "External id": 3287097,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595647656.784, "dur": 57.140, + "args": { + "External id": 3287098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595647657.530, "dur": 5.055, + "args": { + "External id": 3287099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595647658.654, "dur": 3.050, + "args": { + "External id": 3287100,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595647660.455, "dur": 1.100, + "args": { + "External id": 3287101,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595647665.702, "dur": 47.927, + "args": { + "External id": 3287102,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647666.853, "dur": 2.132, + "args": { + "External id": 3287103,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647667.749, "dur": 1.114, + "args": { + "External id": 3287104,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595647669.491, "dur": 40.456, + "args": { + "External id": 3287105,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647711.810, "dur": 1.263, + "args": { + "External id": 3287106,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587595647735.209, "dur": 25.105, + "args": { + "External id": 3287107,"Sequence number": 32987230, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8410 + } + }, + { + "ph": "s", "id": 102, "pid": 1336756, "tid": 1336756, "ts": 1587595647735.209, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595647793.554, "dur": 238.511, + "args": { + "External id": 3287108,"Sequence number": 32987231, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "s", "id": 101, "pid": 1336756, "tid": 1336756, "ts": 1587595647793.554, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647812.792, "dur": 3.053, + "args": { + "External id": 3287109,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647813.718, "dur": 1.914, + "args": { + "External id": 3287110,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595647823.147, "dur": 6.131, + "args": { + "External id": 3287111,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595647824.978, "dur": 3.826, + "args": { + "External id": 3287112,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595647834.472, "dur": 3.567, + "args": { + "External id": 3287113,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595647977.867, "dur": 37.791, + "args": { + "External id": 3287114,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595647978.914, "dur": 36.163, + "args": { + "External id": 3287115,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595648055.580, "dur": 97.335, + "args": { + "External id": 3287116,"Sequence number": 32987232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595648056.522, "dur": 7.515, + "args": { + "External id": 3287117,"Sequence number": 32987232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8420 + } + }, + { + "ph": "s", "id": 100, "pid": 1336756, "tid": 1336756, "ts": 1587595648056.522, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595648058.975, "dur": 3.724, + "args": { + "External id": 3287118,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595648061.091, "dur": 1.222, + "args": { + "External id": 3287119,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595648065.482, "dur": 87.129, + "args": { + "External id": 3287120,"Sequence number": 32987233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648069.342, "dur": 4.440, + "args": { + "External id": 3287121,"Sequence number": 32987233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648070.394, "dur": 3.263, + "args": { + "External id": 3287122,"Sequence number": 32987233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8425 + } + }, + { + "ph": "s", "id": 99, "pid": 1336756, "tid": 1336756, "ts": 1587595648070.394, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595648074.836, "dur": 70.011, + "args": { + "External id": 3287123,"Sequence number": 32987234, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8426 + } + }, + { + "ph": "s", "id": 98, "pid": 1336756, "tid": 1336756, "ts": 1587595648074.836, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648146.958, "dur": 4.871, + "args": { + "External id": 3287124,"Sequence number": 32987235, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8427 + } + }, + { + "ph": "s", "id": 97, "pid": 1336756, "tid": 1336756, "ts": 1587595648146.958, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595648161.321, "dur": 64.271, + "args": { + "External id": 3287125,"Sequence number": 32987236, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595648164.133, "dur": 5.899, + "args": { + "External id": 3287126,"Sequence number": 32987236, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8429 + } + }, + { + "ph": "s", "id": 96, "pid": 1336756, "tid": 1336756, "ts": 1587595648164.133, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595648166.034, "dur": 2.820, + "args": { + "External id": 3287127,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595648167.594, "dur": 1.112, + "args": { + "External id": 3287128,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595648170.538, "dur": 54.779, + "args": { + "External id": 3287129,"Sequence number": 32987237, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648171.721, "dur": 5.348, + "args": { + "External id": 3287130,"Sequence number": 32987237, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648174.811, "dur": 2.117, + "args": { + "External id": 3287131,"Sequence number": 32987237, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8434 + } + }, + { + "ph": "s", "id": 95, "pid": 1336756, "tid": 1336756, "ts": 1587595648174.811, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595648177.733, "dur": 42.719, + "args": { + "External id": 3287132,"Sequence number": 32987238, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8435 + } + }, + { + "ph": "s", "id": 94, "pid": 1336756, "tid": 1336756, "ts": 1587595648177.733, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648222.163, "dur": 2.771, + "args": { + "External id": 3287133,"Sequence number": 32987239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8436 + } + }, + { + "ph": "s", "id": 93, "pid": 1336756, "tid": 1336756, "ts": 1587595648222.163, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595648232.796, "dur": 64.506, + "args": { + "External id": 3287134,"Sequence number": 32987240, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595648233.655, "dur": 8.978, + "args": { + "External id": 3287135,"Sequence number": 32987240, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8438 + } + }, + { + "ph": "s", "id": 92, "pid": 1336756, "tid": 1336756, "ts": 1587595648233.655, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595648237.053, "dur": 4.430, + "args": { + "External id": 3287136,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595648238.430, "dur": 2.850, + "args": { + "External id": 3287137,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595648243.322, "dur": 53.581, + "args": { + "External id": 3287138,"Sequence number": 32987241, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648244.676, "dur": 7.032, + "args": { + "External id": 3287139,"Sequence number": 32987241, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648247.640, "dur": 3.950, + "args": { + "External id": 3287140,"Sequence number": 32987241, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8443 + } + }, + { + "ph": "s", "id": 91, "pid": 1336756, "tid": 1336756, "ts": 1587595648247.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595648252.180, "dur": 38.509, + "args": { + "External id": 3287141,"Sequence number": 32987242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8444 + } + }, + { + "ph": "s", "id": 90, "pid": 1336756, "tid": 1336756, "ts": 1587595648252.180, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648292.057, "dur": 4.413, + "args": { + "External id": 3287142,"Sequence number": 32987243, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8445 + } + }, + { + "ph": "s", "id": 89, "pid": 1336756, "tid": 1336756, "ts": 1587595648292.057, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648313.129, "dur": 5.544, + "args": { + "External id": 3287143,"Sequence number": 32987244, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648313.871, "dur": 4.637, + "args": { + "External id": 3287144,"Sequence number": 32987244, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8447 + } + }, + { + "ph": "s", "id": 88, "pid": 1336756, "tid": 1336756, "ts": 1587595648313.871, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648325.343, "dur": 3.556, + "args": { + "External id": 3287145,"Sequence number": 32987245, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648326.491, "dur": 2.270, + "args": { + "External id": 3287146,"Sequence number": 32987245, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8449 + } + }, + { + "ph": "s", "id": 87, "pid": 1336756, "tid": 1336756, "ts": 1587595648326.491, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648333.461, "dur": 3.319, + "args": { + "External id": 3287147,"Sequence number": 32987246, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648334.339, "dur": 2.304, + "args": { + "External id": 3287148,"Sequence number": 32987246, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8451 + } + }, + { + "ph": "s", "id": 86, "pid": 1336756, "tid": 1336756, "ts": 1587595648334.339, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595648366.529, "dur": 162.624, + "args": { + "External id": 3287149,"Sequence number": 32987247, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8452 + } + }, + { + "ph": "s", "id": 85, "pid": 1336756, "tid": 1336756, "ts": 1587595648366.529, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595648386.530, "dur": 10.989, + "args": { + "External id": 3287150,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595648389.550, "dur": 7.519, + "args": { + "External id": 3287151,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595648541.058, "dur": 110.576, + "args": { + "External id": 3287152,"Sequence number": 32987248, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "s", "id": 84, "pid": 1336756, "tid": 1336756, "ts": 1587595648541.058, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595648554.751, "dur": 6.830, + "args": { + "External id": 3287153,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595648556.686, "dur": 4.541, + "args": { + "External id": 3287154,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336756, "tid": 1336756, + "ts": 1587595648678.726, "dur": 166.784, + "args": { + "External id": 3287155,"Sequence number": 32987249, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8458 + } + }, + { + "ph": "s", "id": 83, "pid": 1336756, "tid": 1336756, "ts": 1587595648678.726, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595648703.068, "dur": 118.985, + "args": { + "External id": 3287156,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595648749.168, "dur": 6.490, + "args": { + "External id": 3287157,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595648751.033, "dur": 4.286, + "args": { + "External id": 3287158,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595648758.294, "dur": 3.191, + "args": { + "External id": 3287159,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595648762.869, "dur": 1.769, + "args": { + "External id": 3287160,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595648767.071, "dur": 3.427, + "args": { + "External id": 3287161,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1336756, + "ts": 1587595648832.919, "dur": 3.849, + "args": { + "External id": 3287162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648849.988, "dur": 4.968, + "args": { + "External id": 3287163,"Sequence number": 32987250, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648851.556, "dur": 3.263, + "args": { + "External id": 3287164,"Sequence number": 32987250, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8467 + } + }, + { + "ph": "s", "id": 82, "pid": 1336756, "tid": 1336756, "ts": 1587595648851.556, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595648865.256, "dur": 171.107, + "args": { + "External id": 3287165,"Sequence number": 32987251, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595648866.494, "dur": 32.068, + "args": { + "External id": 3287166,"Sequence number": 32987251, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8469 + } + }, + { + "ph": "s", "id": 81, "pid": 1336756, "tid": 1336756, "ts": 1587595648866.494, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595648889.706, "dur": 7.392, + "args": { + "External id": 3287167,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595648894.719, "dur": 1.949, + "args": { + "External id": 3287168,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595648899.773, "dur": 136.202, + "args": { + "External id": 3287169,"Sequence number": 32987252, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595648901.842, "dur": 3.418, + "args": { + "External id": 3287170,"Sequence number": 32987252, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595648902.914, "dur": 2.249, + "args": { + "External id": 3287171,"Sequence number": 32987252, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8474 + } + }, + { + "ph": "s", "id": 80, "pid": 1336756, "tid": 1336756, "ts": 1587595648902.914, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595648906.071, "dur": 117.392, + "args": { + "External id": 3287172,"Sequence number": 32987253, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8475 + } + }, + { + "ph": "s", "id": 79, "pid": 1336756, "tid": 1336756, "ts": 1587595648906.071, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649027.902, "dur": 7.237, + "args": { + "External id": 3287173,"Sequence number": 32987254, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8476 + } + }, + { + "ph": "s", "id": 78, "pid": 1336756, "tid": 1336756, "ts": 1587595649027.902, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595649073.327, "dur": 212.173, + "args": { + "External id": 3287174,"Sequence number": 32987255, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "s", "id": 77, "pid": 1336756, "tid": 1336756, "ts": 1587595649073.327, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649091.754, "dur": 3.203, + "args": { + "External id": 3287175,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649092.975, "dur": 1.834, + "args": { + "External id": 3287176,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595649098.332, "dur": 6.454, + "args": { + "External id": 3287177,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649102.580, "dur": 2.091, + "args": { + "External id": 3287178,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649103.597, "dur": 0.966, + "args": { + "External id": 3287179,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595649112.434, "dur": 7.672, + "args": { + "External id": 3287180,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595649114.721, "dur": 4.867, + "args": { + "External id": 3287181,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595649125.953, "dur": 3.704, + "args": { + "External id": 3287182,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595649133.384, "dur": 3.419, + "args": { + "External id": 3287183,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649262.909, "dur": 3.238, + "args": { + "External id": 3287184,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649263.932, "dur": 2.031, + "args": { + "External id": 3287185,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649268.883, "dur": 2.413, + "args": { + "External id": 3287186,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649270.154, "dur": 1.032, + "args": { + "External id": 3287187,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595649301.891, "dur": 102.057, + "args": { + "External id": 3287188,"Sequence number": 32987256, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595649302.901, "dur": 10.518, + "args": { + "External id": 3287189,"Sequence number": 32987256, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8492 + } + }, + { + "ph": "s", "id": 76, "pid": 1336756, "tid": 1336756, "ts": 1587595649302.901, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595649305.831, "dur": 6.278, + "args": { + "External id": 3287190,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595649310.304, "dur": 1.425, + "args": { + "External id": 3287191,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595649314.289, "dur": 89.356, + "args": { + "External id": 3287192,"Sequence number": 32987257, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649316.144, "dur": 3.851, + "args": { + "External id": 3287193,"Sequence number": 32987257, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649317.111, "dur": 2.748, + "args": { + "External id": 3287194,"Sequence number": 32987257, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8497 + } + }, + { + "ph": "s", "id": 75, "pid": 1336756, "tid": 1336756, "ts": 1587595649317.111, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595649322.905, "dur": 73.318, + "args": { + "External id": 3287195,"Sequence number": 32987258, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8498 + } + }, + { + "ph": "s", "id": 74, "pid": 1336756, "tid": 1336756, "ts": 1587595649322.905, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649397.938, "dur": 5.226, + "args": { + "External id": 3287196,"Sequence number": 32987259, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8499 + } + }, + { + "ph": "s", "id": 73, "pid": 1336756, "tid": 1336756, "ts": 1587595649397.938, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595649411.576, "dur": 79.696, + "args": { + "External id": 3287197,"Sequence number": 32987260, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595649412.313, "dur": 18.867, + "args": { + "External id": 3287198,"Sequence number": 32987260, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8501 + } + }, + { + "ph": "s", "id": 72, "pid": 1336756, "tid": 1336756, "ts": 1587595649412.313, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595649414.391, "dur": 15.361, + "args": { + "External id": 3287199,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595649425.877, "dur": 3.626, + "args": { + "External id": 3287200,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595649432.045, "dur": 58.997, + "args": { + "External id": 3287201,"Sequence number": 32987261, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649433.206, "dur": 4.288, + "args": { + "External id": 3287202,"Sequence number": 32987261, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649434.088, "dur": 3.277, + "args": { + "External id": 3287203,"Sequence number": 32987261, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8506 + } + }, + { + "ph": "s", "id": 71, "pid": 1336756, "tid": 1336756, "ts": 1587595649434.088, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595649438.054, "dur": 46.730, + "args": { + "External id": 3287204,"Sequence number": 32987262, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8507 + } + }, + { + "ph": "s", "id": 70, "pid": 1336756, "tid": 1336756, "ts": 1587595649438.054, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649486.803, "dur": 3.849, + "args": { + "External id": 3287205,"Sequence number": 32987263, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8508 + } + }, + { + "ph": "s", "id": 69, "pid": 1336756, "tid": 1336756, "ts": 1587595649486.803, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595649509.993, "dur": 144.754, + "args": { + "External id": 3287206,"Sequence number": 32987264, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8509 + } + }, + { + "ph": "s", "id": 68, "pid": 1336756, "tid": 1336756, "ts": 1587595649509.993, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595649544.938, "dur": 4.286, + "args": { + "External id": 3287207,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595649582.234, "dur": 60.354, + "args": { + "External id": 3287208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595649583.495, "dur": 4.926, + "args": { + "External id": 3287209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595649584.677, "dur": 2.756, + "args": { + "External id": 3287210,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595649586.347, "dur": 0.925, + "args": { + "External id": 3287211,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595649589.640, "dur": 52.542, + "args": { + "External id": 3287212,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649591.051, "dur": 2.383, + "args": { + "External id": 3287213,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649591.786, "dur": 1.497, + "args": { + "External id": 3287214,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595649593.974, "dur": 44.032, + "args": { + "External id": 3287215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649640.089, "dur": 1.471, + "args": { + "External id": 3287216,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587595649662.354, "dur": 25.071, + "args": { + "External id": 3287217,"Sequence number": 32987265, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8520 + } + }, + { + "ph": "s", "id": 67, "pid": 1336756, "tid": 1336756, "ts": 1587595649662.354, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595649721.975, "dur": 203.138, + "args": { + "External id": 3287218,"Sequence number": 32987266, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [], [], [], [], []], "Ev Idx": 8521 + } + }, + { + "ph": "s", "id": 66, "pid": 1336756, "tid": 1336756, "ts": 1587595649721.975, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649738.659, "dur": 3.307, + "args": { + "External id": 3287219,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649739.639, "dur": 2.172, + "args": { + "External id": 3287220,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595649749.014, "dur": 6.643, + "args": { + "External id": 3287221,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595649751.238, "dur": 3.992, + "args": { + "External id": 3287222,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595649760.906, "dur": 3.321, + "args": { + "External id": 3287223,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649902.596, "dur": 10.326, + "args": { + "External id": 3287224,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649907.212, "dur": 5.201, + "args": { + "External id": 3287225,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595649944.467, "dur": 137.403, + "args": { + "External id": 3287226,"Sequence number": 32987267, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595649945.752, "dur": 9.928, + "args": { + "External id": 3287227,"Sequence number": 32987267, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8530 + } + }, + { + "ph": "s", "id": 65, "pid": 1336756, "tid": 1336756, "ts": 1587595649945.752, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595649948.116, "dur": 6.421, + "args": { + "External id": 3287228,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595649952.366, "dur": 1.998, + "args": { + "External id": 3287229,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595649956.724, "dur": 124.823, + "args": { + "External id": 3287230,"Sequence number": 32987268, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595649958.058, "dur": 5.608, + "args": { + "External id": 3287231,"Sequence number": 32987268, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595649959.302, "dur": 4.232, + "args": { + "External id": 3287232,"Sequence number": 32987268, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8535 + } + }, + { + "ph": "s", "id": 64, "pid": 1336756, "tid": 1336756, "ts": 1587595649959.302, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595649964.445, "dur": 108.398, + "args": { + "External id": 3287233,"Sequence number": 32987269, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8536 + } + }, + { + "ph": "s", "id": 63, "pid": 1336756, "tid": 1336756, "ts": 1587595649964.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650076.814, "dur": 3.800, + "args": { + "External id": 3287234,"Sequence number": 32987270, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8537 + } + }, + { + "ph": "s", "id": 62, "pid": 1336756, "tid": 1336756, "ts": 1587595650076.814, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595650092.320, "dur": 70.697, + "args": { + "External id": 3287235,"Sequence number": 32987271, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595650093.134, "dur": 6.627, + "args": { + "External id": 3287236,"Sequence number": 32987271, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8539 + } + }, + { + "ph": "s", "id": 61, "pid": 1336756, "tid": 1336756, "ts": 1587595650093.134, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595650095.457, "dur": 2.946, + "args": { + "External id": 3287237,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595650097.251, "dur": 0.990, + "args": { + "External id": 3287238,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595650102.927, "dur": 59.853, + "args": { + "External id": 3287239,"Sequence number": 32987272, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650104.439, "dur": 5.986, + "args": { + "External id": 3287240,"Sequence number": 32987272, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650105.161, "dur": 5.130, + "args": { + "External id": 3287241,"Sequence number": 32987272, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8544 + } + }, + { + "ph": "s", "id": 60, "pid": 1336756, "tid": 1336756, "ts": 1587595650105.161, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595650111.013, "dur": 43.392, + "args": { + "External id": 3287242,"Sequence number": 32987273, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8545 + } + }, + { + "ph": "s", "id": 59, "pid": 1336756, "tid": 1336756, "ts": 1587595650111.013, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650155.832, "dur": 6.508, + "args": { + "External id": 3287243,"Sequence number": 32987274, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8546 + } + }, + { + "ph": "s", "id": 58, "pid": 1336756, "tid": 1336756, "ts": 1587595650155.832, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595650169.970, "dur": 67.569, + "args": { + "External id": 3287244,"Sequence number": 32987275, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595650170.537, "dur": 14.711, + "args": { + "External id": 3287245,"Sequence number": 32987275, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8548 + } + }, + { + "ph": "s", "id": 57, "pid": 1336756, "tid": 1336756, "ts": 1587595650170.537, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595650177.041, "dur": 6.884, + "args": { + "External id": 3287246,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595650182.999, "dur": 0.785, + "args": { + "External id": 3287247,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595650186.087, "dur": 51.214, + "args": { + "External id": 3287248,"Sequence number": 32987276, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650189.440, "dur": 4.424, + "args": { + "External id": 3287249,"Sequence number": 32987276, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650190.514, "dur": 3.196, + "args": { + "External id": 3287250,"Sequence number": 32987276, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8553 + } + }, + { + "ph": "s", "id": 56, "pid": 1336756, "tid": 1336756, "ts": 1587595650190.514, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595650194.399, "dur": 36.992, + "args": { + "External id": 3287251,"Sequence number": 32987277, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8554 + } + }, + { + "ph": "s", "id": 55, "pid": 1336756, "tid": 1336756, "ts": 1587595650194.399, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650232.862, "dur": 4.164, + "args": { + "External id": 3287252,"Sequence number": 32987278, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8555 + } + }, + { + "ph": "s", "id": 54, "pid": 1336756, "tid": 1336756, "ts": 1587595650232.862, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650255.266, "dur": 4.284, + "args": { + "External id": 3287253,"Sequence number": 32987279, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650256.507, "dur": 2.897, + "args": { + "External id": 3287254,"Sequence number": 32987279, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8557 + } + }, + { + "ph": "s", "id": 53, "pid": 1336756, "tid": 1336756, "ts": 1587595650256.507, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650266.235, "dur": 3.495, + "args": { + "External id": 3287255,"Sequence number": 32987280, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650267.622, "dur": 1.961, + "args": { + "External id": 3287256,"Sequence number": 32987280, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8559 + } + }, + { + "ph": "s", "id": 52, "pid": 1336756, "tid": 1336756, "ts": 1587595650267.622, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650273.723, "dur": 6.260, + "args": { + "External id": 3287257,"Sequence number": 32987281, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650277.377, "dur": 2.458, + "args": { + "External id": 3287258,"Sequence number": 32987281, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 32, 64]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8561 + } + }, + { + "ph": "s", "id": 51, "pid": 1336756, "tid": 1336756, "ts": 1587595650277.377, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595650311.410, "dur": 156.529, + "args": { + "External id": 3287259,"Sequence number": 32987282, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8562 + } + }, + { + "ph": "s", "id": 50, "pid": 1336756, "tid": 1336756, "ts": 1587595650311.410, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595650331.495, "dur": 8.352, + "args": { + "External id": 3287260,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595650334.495, "dur": 4.879, + "args": { + "External id": 3287261,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595650480.155, "dur": 103.510, + "args": { + "External id": 3287262,"Sequence number": 32987283, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "8192"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [], [], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "s", "id": 49, "pid": 1336756, "tid": 1336756, "ts": 1587595650480.155, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595650493.797, "dur": 7.122, + "args": { + "External id": 3287263,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595650496.275, "dur": 4.207, + "args": { + "External id": 3287264,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 1336756, "tid": 1336756, + "ts": 1587595650611.124, "dur": 176.182, + "args": { + "External id": 3287265,"Sequence number": 32987284, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], []], "Ev Idx": 8568 + } + }, + { + "ph": "s", "id": 48, "pid": 1336756, "tid": 1336756, "ts": 1587595650611.124, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595650639.523, "dur": 123.791, + "args": { + "External id": 3287266,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595650687.405, "dur": 6.429, + "args": { + "External id": 3287267,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595650689.526, "dur": 3.859, + "args": { + "External id": 3287268,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595650698.678, "dur": 3.616, + "args": { + "External id": 3287269,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595650703.628, "dur": 1.561, + "args": { + "External id": 3287270,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595650707.895, "dur": 5.674, + "args": { + "External id": 3287271,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 1336756, "tid": 1336756, + "ts": 1587595650773.755, "dur": 3.934, + "args": { + "External id": 3287272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 64, 1]], "Input Dims": [[16, 4096, 32, 64]], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650791.895, "dur": 4.526, + "args": { + "External id": 3287273,"Sequence number": 32987285, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650793.073, "dur": 3.191, + "args": { + "External id": 3287274,"Sequence number": 32987285, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 64, 1], []], "Input Dims": [[16, 4096, 32, 64], []], "Ev Idx": 8577 + } + }, + { + "ph": "s", "id": 47, "pid": 1336756, "tid": 1336756, "ts": 1587595650793.073, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595650806.216, "dur": 116.052, + "args": { + "External id": 3287275,"Sequence number": 32987286, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [2048, 2048], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595650807.091, "dur": 8.454, + "args": { + "External id": 3287276,"Sequence number": 32987286, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 8579 + } + }, + { + "ph": "s", "id": 46, "pid": 1336756, "tid": 1336756, "ts": 1587595650807.091, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595650811.305, "dur": 3.087, + "args": { + "External id": 3287277,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[2048, 2048], [], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595650812.911, "dur": 1.289, + "args": { + "External id": 3287278,"Record function id": 0, "Concrete Inputs": ["", "[2048, 2048]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[2048, 2048], [], [], []], "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595650816.561, "dur": 105.372, + "args": { + "External id": 3287279,"Sequence number": 32987287, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 2048]], "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650818.477, "dur": 5.750, + "args": { + "External id": 3287280,"Sequence number": 32987287, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650821.613, "dur": 2.435, + "args": { + "External id": 3287281,"Sequence number": 32987287, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8584 + } + }, + { + "ph": "s", "id": 45, "pid": 1336756, "tid": 1336756, "ts": 1587595650821.613, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595650825.232, "dur": 88.957, + "args": { + "External id": 3287282,"Sequence number": 32987288, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 2048]], "Ev Idx": 8585 + } + }, + { + "ph": "s", "id": 44, "pid": 1336756, "tid": 1336756, "ts": 1587595650825.232, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650917.495, "dur": 3.600, + "args": { + "External id": 3287283,"Sequence number": 32987289, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8586 + } + }, + { + "ph": "s", "id": 43, "pid": 1336756, "tid": 1336756, "ts": 1587595650917.495, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595650957.389, "dur": 251.267, + "args": { + "External id": 3287284,"Sequence number": 32987290, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 1], [1], [], [8388608, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [2048], [], [16, 4096, 2048], [], [], [], []], "Ev Idx": 8587 + } + }, + { + "ph": "s", "id": 42, "pid": 1336756, "tid": 1336756, "ts": 1587595650957.389, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595650975.435, "dur": 39.920, + "args": { + "External id": 3287285,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595650978.663, "dur": 36.094, + "args": { + "External id": 3287286,"Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 1336756, "tid": 1336756, + "ts": 1587595651020.659, "dur": 4.140, + "args": { + "External id": 3287287,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [2048, 1]], "Input Dims": [[16, 4096, 2048], [65536, 2048]], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595651022.072, "dur": 2.620, + "args": { + "External id": 3287288,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651023.153, "dur": 1.399, + "args": { + "External id": 3287289,"Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595651032.581, "dur": 8.725, + "args": { + "External id": 3287290,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595651035.071, "dur": 5.882, + "args": { + "External id": 3287291,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595651049.730, "dur": 3.869, + "args": { + "External id": 3287292,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595651057.317, "dur": 2.953, + "args": { + "External id": 3287293,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595651184.643, "dur": 5.955, + "args": { + "External id": 3287294,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651185.838, "dur": 4.479, + "args": { + "External id": 3287295,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595651192.996, "dur": 2.912, + "args": { + "External id": 3287296,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651194.421, "dur": 1.176, + "args": { + "External id": 3287297,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595651227.416, "dur": 97.174, + "args": { + "External id": 3287298,"Sequence number": 32987291, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595651228.321, "dur": 9.031, + "args": { + "External id": 3287299,"Sequence number": 32987291, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8602 + } + }, + { + "ph": "s", "id": 41, "pid": 1336756, "tid": 1336756, "ts": 1587595651228.321, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595651232.830, "dur": 3.347, + "args": { + "External id": 3287300,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595651234.786, "dur": 1.200, + "args": { + "External id": 3287301,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595651238.314, "dur": 86.008, + "args": { + "External id": 3287302,"Sequence number": 32987292, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595651239.514, "dur": 8.513, + "args": { + "External id": 3287303,"Sequence number": 32987292, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651242.764, "dur": 5.138, + "args": { + "External id": 3287304,"Sequence number": 32987292, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8607 + } + }, + { + "ph": "s", "id": 40, "pid": 1336756, "tid": 1336756, "ts": 1587595651242.764, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595651248.880, "dur": 69.301, + "args": { + "External id": 3287305,"Sequence number": 32987293, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8608 + } + }, + { + "ph": "s", "id": 39, "pid": 1336756, "tid": 1336756, "ts": 1587595651248.880, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651320.119, "dur": 3.596, + "args": { + "External id": 3287306,"Sequence number": 32987294, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8609 + } + }, + { + "ph": "s", "id": 38, "pid": 1336756, "tid": 1336756, "ts": 1587595651320.119, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595651332.963, "dur": 78.529, + "args": { + "External id": 3287307,"Sequence number": 32987295, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [5632, 2048], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595651333.677, "dur": 11.909, + "args": { + "External id": 3287308,"Sequence number": 32987295, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 8611 + } + }, + { + "ph": "s", "id": 37, "pid": 1336756, "tid": 1336756, "ts": 1587595651333.677, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595651339.436, "dur": 4.966, + "args": { + "External id": 3287309,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[5632, 2048], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595651343.454, "dur": 0.774, + "args": { + "External id": 3287310,"Record function id": 0, "Concrete Inputs": ["", "[2048, 5632]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[5632, 2048], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595651346.319, "dur": 64.906, + "args": { + "External id": 3287311,"Sequence number": 32987296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1, 2048]], "Input Dims": [[16, 4096, 2048], [2048, 5632]], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595651347.555, "dur": 6.610, + "args": { + "External id": 3287312,"Sequence number": 32987296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651348.349, "dur": 5.465, + "args": { + "External id": 3287313,"Sequence number": 32987296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[65536, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8616 + } + }, + { + "ph": "s", "id": 36, "pid": 1336756, "tid": 1336756, "ts": 1587595651348.349, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595651354.828, "dur": 49.294, + "args": { + "External id": 3287314,"Sequence number": 32987297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[65536, 2048], [2048, 5632]], "Ev Idx": 8617 + } + }, + { + "ph": "s", "id": 35, "pid": 1336756, "tid": 1336756, "ts": 1587595651354.828, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651405.651, "dur": 5.196, + "args": { + "External id": 3287315,"Sequence number": 32987298, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[5632, 1], []], "Input Dims": [[65536, 5632], []], "Ev Idx": 8618 + } + }, + { + "ph": "s", "id": 34, "pid": 1336756, "tid": 1336756, "ts": 1587595651405.651, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595651432.039, "dur": 137.928, + "args": { + "External id": 3287316,"Sequence number": 32987299, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8619 + } + }, + { + "ph": "s", "id": 33, "pid": 1336756, "tid": 1336756, "ts": 1587595651432.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595651467.850, "dur": 4.170, + "args": { + "External id": 3287317,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 5632]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595651501.902, "dur": 56.025, + "args": { + "External id": 3287318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[23068672, 5632, 1], [5632, 1], []], "Input Dims": [[16, 4096, 5632], [2048, 5632], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595651502.503, "dur": 4.617, + "args": { + "External id": 3287319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595651503.496, "dur": 2.791, + "args": { + "External id": 3287320,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[5632, 1], [], []], "Input Dims": [[2048, 5632], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595651505.257, "dur": 0.876, + "args": { + "External id": 3287321,"Record function id": 0, "Concrete Inputs": ["", "[5632, 2048]", "[1, 5632]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[5632, 1], [], [], []], "Input Dims": [[2048, 5632], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595651507.941, "dur": 49.634, + "args": { + "External id": 3287322,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[23068672, 5632, 1], [1, 5632]], "Input Dims": [[16, 4096, 5632], [5632, 2048]], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 1336756, "tid": 1336756, + "ts": 1587595651509.576, "dur": 4.187, + "args": { + "External id": 3287323,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651512.406, "dur": 1.235, + "args": { + "External id": 3287324,"Record function id": 0, "Concrete Inputs": ["", "[65536, 5632]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[23068672, 5632, 1], []], "Input Dims": [[16, 4096, 5632], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595651514.279, "dur": 39.660, + "args": { + "External id": 3287325,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632]], "Input Dims": [[65536, 5632], [5632, 2048]], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651555.787, "dur": 1.170, + "args": { + "External id": 3287326,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587595651577.701, "dur": 24.546, + "args": { + "External id": 3287327,"Sequence number": 32987300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 8630 + } + }, + { + "ph": "s", "id": 32, "pid": 1336756, "tid": 1336756, "ts": 1587595651577.701, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 1336756, "tid": 1336756, + "ts": 1587595651619.251, "dur": 42.475, + "args": { + "External id": 3287328,"Sequence number": 32987301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1]], []], "Input Dims": [[[16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048]], []], "Ev Idx": 8631 + } + }, + { + "ph": "s", "id": 31, "pid": 1336756, "tid": 1336756, "ts": 1587595651619.251, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 1336756, "tid": 1336756, + "ts": 1587595651627.397, "dur": 30.068, + "args": { + "External id": 3287329,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1], [8388608, 2048, 1]], []], "Input Dims": [[[16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048], [16, 4096, 2048]], []], "Ev Idx": 8632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595651658.731, "dur": 1.629, + "args": { + "External id": 3287330,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 8192], []], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 1336756, "tid": 1336756, + "ts": 1587595651696.691, "dur": 46.934, + "args": { + "External id": 3287331,"Record function id": 0, "Ev Idx": 8634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 1336756, "tid": 1336756, + "ts": 1587595651744.832, "dur": 220.493, + "args": { + "External id": 3287332,"Record function id": 0, "Ev Idx": 8635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595651779.837, "dur": 176.544, + "args": { + "External id": 3287333,"Sequence number": 32987302, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [33554432, 8192, 2048, 1]], "Input Dims": [[2048], [16, 4096, 4, 2048]], "Ev Idx": 8636 + } + }, + { + "ph": "s", "id": 30, "pid": 1336756, "tid": 1336756, "ts": 1587595651779.837, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 1336756, "tid": 1336756, + "ts": 1587595651850.069, "dur": 63.034, + "args": { + "External id": 3287334,"kernel_hash": "cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/d2/cd26ogptnjelaibpl2zumx5nxdttspqepvjkh7lpmsxbzrrexbxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[262144, 2048], [262144, 2048], [2048], [262144], [], [], [], [], [], [], [], [], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587595652096.391, "dur": 43.828, + "args": { + "External id": 3287335,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595652099.574, "dur": 6.429, + "args": { + "External id": 3287336,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652109.219, "dur": 30.704, + "args": { + "External id": 3287337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652112.909, "dur": 26.377, + "args": { + "External id": 3287338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587595652144.906, "dur": 23.821, + "args": { + "External id": 3287339,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595652146.059, "dur": 3.138, + "args": { + "External id": 3287340,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652149.943, "dur": 18.541, + "args": { + "External id": 3287341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652153.255, "dur": 14.766, + "args": { + "External id": 3287342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587595652171.936, "dur": 17.333, + "args": { + "External id": 3287343,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595652172.783, "dur": 2.981, + "args": { + "External id": 3287344,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652176.239, "dur": 12.790, + "args": { + "External id": 3287345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652177.324, "dur": 11.358, + "args": { + "External id": 3287346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595652198.035, "dur": 0.705, + "args": { + "External id": 3287347,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 1336756, "tid": 1336756, + "ts": 1587595652206.921, "dur": 11.489, + "args": { + "External id": 3287348,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652214.344, "dur": 2.418, + "args": { + "External id": 3287349,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652224.872, "dur": 7.016, + "args": { + "External id": 3287350,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652228.959, "dur": 1.263, + "args": { + "External id": 3287351,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652233.246, "dur": 3.889, + "args": { + "External id": 3287352,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652235.300, "dur": 1.052, + "args": { + "External id": 3287353,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652238.594, "dur": 3.105, + "args": { + "External id": 3287354,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 5], [], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652240.748, "dur": 0.470, + "args": { + "External id": 3287355,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 5], [], [], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652244.524, "dur": 3.444, + "args": { + "External id": 3287356,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 4], [], [], [], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652246.546, "dur": 0.862, + "args": { + "External id": 3287357,"Record function id": 0, "Concrete Inputs": ["", "[16, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 4], [], [], []], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652249.270, "dur": 4.138, + "args": { + "External id": 3287358,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 8188, 4], [], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652251.722, "dur": 0.878, + "args": { + "External id": 3287359,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 8188, 4], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652254.537, "dur": 3.584, + "args": { + "External id": 3287360,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4096, 4], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652256.569, "dur": 1.034, + "args": { + "External id": 3287361,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4096, 4], [], [], []], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595652261.755, "dur": 5.300, + "args": { + "External id": 3287362,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4096, 4], [], []], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652265.610, "dur": 0.673, + "args": { + "External id": 3287363,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4096, 4], [], [], []], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652271.053, "dur": 3.370, + "args": { + "External id": 3287364,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652273.041, "dur": 0.831, + "args": { + "External id": 3287365,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595652277.706, "dur": 8.472, + "args": { + "External id": 3287366,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652284.200, "dur": 0.907, + "args": { + "External id": 3287367,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652287.347, "dur": 3.115, + "args": { + "External id": 3287368,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652289.002, "dur": 0.879, + "args": { + "External id": 3287369,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652293.021, "dur": 6.444, + "args": { + "External id": 3287370,"Sequence number": 32987303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "s", "id": 29, "pid": 1336756, "tid": 1336756, "ts": 1587595652293.021, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652296.979, "dur": 0.870, + "args": { + "External id": 3287371,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652303.078, "dur": 5.160, + "args": { + "External id": 3287372,"Sequence number": 32987304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 8675 + } + }, + { + "ph": "s", "id": 28, "pid": 1336756, "tid": 1336756, "ts": 1587595652303.078, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652306.654, "dur": 0.818, + "args": { + "External id": 3287373,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595652309.604, "dur": 6.146, + "args": { + "External id": 3287374,"Sequence number": 32987305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 8677 + } + }, + { + "ph": "s", "id": 27, "pid": 1336756, "tid": 1336756, "ts": 1587595652309.604, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652313.572, "dur": 1.172, + "args": { + "External id": 3287375,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595652316.717, "dur": 4.580, + "args": { + "External id": 3287376,"Sequence number": 32987306, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 8679 + } + }, + { + "ph": "s", "id": 26, "pid": 1336756, "tid": 1336756, "ts": 1587595652316.717, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652319.557, "dur": 1.038, + "args": { + "External id": 3287377,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595652325.229, "dur": 43.240, + "args": { + "External id": 3287378,"Sequence number": 32987307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595652329.857, "dur": 38.381, + "args": { + "External id": 3287379,"Sequence number": 32987307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595652332.414, "dur": 7.950, + "args": { + "External id": 3287380,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595652334.314, "dur": 5.364, + "args": { + "External id": 3287381,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652341.934, "dur": 25.701, + "args": { + "External id": 3287382,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595652392.974, "dur": 5.008, + "args": { + "External id": 3287383,"Sequence number": 32987307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 8686 + } + }, + { + "ph": "s", "id": 25, "pid": 1336756, "tid": 1336756, "ts": 1587595652392.974, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595652400.031, "dur": 1.561, + "args": { + "External id": 3287384,"Sequence number": 32987308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595652431.235, "dur": 70948.623, + "args": { + "External id": 3287385,"Sequence number": 32987308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 8688 + } + }, + { + "ph": "s", "id": 24, "pid": 1336756, "tid": 1336756, "ts": 1587595652431.235, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595652444.542, "dur": 28.295, + "args": { + "External id": 3287386,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595652445.330, "dur": 27.326, + "args": { + "External id": 3287387,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595652446.954, "dur": 4.562, + "args": { + "External id": 3287388,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595652448.325, "dur": 2.746, + "args": { + "External id": 3287389,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652452.337, "dur": 19.893, + "args": { + "External id": 3287390,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595652489.945, "dur": 26.424, + "args": { + "External id": 3287391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595652491.311, "dur": 6.942, + "args": { + "External id": 3287392,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652493.384, "dur": 4.569, + "args": { + "External id": 3287393,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652499.108, "dur": 17.078, + "args": { + "External id": 3287394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652500.290, "dur": 15.490, + "args": { + "External id": 3287395,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595652520.078, "dur": 24.402, + "args": { + "External id": 3287396,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595652521.191, "dur": 6.780, + "args": { + "External id": 3287397,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652524.984, "dur": 2.753, + "args": { + "External id": 3287398,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652528.403, "dur": 15.872, + "args": { + "External id": 3287399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652529.192, "dur": 14.728, + "args": { + "External id": 3287400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587595652548.438, "dur": 30.151, + "args": { + "External id": 3287401,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595652549.948, "dur": 3.584, + "args": { + "External id": 3287402,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652563.945, "dur": 14.400, + "args": { + "External id": 3287403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652564.526, "dur": 13.423, + "args": { + "External id": 3287404,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1336756, + "ts": 1587595652587.631, "dur": 28.442, + "args": { + "External id": 3287405,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595652619.448, "dur": 62.005, + "args": { + "External id": 3287406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595652622.097, "dur": 58.905, + "args": { + "External id": 3287407,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652628.155, "dur": 1.051, + "args": { + "External id": 3287408,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595652630.615, "dur": 29.453, + "args": { + "External id": 3287409,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595652632.939, "dur": 26.912, + "args": { + "External id": 3287410,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595652635.820, "dur": 3.016, + "args": { + "External id": 3287411,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595652640.087, "dur": 19.410, + "args": { + "External id": 3287412,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1336756, + "ts": 1587595652686.195, "dur": 64709.486, + "args": { + "External id": 3287413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1336756, + "ts": 1587595652688.704, "dur": 64705.648, + "args": { + "External id": 3287414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595717406.016, "dur": 9.069, + "args": { + "External id": 3287415,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595717412.183, "dur": 1.059, + "args": { + "External id": 3287416,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595717420.394, "dur": 113.151, + "args": { + "External id": 3287417,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595717422.089, "dur": 6.014, + "args": { + "External id": 3287418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595717424.292, "dur": 2.882, + "args": { + "External id": 3287419,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595717426.021, "dur": 0.917, + "args": { + "External id": 3287420,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595717429.532, "dur": 103.250, + "args": { + "External id": 3287421,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595717433.502, "dur": 98.333, + "args": { + "External id": 3287422,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595717537.272, "dur": 4.482, + "args": { + "External id": 3287423,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595717539.365, "dur": 1.051, + "args": { + "External id": 3287424,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595717551.355, "dur": 3.803, + "args": { + "External id": 3287425,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595717566.301, "dur": 6.959, + "args": { + "External id": 3287426,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595717568.781, "dur": 4.117, + "args": { + "External id": 3287427,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595717730.906, "dur": 305.111, + "args": { + "External id": 3287428,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595717736.926, "dur": 2.342, + "args": { + "External id": 3287429,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595717741.057, "dur": 294.293, + "args": { + "External id": 3287430,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595717744.575, "dur": 0.689, + "args": { + "External id": 3287431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595717748.323, "dur": 30.580, + "args": { + "External id": 3287432,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595717781.649, "dur": 6.769, + "args": { + "External id": 3287433,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595717786.988, "dur": 1.062, + "args": { + "External id": 3287434,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595717790.371, "dur": 32.297, + "args": { + "External id": 3287435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595717794.027, "dur": 2.091, + "args": { + "External id": 3287436,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595717798.595, "dur": 23.770, + "args": { + "External id": 3287437,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595717803.379, "dur": 3.070, + "args": { + "External id": 3287438,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595717824.892, "dur": 24.765, + "args": { + "External id": 3287439,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595717851.932, "dur": 37.079, + "args": { + "External id": 3287440,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595717893.645, "dur": 20.423, + "args": { + "External id": 3287441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595717916.078, "dur": 19.204, + "args": { + "External id": 3287442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595717939.085, "dur": 26.998, + "args": { + "External id": 3287443,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595717941.099, "dur": 2.073, + "args": { + "External id": 3287444,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595717947.647, "dur": 0.919, + "args": { + "External id": 3287445,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595717968.149, "dur": 48.445, + "args": { + "External id": 3287446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718019.369, "dur": 14.218, + "args": { + "External id": 3287447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595718045.315, "dur": 2.652, + "args": { + "External id": 3287448,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595718055.799, "dur": 4.438, + "args": { + "External id": 3287449,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718058.649, "dur": 0.773, + "args": { + "External id": 3287450,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595718149.071, "dur": 79.446, + "args": { + "External id": 3287451,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595718236.846, "dur": 5.345, + "args": { + "External id": 3287452,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718239.586, "dur": 1.469, + "args": { + "External id": 3287453,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718244.011, "dur": 31.204, + "args": { + "External id": 3287454,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595718280.665, "dur": 6.885, + "args": { + "External id": 3287455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595718282.696, "dur": 4.165, + "args": { + "External id": 3287456,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718284.853, "dur": 1.762, + "args": { + "External id": 3287457,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595718295.080, "dur": 53.066, + "args": { + "External id": 3287458,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595718296.878, "dur": 50.624, + "args": { + "External id": 3287459,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718353.461, "dur": 18.766, + "args": { + "External id": 3287460,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595718378.757, "dur": 3.892, + "args": { + "External id": 3287461,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718380.736, "dur": 0.970, + "args": { + "External id": 3287462,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595718387.104, "dur": 53.654, + "args": { + "External id": 3287463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595718388.305, "dur": 6.369, + "args": { + "External id": 3287464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595718391.382, "dur": 2.676, + "args": { + "External id": 3287465,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718392.998, "dur": 0.902, + "args": { + "External id": 3287466,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595718395.607, "dur": 44.820, + "args": { + "External id": 3287467,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595718396.414, "dur": 43.498, + "args": { + "External id": 3287468,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595718444.856, "dur": 3.717, + "args": { + "External id": 3287469,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718446.907, "dur": 0.657, + "args": { + "External id": 3287470,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595718456.788, "dur": 1.884, + "args": { + "External id": 3287471,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595718467.005, "dur": 8.991, + "args": { + "External id": 3287472,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595718469.516, "dur": 6.114, + "args": { + "External id": 3287473,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595718569.498, "dur": 205.817, + "args": { + "External id": 3287474,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595718571.695, "dur": 2.218, + "args": { + "External id": 3287475,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595718575.224, "dur": 199.683, + "args": { + "External id": 3287476,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595718579.222, "dur": 0.553, + "args": { + "External id": 3287477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595718581.640, "dur": 25.426, + "args": { + "External id": 3287478,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595718608.818, "dur": 4.232, + "args": { + "External id": 3287479,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718611.351, "dur": 1.252, + "args": { + "External id": 3287480,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595718614.295, "dur": 27.128, + "args": { + "External id": 3287481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595718615.425, "dur": 2.318, + "args": { + "External id": 3287482,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595718619.182, "dur": 21.875, + "args": { + "External id": 3287483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718624.661, "dur": 2.776, + "args": { + "External id": 3287484,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595718642.926, "dur": 24.366, + "args": { + "External id": 3287485,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718669.094, "dur": 13.462, + "args": { + "External id": 3287486,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595718687.510, "dur": 16.531, + "args": { + "External id": 3287487,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718705.543, "dur": 13.479, + "args": { + "External id": 3287488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595718721.351, "dur": 21.200, + "args": { + "External id": 3287489,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718723.773, "dur": 1.822, + "args": { + "External id": 3287490,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718727.503, "dur": 0.793, + "args": { + "External id": 3287491,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718744.302, "dur": 13.811, + "args": { + "External id": 3287492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718761.432, "dur": 12.312, + "args": { + "External id": 3287493,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595718781.668, "dur": 1.881, + "args": { + "External id": 3287494,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595718792.288, "dur": 3.468, + "args": { + "External id": 3287495,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718794.450, "dur": 0.561, + "args": { + "External id": 3287496,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595718861.959, "dur": 73.134, + "args": { + "External id": 3287497,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595718942.181, "dur": 5.725, + "args": { + "External id": 3287498,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595718945.516, "dur": 0.922, + "args": { + "External id": 3287499,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595718949.644, "dur": 26.364, + "args": { + "External id": 3287500,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595719011.969, "dur": 10.904, + "args": { + "External id": 3287501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595719017.552, "dur": 4.457, + "args": { + "External id": 3287502,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719019.492, "dur": 2.114, + "args": { + "External id": 3287503,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595719026.747, "dur": 51.793, + "args": { + "External id": 3287504,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595719028.110, "dur": 49.675, + "args": { + "External id": 3287505,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719082.625, "dur": 18.688, + "args": { + "External id": 3287506,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595719108.231, "dur": 6.591, + "args": { + "External id": 3287507,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719112.693, "dur": 1.260, + "args": { + "External id": 3287508,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595719118.991, "dur": 48.650, + "args": { + "External id": 3287509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595719120.080, "dur": 3.716, + "args": { + "External id": 3287510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595719121.060, "dur": 2.154, + "args": { + "External id": 3287511,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719122.367, "dur": 0.701, + "args": { + "External id": 3287512,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595719124.321, "dur": 42.937, + "args": { + "External id": 3287513,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595719125.142, "dur": 41.577, + "args": { + "External id": 3287514,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595719172.115, "dur": 5.813, + "args": { + "External id": 3287515,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719173.984, "dur": 2.889, + "args": { + "External id": 3287516,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595719186.395, "dur": 1.726, + "args": { + "External id": 3287517,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595719196.307, "dur": 5.739, + "args": { + "External id": 3287518,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595719198.004, "dur": 3.769, + "args": { + "External id": 3287519,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595719296.319, "dur": 279.087, + "args": { + "External id": 3287520,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595719298.592, "dur": 2.780, + "args": { + "External id": 3287521,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595719303.477, "dur": 271.553, + "args": { + "External id": 3287522,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595719306.978, "dur": 0.295, + "args": { + "External id": 3287523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595719308.575, "dur": 37.219, + "args": { + "External id": 3287524,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595719347.997, "dur": 4.072, + "args": { + "External id": 3287525,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719350.470, "dur": 1.051, + "args": { + "External id": 3287526,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595719352.844, "dur": 28.424, + "args": { + "External id": 3287527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595719354.217, "dur": 2.082, + "args": { + "External id": 3287528,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595719357.616, "dur": 23.347, + "args": { + "External id": 3287529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719362.994, "dur": 2.905, + "args": { + "External id": 3287530,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595719382.693, "dur": 44.021, + "args": { + "External id": 3287531,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719428.461, "dur": 36.079, + "args": { + "External id": 3287532,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595719469.511, "dur": 24.786, + "args": { + "External id": 3287533,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719495.877, "dur": 24.009, + "args": { + "External id": 3287534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595719521.588, "dur": 21.580, + "args": { + "External id": 3287535,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719523.843, "dur": 1.604, + "args": { + "External id": 3287536,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719527.111, "dur": 1.133, + "args": { + "External id": 3287537,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719544.695, "dur": 15.288, + "args": { + "External id": 3287538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719563.334, "dur": 10.581, + "args": { + "External id": 3287539,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595719581.782, "dur": 1.597, + "args": { + "External id": 3287540,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595719592.036, "dur": 3.795, + "args": { + "External id": 3287541,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719594.447, "dur": 0.672, + "args": { + "External id": 3287542,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595719663.618, "dur": 49.803, + "args": { + "External id": 3287543,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595719717.993, "dur": 5.633, + "args": { + "External id": 3287544,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719720.764, "dur": 1.724, + "args": { + "External id": 3287545,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719725.129, "dur": 22.912, + "args": { + "External id": 3287546,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595719754.987, "dur": 4.967, + "args": { + "External id": 3287547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595719756.397, "dur": 2.975, + "args": { + "External id": 3287548,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719758.381, "dur": 0.811, + "args": { + "External id": 3287549,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595719762.669, "dur": 40.601, + "args": { + "External id": 3287550,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595719764.003, "dur": 38.736, + "args": { + "External id": 3287551,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595719806.812, "dur": 15.706, + "args": { + "External id": 3287552,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595719856.389, "dur": 6.298, + "args": { + "External id": 3287553,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719861.179, "dur": 0.717, + "args": { + "External id": 3287554,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595719866.576, "dur": 74.845, + "args": { + "External id": 3287555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595719867.321, "dur": 21.723, + "args": { + "External id": 3287556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595719867.922, "dur": 20.243, + "args": { + "External id": 3287557,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719886.645, "dur": 1.028, + "args": { + "External id": 3287558,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595719890.206, "dur": 50.746, + "args": { + "External id": 3287559,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595719892.768, "dur": 47.654, + "args": { + "External id": 3287560,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595719946.753, "dur": 4.457, + "args": { + "External id": 3287561,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595719949.192, "dur": 0.816, + "args": { + "External id": 3287562,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595719957.028, "dur": 1.497, + "args": { + "External id": 3287563,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595719966.141, "dur": 8.400, + "args": { + "External id": 3287564,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595719970.296, "dur": 3.928, + "args": { + "External id": 3287565,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595720106.540, "dur": 201.039, + "args": { + "External id": 3287566,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595720109.545, "dur": 3.393, + "args": { + "External id": 3287567,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595720114.646, "dur": 192.516, + "args": { + "External id": 3287568,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595720118.186, "dur": 0.615, + "args": { + "External id": 3287569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595720119.923, "dur": 25.085, + "args": { + "External id": 3287570,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595720146.673, "dur": 3.683, + "args": { + "External id": 3287571,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720148.712, "dur": 1.301, + "args": { + "External id": 3287572,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595720151.188, "dur": 27.140, + "args": { + "External id": 3287573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595720156.885, "dur": 1.540, + "args": { + "External id": 3287574,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595720159.542, "dur": 18.521, + "args": { + "External id": 3287575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720162.413, "dur": 2.621, + "args": { + "External id": 3287576,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595720179.715, "dur": 22.663, + "args": { + "External id": 3287577,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720204.360, "dur": 14.632, + "args": { + "External id": 3287578,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595720222.200, "dur": 15.226, + "args": { + "External id": 3287579,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720239.137, "dur": 13.335, + "args": { + "External id": 3287580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595720254.190, "dur": 23.664, + "args": { + "External id": 3287581,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720256.456, "dur": 1.586, + "args": { + "External id": 3287582,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720262.575, "dur": 0.833, + "args": { + "External id": 3287583,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720279.379, "dur": 12.889, + "args": { + "External id": 3287584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720293.374, "dur": 12.446, + "args": { + "External id": 3287585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595720314.283, "dur": 2.218, + "args": { + "External id": 3287586,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595720325.489, "dur": 3.641, + "args": { + "External id": 3287587,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720327.878, "dur": 0.450, + "args": { + "External id": 3287588,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595720396.849, "dur": 59.536, + "args": { + "External id": 3287589,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595720461.510, "dur": 8.959, + "args": { + "External id": 3287590,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720466.412, "dur": 3.011, + "args": { + "External id": 3287591,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720472.055, "dur": 26.201, + "args": { + "External id": 3287592,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595720503.035, "dur": 5.110, + "args": { + "External id": 3287593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595720504.489, "dur": 3.016, + "args": { + "External id": 3287594,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720506.292, "dur": 0.982, + "args": { + "External id": 3287595,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595720510.959, "dur": 44.948, + "args": { + "External id": 3287596,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595720514.451, "dur": 40.708, + "args": { + "External id": 3287597,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720559.595, "dur": 15.320, + "args": { + "External id": 3287598,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595720580.624, "dur": 3.871, + "args": { + "External id": 3287599,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720582.848, "dur": 0.927, + "args": { + "External id": 3287600,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595720588.233, "dur": 50.099, + "args": { + "External id": 3287601,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595720589.039, "dur": 6.145, + "args": { + "External id": 3287602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595720589.918, "dur": 4.713, + "args": { + "External id": 3287603,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720593.545, "dur": 0.765, + "args": { + "External id": 3287604,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595720595.777, "dur": 42.159, + "args": { + "External id": 3287605,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595720596.655, "dur": 40.745, + "args": { + "External id": 3287606,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595720642.108, "dur": 4.139, + "args": { + "External id": 3287607,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720644.430, "dur": 0.749, + "args": { + "External id": 3287608,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595720651.254, "dur": 1.691, + "args": { + "External id": 3287609,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595720660.463, "dur": 7.304, + "args": { + "External id": 3287610,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595720664.424, "dur": 3.046, + "args": { + "External id": 3287611,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595720749.731, "dur": 210.231, + "args": { + "External id": 3287612,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595720751.739, "dur": 3.968, + "args": { + "External id": 3287613,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595720757.835, "dur": 201.634, + "args": { + "External id": 3287614,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595720758.937, "dur": 0.263, + "args": { + "External id": 3287615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595720760.004, "dur": 19.919, + "args": { + "External id": 3287616,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595720781.696, "dur": 5.734, + "args": { + "External id": 3287617,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720786.207, "dur": 1.001, + "args": { + "External id": 3287618,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595720788.390, "dur": 22.993, + "args": { + "External id": 3287619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595720791.659, "dur": 1.585, + "args": { + "External id": 3287620,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595720794.377, "dur": 16.751, + "args": { + "External id": 3287621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720796.486, "dur": 2.409, + "args": { + "External id": 3287622,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595720812.597, "dur": 20.019, + "args": { + "External id": 3287623,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720833.967, "dur": 14.044, + "args": { + "External id": 3287624,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595720850.653, "dur": 14.270, + "args": { + "External id": 3287625,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720865.998, "dur": 31.456, + "args": { + "External id": 3287626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595720900.379, "dur": 30.103, + "args": { + "External id": 3287627,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720902.321, "dur": 2.282, + "args": { + "External id": 3287628,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720908.962, "dur": 3.199, + "args": { + "External id": 3287629,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720932.019, "dur": 13.587, + "args": { + "External id": 3287630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595720946.600, "dur": 11.573, + "args": { + "External id": 3287631,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595720967.092, "dur": 2.089, + "args": { + "External id": 3287632,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595720978.029, "dur": 38.894, + "args": { + "External id": 3287633,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595720980.577, "dur": 34.711, + "args": { + "External id": 3287634,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595721093.040, "dur": 58.642, + "args": { + "External id": 3287635,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595721159.588, "dur": 5.193, + "args": { + "External id": 3287636,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721162.366, "dur": 1.424, + "args": { + "External id": 3287637,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721166.490, "dur": 24.131, + "args": { + "External id": 3287638,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595721195.227, "dur": 6.014, + "args": { + "External id": 3287639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595721196.719, "dur": 3.913, + "args": { + "External id": 3287640,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721198.901, "dur": 1.533, + "args": { + "External id": 3287641,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595721206.496, "dur": 41.845, + "args": { + "External id": 3287642,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595721207.739, "dur": 40.093, + "args": { + "External id": 3287643,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721252.332, "dur": 16.507, + "args": { + "External id": 3287644,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595721274.564, "dur": 3.804, + "args": { + "External id": 3287645,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721276.531, "dur": 1.102, + "args": { + "External id": 3287646,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595721282.222, "dur": 49.898, + "args": { + "External id": 3287647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595721283.097, "dur": 6.168, + "args": { + "External id": 3287648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595721286.203, "dur": 2.433, + "args": { + "External id": 3287649,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721287.645, "dur": 0.847, + "args": { + "External id": 3287650,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595721289.793, "dur": 41.895, + "args": { + "External id": 3287651,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595721290.385, "dur": 40.788, + "args": { + "External id": 3287652,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595721335.858, "dur": 3.499, + "args": { + "External id": 3287653,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721337.854, "dur": 0.531, + "args": { + "External id": 3287654,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595721347.054, "dur": 1.719, + "args": { + "External id": 3287655,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595721356.239, "dur": 7.168, + "args": { + "External id": 3287656,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595721358.516, "dur": 4.634, + "args": { + "External id": 3287657,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595721451.673, "dur": 204.896, + "args": { + "External id": 3287658,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595721456.091, "dur": 2.638, + "args": { + "External id": 3287659,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595721459.954, "dur": 196.056, + "args": { + "External id": 3287660,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595721463.385, "dur": 0.501, + "args": { + "External id": 3287661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595721465.072, "dur": 21.459, + "args": { + "External id": 3287662,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595721488.248, "dur": 13.288, + "args": { + "External id": 3287663,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721500.098, "dur": 1.130, + "args": { + "External id": 3287664,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595721502.418, "dur": 22.297, + "args": { + "External id": 3287665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595721503.687, "dur": 1.853, + "args": { + "External id": 3287666,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595721506.513, "dur": 17.813, + "args": { + "External id": 3287667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721509.016, "dur": 2.735, + "args": { + "External id": 3287668,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595721526.068, "dur": 21.362, + "args": { + "External id": 3287669,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721548.712, "dur": 16.736, + "args": { + "External id": 3287670,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595721570.462, "dur": 14.858, + "args": { + "External id": 3287671,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721586.631, "dur": 13.913, + "args": { + "External id": 3287672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595721604.579, "dur": 20.666, + "args": { + "External id": 3287673,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721606.431, "dur": 1.610, + "args": { + "External id": 3287674,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721610.174, "dur": 0.613, + "args": { + "External id": 3287675,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721628.581, "dur": 13.181, + "args": { + "External id": 3287676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721642.864, "dur": 12.044, + "args": { + "External id": 3287677,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595721662.520, "dur": 1.520, + "args": { + "External id": 3287678,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595721671.679, "dur": 3.963, + "args": { + "External id": 3287679,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721674.228, "dur": 0.582, + "args": { + "External id": 3287680,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595721736.014, "dur": 47.811, + "args": { + "External id": 3287681,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595721787.871, "dur": 4.280, + "args": { + "External id": 3287682,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721790.311, "dur": 0.913, + "args": { + "External id": 3287683,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721793.508, "dur": 24.765, + "args": { + "External id": 3287684,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595721824.466, "dur": 7.377, + "args": { + "External id": 3287685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595721825.741, "dur": 5.491, + "args": { + "External id": 3287686,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721827.731, "dur": 3.259, + "args": { + "External id": 3287687,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595721833.869, "dur": 56.698, + "args": { + "External id": 3287688,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595721834.627, "dur": 54.624, + "args": { + "External id": 3287689,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595721895.912, "dur": 18.012, + "args": { + "External id": 3287690,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595721920.377, "dur": 5.879, + "args": { + "External id": 3287691,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721924.677, "dur": 0.690, + "args": { + "External id": 3287692,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595721929.844, "dur": 89.635, + "args": { + "External id": 3287693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595721930.617, "dur": 4.674, + "args": { + "External id": 3287694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595721931.698, "dur": 2.792, + "args": { + "External id": 3287695,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595721933.403, "dur": 0.903, + "args": { + "External id": 3287696,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595721935.801, "dur": 83.165, + "args": { + "External id": 3287697,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595721938.890, "dur": 78.796, + "args": { + "External id": 3287698,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595722026.329, "dur": 4.973, + "args": { + "External id": 3287699,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722028.826, "dur": 1.271, + "args": { + "External id": 3287700,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595722037.944, "dur": 2.008, + "args": { + "External id": 3287701,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722047.377, "dur": 6.748, + "args": { + "External id": 3287702,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722049.461, "dur": 4.398, + "args": { + "External id": 3287703,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595722142.149, "dur": 193.052, + "args": { + "External id": 3287704,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722144.337, "dur": 2.474, + "args": { + "External id": 3287705,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595722150.827, "dur": 183.903, + "args": { + "External id": 3287706,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595722152.520, "dur": 0.283, + "args": { + "External id": 3287707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595722153.990, "dur": 23.398, + "args": { + "External id": 3287708,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595722178.806, "dur": 5.138, + "args": { + "External id": 3287709,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722181.074, "dur": 2.577, + "args": { + "External id": 3287710,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595722184.791, "dur": 23.010, + "args": { + "External id": 3287711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722185.774, "dur": 1.710, + "args": { + "External id": 3287712,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595722188.607, "dur": 18.920, + "args": { + "External id": 3287713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722191.242, "dur": 2.794, + "args": { + "External id": 3287714,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595722211.631, "dur": 20.137, + "args": { + "External id": 3287715,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722233.148, "dur": 14.775, + "args": { + "External id": 3287716,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595722250.521, "dur": 14.757, + "args": { + "External id": 3287717,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722266.641, "dur": 14.198, + "args": { + "External id": 3287718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595722282.344, "dur": 23.628, + "args": { + "External id": 3287719,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722284.074, "dur": 1.911, + "args": { + "External id": 3287720,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722288.524, "dur": 3.073, + "args": { + "External id": 3287721,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722309.261, "dur": 12.292, + "args": { + "External id": 3287722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722322.562, "dur": 11.038, + "args": { + "External id": 3287723,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595722340.702, "dur": 1.607, + "args": { + "External id": 3287724,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595722349.972, "dur": 3.733, + "args": { + "External id": 3287725,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722352.251, "dur": 0.709, + "args": { + "External id": 3287726,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595722419.441, "dur": 52.829, + "args": { + "External id": 3287727,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595722477.084, "dur": 4.326, + "args": { + "External id": 3287728,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722479.594, "dur": 0.906, + "args": { + "External id": 3287729,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722485.179, "dur": 23.391, + "args": { + "External id": 3287730,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595722512.550, "dur": 5.833, + "args": { + "External id": 3287731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595722514.038, "dur": 3.734, + "args": { + "External id": 3287732,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722516.132, "dur": 1.477, + "args": { + "External id": 3287733,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595722520.962, "dur": 40.128, + "args": { + "External id": 3287734,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595722522.155, "dur": 38.199, + "args": { + "External id": 3287735,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722564.779, "dur": 14.804, + "args": { + "External id": 3287736,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595722587.029, "dur": 3.713, + "args": { + "External id": 3287737,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722589.065, "dur": 0.931, + "args": { + "External id": 3287738,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595722594.569, "dur": 53.303, + "args": { + "External id": 3287739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595722595.193, "dur": 3.647, + "args": { + "External id": 3287740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595722595.862, "dur": 2.437, + "args": { + "External id": 3287741,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722597.215, "dur": 0.958, + "args": { + "External id": 3287742,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595722601.659, "dur": 45.846, + "args": { + "External id": 3287743,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595722602.383, "dur": 44.550, + "args": { + "External id": 3287744,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595722651.403, "dur": 3.956, + "args": { + "External id": 3287745,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722653.351, "dur": 1.072, + "args": { + "External id": 3287746,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595722660.176, "dur": 1.269, + "args": { + "External id": 3287747,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722667.734, "dur": 7.987, + "args": { + "External id": 3287748,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722671.512, "dur": 3.940, + "args": { + "External id": 3287749,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595722750.107, "dur": 207.399, + "args": { + "External id": 3287750,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722754.055, "dur": 2.338, + "args": { + "External id": 3287751,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595722757.342, "dur": 199.743, + "args": { + "External id": 3287752,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595722758.520, "dur": 0.255, + "args": { + "External id": 3287753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595722759.891, "dur": 20.079, + "args": { + "External id": 3287754,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595722781.417, "dur": 5.446, + "args": { + "External id": 3287755,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722785.683, "dur": 0.975, + "args": { + "External id": 3287756,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595722790.163, "dur": 20.633, + "args": { + "External id": 3287757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595722791.260, "dur": 1.572, + "args": { + "External id": 3287758,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595722794.064, "dur": 16.482, + "args": { + "External id": 3287759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722796.279, "dur": 2.216, + "args": { + "External id": 3287760,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595722811.990, "dur": 20.188, + "args": { + "External id": 3287761,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722833.307, "dur": 15.558, + "args": { + "External id": 3287762,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595722851.441, "dur": 14.911, + "args": { + "External id": 3287763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722867.489, "dur": 32.286, + "args": { + "External id": 3287764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595722902.491, "dur": 25.622, + "args": { + "External id": 3287765,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722906.633, "dur": 2.239, + "args": { + "External id": 3287766,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722911.046, "dur": 1.219, + "args": { + "External id": 3287767,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722929.502, "dur": 13.771, + "args": { + "External id": 3287768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595722944.145, "dur": 11.839, + "args": { + "External id": 3287769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595722964.735, "dur": 2.182, + "args": { + "External id": 3287770,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595722976.706, "dur": 3.804, + "args": { + "External id": 3287771,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595722979.105, "dur": 0.592, + "args": { + "External id": 3287772,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595723085.707, "dur": 62.379, + "args": { + "External id": 3287773,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595723155.927, "dur": 5.385, + "args": { + "External id": 3287774,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723159.066, "dur": 0.936, + "args": { + "External id": 3287775,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723162.830, "dur": 25.820, + "args": { + "External id": 3287776,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595723193.437, "dur": 6.239, + "args": { + "External id": 3287777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595723195.021, "dur": 4.079, + "args": { + "External id": 3287778,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723197.096, "dur": 1.714, + "args": { + "External id": 3287779,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595723204.723, "dur": 42.247, + "args": { + "External id": 3287780,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595723205.930, "dur": 40.410, + "args": { + "External id": 3287781,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723250.806, "dur": 15.559, + "args": { + "External id": 3287782,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595723271.296, "dur": 29.433, + "args": { + "External id": 3287783,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595723273.582, "dur": 26.760, + "args": { + "External id": 3287784,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723279.259, "dur": 2.891, + "args": { + "External id": 3287785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595723306.028, "dur": 31.291, + "args": { + "External id": 3287786,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595723307.904, "dur": 29.180, + "args": { + "External id": 3287787,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723314.443, "dur": 4.811, + "args": { + "External id": 3287788,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723320.608, "dur": 15.894, + "args": { + "External id": 3287789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595723350.374, "dur": 5.430, + "args": { + "External id": 3287790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595723352.350, "dur": 3.194, + "args": { + "External id": 3287791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595723356.914, "dur": 1.257, + "args": { + "External id": 3287792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595723357.394, "dur": 0.656, + "args": { + "External id": 3287793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723396.835, "dur": 23.702, + "args": { + "External id": 3287794,"Sequence number": 32987309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9097 + } + }, + { + "ph": "s", "id": 23, "pid": 1336756, "tid": 1336756, "ts": 1587595723396.835, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595723428.639, "dur": 5.691, + "args": { + "External id": 3287795,"Sequence number": 32987310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723431.834, "dur": 1.143, + "args": { + "External id": 3287796,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595723436.770, "dur": 6.565, + "args": { + "External id": 3287797,"Sequence number": 32987310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723440.880, "dur": 1.286, + "args": { + "External id": 3287798,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595723444.654, "dur": 3.327, + "args": { + "External id": 3287799,"Sequence number": 32987310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723446.522, "dur": 0.949, + "args": { + "External id": 3287800,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595723452.444, "dur": 7.898, + "args": { + "External id": 3287801,"Sequence number": 32987310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9104 + } + }, + { + "ph": "s", "id": 22, "pid": 1336756, "tid": 1336756, "ts": 1587595723452.444, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723458.118, "dur": 0.960, + "args": { + "External id": 3287802,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595723461.263, "dur": 5.258, + "args": { + "External id": 3287803,"Sequence number": 32987311, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9106 + } + }, + { + "ph": "s", "id": 21, "pid": 1336756, "tid": 1336756, "ts": 1587595723461.263, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723464.761, "dur": 1.025, + "args": { + "External id": 3287804,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595723467.514, "dur": 6.945, + "args": { + "External id": 3287805,"Sequence number": 32987312, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 9108 + } + }, + { + "ph": "s", "id": 20, "pid": 1336756, "tid": 1336756, "ts": 1587595723467.514, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723472.684, "dur": 0.820, + "args": { + "External id": 3287806,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595723475.447, "dur": 4.341, + "args": { + "External id": 3287807,"Sequence number": 32987313, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 9110 + } + }, + { + "ph": "s", "id": 19, "pid": 1336756, "tid": 1336756, "ts": 1587595723475.447, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723478.287, "dur": 0.790, + "args": { + "External id": 3287808,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "2048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595723483.642, "dur": 30.352, + "args": { + "External id": 3287809,"Sequence number": 32987314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595723485.108, "dur": 28.652, + "args": { + "External id": 3287810,"Sequence number": 32987314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595723487.410, "dur": 8.347, + "args": { + "External id": 3287811,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595723491.864, "dur": 3.379, + "args": { + "External id": 3287812,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723496.551, "dur": 16.722, + "args": { + "External id": 3287813,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595723537.292, "dur": 4.524, + "args": { + "External id": 3287814,"Sequence number": 32987314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 9117 + } + }, + { + "ph": "s", "id": 18, "pid": 1336756, "tid": 1336756, "ts": 1587595723537.292, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595723546.326, "dur": 1.066, + "args": { + "External id": 3287815,"Sequence number": 32987315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595723579.277, "dur": 41240.449, + "args": { + "External id": 3287816,"Sequence number": 32987315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "s", "id": 17, "pid": 1336756, "tid": 1336756, "ts": 1587595723579.277, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595723592.800, "dur": 25.593, + "args": { + "External id": 3287817,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595723593.579, "dur": 24.547, + "args": { + "External id": 3287818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595723595.018, "dur": 5.227, + "args": { + "External id": 3287819,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595723596.187, "dur": 3.686, + "args": { + "External id": 3287820,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723601.012, "dur": 16.779, + "args": { + "External id": 3287821,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595723633.547, "dur": 31.080, + "args": { + "External id": 3287822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595723634.908, "dur": 6.022, + "args": { + "External id": 3287823,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723636.972, "dur": 3.619, + "args": { + "External id": 3287824,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723642.269, "dur": 22.139, + "args": { + "External id": 3287825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723646.302, "dur": 17.640, + "args": { + "External id": 3287826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595723667.834, "dur": 21.185, + "args": { + "External id": 3287827,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595723668.694, "dur": 4.746, + "args": { + "External id": 3287828,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723670.054, "dur": 3.100, + "args": { + "External id": 3287829,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723674.088, "dur": 14.716, + "args": { + "External id": 3287830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723674.587, "dur": 13.817, + "args": { + "External id": 3287831,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587595723694.522, "dur": 19.941, + "args": { + "External id": 3287832,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595723695.904, "dur": 2.961, + "args": { + "External id": 3287833,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723699.357, "dur": 14.847, + "args": { + "External id": 3287834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723702.238, "dur": 11.693, + "args": { + "External id": 3287835,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1336756, + "ts": 1587595723718.888, "dur": 21.626, + "args": { + "External id": 3287836,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595723742.735, "dur": 49.027, + "args": { + "External id": 3287837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595723744.496, "dur": 46.897, + "args": { + "External id": 3287838,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723748.803, "dur": 3.100, + "args": { + "External id": 3287839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595723753.026, "dur": 22.414, + "args": { + "External id": 3287840,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595723754.714, "dur": 20.502, + "args": { + "External id": 3287841,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595723757.075, "dur": 2.832, + "args": { + "External id": 3287842,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595723760.573, "dur": 14.239, + "args": { + "External id": 3287843,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1336756, + "ts": 1587595723795.514, "dur": 35367.610, + "args": { + "External id": 3287844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1336756, + "ts": 1587595723796.779, "dur": 35365.042, + "args": { + "External id": 3287845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595759195.716, "dur": 12.780, + "args": { + "External id": 3287846,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759205.631, "dur": 1.166, + "args": { + "External id": 3287847,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595759214.087, "dur": 110.039, + "args": { + "External id": 3287848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595759215.573, "dur": 6.869, + "args": { + "External id": 3287849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595759218.426, "dur": 2.943, + "args": { + "External id": 3287850,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759220.360, "dur": 0.712, + "args": { + "External id": 3287851,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595759226.132, "dur": 97.345, + "args": { + "External id": 3287852,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595759227.843, "dur": 94.846, + "args": { + "External id": 3287853,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595759327.833, "dur": 5.108, + "args": { + "External id": 3287854,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759330.330, "dur": 1.316, + "args": { + "External id": 3287855,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595759341.026, "dur": 2.368, + "args": { + "External id": 3287856,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595759352.975, "dur": 9.400, + "args": { + "External id": 3287857,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595759357.428, "dur": 4.654, + "args": { + "External id": 3287858,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595759482.599, "dur": 210.958, + "args": { + "External id": 3287859,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595759486.745, "dur": 2.351, + "args": { + "External id": 3287860,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595759490.547, "dur": 202.674, + "args": { + "External id": 3287861,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595759492.125, "dur": 0.597, + "args": { + "External id": 3287862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595759493.952, "dur": 27.983, + "args": { + "External id": 3287863,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595759523.873, "dur": 5.883, + "args": { + "External id": 3287864,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759528.428, "dur": 1.034, + "args": { + "External id": 3287865,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595759530.839, "dur": 28.707, + "args": { + "External id": 3287866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595759534.751, "dur": 1.990, + "args": { + "External id": 3287867,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595759537.983, "dur": 21.338, + "args": { + "External id": 3287868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759541.758, "dur": 3.072, + "args": { + "External id": 3287869,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595759561.170, "dur": 23.668, + "args": { + "External id": 3287870,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759586.446, "dur": 18.630, + "args": { + "External id": 3287871,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595759608.394, "dur": 15.264, + "args": { + "External id": 3287872,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759625.139, "dur": 13.853, + "args": { + "External id": 3287873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595759640.858, "dur": 23.958, + "args": { + "External id": 3287874,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759642.539, "dur": 1.785, + "args": { + "External id": 3287875,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759649.123, "dur": 0.797, + "args": { + "External id": 3287876,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759666.636, "dur": 13.078, + "args": { + "External id": 3287877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759680.865, "dur": 11.403, + "args": { + "External id": 3287878,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595759699.749, "dur": 1.608, + "args": { + "External id": 3287879,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595759707.476, "dur": 4.021, + "args": { + "External id": 3287880,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759709.887, "dur": 0.686, + "args": { + "External id": 3287881,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595759778.552, "dur": 63.442, + "args": { + "External id": 3287882,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595759849.444, "dur": 5.871, + "args": { + "External id": 3287883,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759852.051, "dur": 1.137, + "args": { + "External id": 3287884,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759856.844, "dur": 45.961, + "args": { + "External id": 3287885,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595759909.985, "dur": 6.750, + "args": { + "External id": 3287886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595759911.833, "dur": 4.126, + "args": { + "External id": 3287887,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595759914.212, "dur": 1.500, + "args": { + "External id": 3287888,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595759922.296, "dur": 50.040, + "args": { + "External id": 3287889,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595759923.492, "dur": 48.227, + "args": { + "External id": 3287890,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595759976.608, "dur": 54.318, + "args": { + "External id": 3287891,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595760039.800, "dur": 5.066, + "args": { + "External id": 3287892,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760042.507, "dur": 1.050, + "args": { + "External id": 3287893,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595760050.019, "dur": 58.212, + "args": { + "External id": 3287894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595760051.068, "dur": 6.777, + "args": { + "External id": 3287895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595760054.639, "dur": 2.467, + "args": { + "External id": 3287896,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760056.257, "dur": 0.688, + "args": { + "External id": 3287897,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595760058.666, "dur": 49.161, + "args": { + "External id": 3287898,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595760059.614, "dur": 47.530, + "args": { + "External id": 3287899,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595760112.881, "dur": 3.610, + "args": { + "External id": 3287900,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760114.664, "dur": 0.701, + "args": { + "External id": 3287901,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595760126.200, "dur": 1.649, + "args": { + "External id": 3287902,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760135.753, "dur": 8.757, + "args": { + "External id": 3287903,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760137.857, "dur": 6.255, + "args": { + "External id": 3287904,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595760239.421, "dur": 210.445, + "args": { + "External id": 3287905,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760246.739, "dur": 2.240, + "args": { + "External id": 3287906,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595760250.443, "dur": 198.947, + "args": { + "External id": 3287907,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595760256.673, "dur": 0.233, + "args": { + "External id": 3287908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595760257.940, "dur": 23.563, + "args": { + "External id": 3287909,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595760283.227, "dur": 7.583, + "args": { + "External id": 3287910,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760287.563, "dur": 2.904, + "args": { + "External id": 3287911,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595760291.876, "dur": 24.046, + "args": { + "External id": 3287912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760293.260, "dur": 1.551, + "args": { + "External id": 3287913,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595760296.356, "dur": 19.301, + "args": { + "External id": 3287914,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760299.055, "dur": 2.807, + "args": { + "External id": 3287915,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595760317.662, "dur": 23.590, + "args": { + "External id": 3287916,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760343.016, "dur": 14.059, + "args": { + "External id": 3287917,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595760362.715, "dur": 16.658, + "args": { + "External id": 3287918,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760380.667, "dur": 13.287, + "args": { + "External id": 3287919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595760395.413, "dur": 23.079, + "args": { + "External id": 3287920,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760397.244, "dur": 1.509, + "args": { + "External id": 3287921,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760400.629, "dur": 2.848, + "args": { + "External id": 3287922,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760419.957, "dur": 13.221, + "args": { + "External id": 3287923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760436.410, "dur": 11.846, + "args": { + "External id": 3287924,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595760456.311, "dur": 2.130, + "args": { + "External id": 3287925,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595760466.994, "dur": 3.179, + "args": { + "External id": 3287926,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760468.992, "dur": 0.377, + "args": { + "External id": 3287927,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595760534.474, "dur": 57.483, + "args": { + "External id": 3287928,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595760596.504, "dur": 4.912, + "args": { + "External id": 3287929,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760599.396, "dur": 1.110, + "args": { + "External id": 3287930,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760602.991, "dur": 24.367, + "args": { + "External id": 3287931,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595760631.912, "dur": 8.300, + "args": { + "External id": 3287932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595760635.677, "dur": 3.828, + "args": { + "External id": 3287933,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760637.390, "dur": 1.827, + "args": { + "External id": 3287934,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595760642.927, "dur": 42.844, + "args": { + "External id": 3287935,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595760643.959, "dur": 41.043, + "args": { + "External id": 3287936,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760689.874, "dur": 16.212, + "args": { + "External id": 3287937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595760711.671, "dur": 6.162, + "args": { + "External id": 3287938,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760716.377, "dur": 0.749, + "args": { + "External id": 3287939,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595760721.630, "dur": 48.745, + "args": { + "External id": 3287940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595760722.487, "dur": 3.348, + "args": { + "External id": 3287941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595760723.449, "dur": 1.880, + "args": { + "External id": 3287942,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760724.523, "dur": 0.677, + "args": { + "External id": 3287943,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595760726.676, "dur": 43.288, + "args": { + "External id": 3287944,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595760727.308, "dur": 42.044, + "args": { + "External id": 3287945,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595760774.670, "dur": 3.658, + "args": { + "External id": 3287946,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760776.664, "dur": 0.733, + "args": { + "External id": 3287947,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595760786.327, "dur": 1.416, + "args": { + "External id": 3287948,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760795.293, "dur": 5.337, + "args": { + "External id": 3287949,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760796.726, "dur": 3.569, + "args": { + "External id": 3287950,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595760897.173, "dur": 247.459, + "args": { + "External id": 3287951,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760901.793, "dur": 3.181, + "args": { + "External id": 3287952,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595760906.591, "dur": 237.236, + "args": { + "External id": 3287953,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595760910.308, "dur": 0.369, + "args": { + "External id": 3287954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595760911.587, "dur": 27.472, + "args": { + "External id": 3287955,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595760940.642, "dur": 5.208, + "args": { + "External id": 3287956,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595760944.412, "dur": 0.990, + "args": { + "External id": 3287957,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595760946.812, "dur": 21.728, + "args": { + "External id": 3287958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595760948.161, "dur": 1.445, + "args": { + "External id": 3287959,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595760950.780, "dur": 17.512, + "args": { + "External id": 3287960,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595760953.279, "dur": 2.693, + "args": { + "External id": 3287961,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595760969.895, "dur": 60.902, + "args": { + "External id": 3287962,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761033.353, "dur": 16.166, + "args": { + "External id": 3287963,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595761055.160, "dur": 14.985, + "args": { + "External id": 3287964,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761071.426, "dur": 14.307, + "args": { + "External id": 3287965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595761087.547, "dur": 23.899, + "args": { + "External id": 3287966,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761091.315, "dur": 2.002, + "args": { + "External id": 3287967,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761095.648, "dur": 0.898, + "args": { + "External id": 3287968,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761112.787, "dur": 13.294, + "args": { + "External id": 3287969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761129.459, "dur": 12.993, + "args": { + "External id": 3287970,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595761152.503, "dur": 2.346, + "args": { + "External id": 3287971,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595761164.140, "dur": 3.663, + "args": { + "External id": 3287972,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761166.627, "dur": 0.405, + "args": { + "External id": 3287973,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595761244.637, "dur": 87.284, + "args": { + "External id": 3287974,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595761336.944, "dur": 4.488, + "args": { + "External id": 3287975,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761339.463, "dur": 0.842, + "args": { + "External id": 3287976,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761342.874, "dur": 25.217, + "args": { + "External id": 3287977,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595761374.802, "dur": 5.449, + "args": { + "External id": 3287978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595761376.272, "dur": 3.401, + "args": { + "External id": 3287979,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761378.366, "dur": 1.127, + "args": { + "External id": 3287980,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595761383.201, "dur": 43.072, + "args": { + "External id": 3287981,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595761384.445, "dur": 41.117, + "args": { + "External id": 3287982,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761429.946, "dur": 17.371, + "args": { + "External id": 3287983,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595761453.044, "dur": 6.167, + "args": { + "External id": 3287984,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761457.647, "dur": 0.815, + "args": { + "External id": 3287985,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595761463.364, "dur": 49.104, + "args": { + "External id": 3287986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595761464.143, "dur": 3.687, + "args": { + "External id": 3287987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595761464.885, "dur": 2.393, + "args": { + "External id": 3287988,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761466.227, "dur": 0.914, + "args": { + "External id": 3287989,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595761468.501, "dur": 43.537, + "args": { + "External id": 3287990,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595761471.711, "dur": 39.885, + "args": { + "External id": 3287991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595761516.150, "dur": 3.380, + "args": { + "External id": 3287992,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761517.793, "dur": 0.568, + "args": { + "External id": 3287993,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595761524.752, "dur": 1.353, + "args": { + "External id": 3287994,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595761533.253, "dur": 7.886, + "args": { + "External id": 3287995,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595761537.077, "dur": 3.769, + "args": { + "External id": 3287996,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595761618.853, "dur": 183.939, + "args": { + "External id": 3287997,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595761623.267, "dur": 2.361, + "args": { + "External id": 3287998,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595761626.759, "dur": 175.400, + "args": { + "External id": 3287999,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595761627.907, "dur": 0.272, + "args": { + "External id": 3288000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595761629.300, "dur": 20.168, + "args": { + "External id": 3288001,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595761651.003, "dur": 5.455, + "args": { + "External id": 3288002,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761655.363, "dur": 0.816, + "args": { + "External id": 3288003,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595761657.308, "dur": 23.745, + "args": { + "External id": 3288004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595761660.284, "dur": 2.017, + "args": { + "External id": 3288005,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595761663.303, "dur": 17.503, + "args": { + "External id": 3288006,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761665.759, "dur": 2.307, + "args": { + "External id": 3288007,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595761682.326, "dur": 20.931, + "args": { + "External id": 3288008,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761704.423, "dur": 15.076, + "args": { + "External id": 3288009,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595761722.242, "dur": 14.849, + "args": { + "External id": 3288010,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761738.034, "dur": 12.695, + "args": { + "External id": 3288011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595761752.040, "dur": 22.010, + "args": { + "External id": 3288012,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761753.602, "dur": 1.702, + "args": { + "External id": 3288013,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761758.985, "dur": 0.769, + "args": { + "External id": 3288014,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761775.225, "dur": 12.904, + "args": { + "External id": 3288015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761789.044, "dur": 12.173, + "args": { + "External id": 3288016,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595761808.692, "dur": 1.538, + "args": { + "External id": 3288017,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595761818.704, "dur": 3.102, + "args": { + "External id": 3288018,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761820.681, "dur": 0.346, + "args": { + "External id": 3288019,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595761897.884, "dur": 55.776, + "args": { + "External id": 3288020,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595761959.292, "dur": 7.738, + "args": { + "External id": 3288021,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595761964.600, "dur": 1.151, + "args": { + "External id": 3288022,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595761968.232, "dur": 64.612, + "args": { + "External id": 3288023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595762039.117, "dur": 5.618, + "args": { + "External id": 3288024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595762040.637, "dur": 3.409, + "args": { + "External id": 3288025,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762042.393, "dur": 1.429, + "args": { + "External id": 3288026,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595762047.961, "dur": 47.131, + "args": { + "External id": 3288027,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595762051.505, "dur": 42.896, + "args": { + "External id": 3288028,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762099.047, "dur": 16.509, + "args": { + "External id": 3288029,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595762121.951, "dur": 4.246, + "args": { + "External id": 3288030,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762124.220, "dur": 1.030, + "args": { + "External id": 3288031,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595762130.256, "dur": 48.276, + "args": { + "External id": 3288032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595762131.041, "dur": 5.502, + "args": { + "External id": 3288033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595762132.009, "dur": 4.011, + "args": { + "External id": 3288034,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762135.371, "dur": 0.485, + "args": { + "External id": 3288035,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595762137.091, "dur": 40.961, + "args": { + "External id": 3288036,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595762137.750, "dur": 39.747, + "args": { + "External id": 3288037,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595762182.673, "dur": 4.275, + "args": { + "External id": 3288038,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762184.795, "dur": 1.064, + "args": { + "External id": 3288039,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595762192.960, "dur": 1.563, + "args": { + "External id": 3288040,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595762201.743, "dur": 10.857, + "args": { + "External id": 3288041,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595762205.868, "dur": 6.419, + "args": { + "External id": 3288042,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595762294.055, "dur": 185.596, + "args": { + "External id": 3288043,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595762295.881, "dur": 2.039, + "args": { + "External id": 3288044,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595762299.462, "dur": 179.521, + "args": { + "External id": 3288045,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595762300.616, "dur": 0.255, + "args": { + "External id": 3288046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595762301.805, "dur": 24.261, + "args": { + "External id": 3288047,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595762327.578, "dur": 3.125, + "args": { + "External id": 3288048,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762329.449, "dur": 1.020, + "args": { + "External id": 3288049,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595762331.574, "dur": 26.178, + "args": { + "External id": 3288050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595762335.301, "dur": 1.948, + "args": { + "External id": 3288051,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595762338.255, "dur": 19.205, + "args": { + "External id": 3288052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762342.924, "dur": 2.385, + "args": { + "External id": 3288053,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595762359.240, "dur": 20.522, + "args": { + "External id": 3288054,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762381.045, "dur": 14.785, + "args": { + "External id": 3288055,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595762398.175, "dur": 13.973, + "args": { + "External id": 3288056,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762413.392, "dur": 13.772, + "args": { + "External id": 3288057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595762429.001, "dur": 23.180, + "args": { + "External id": 3288058,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762430.768, "dur": 1.613, + "args": { + "External id": 3288059,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762436.708, "dur": 1.231, + "args": { + "External id": 3288060,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762453.393, "dur": 12.781, + "args": { + "External id": 3288061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762467.171, "dur": 11.050, + "args": { + "External id": 3288062,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595762485.379, "dur": 1.595, + "args": { + "External id": 3288063,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595762494.406, "dur": 40.297, + "args": { + "External id": 3288064,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762533.339, "dur": 0.493, + "args": { + "External id": 3288065,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595762599.293, "dur": 48.599, + "args": { + "External id": 3288066,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595762655.058, "dur": 4.167, + "args": { + "External id": 3288067,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762657.396, "dur": 0.793, + "args": { + "External id": 3288068,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762660.394, "dur": 22.494, + "args": { + "External id": 3288069,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595762686.844, "dur": 6.976, + "args": { + "External id": 3288070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595762687.955, "dur": 5.231, + "args": { + "External id": 3288071,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762689.664, "dur": 3.263, + "args": { + "External id": 3288072,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595762698.813, "dur": 40.043, + "args": { + "External id": 3288073,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595762699.669, "dur": 38.432, + "args": { + "External id": 3288074,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595762742.246, "dur": 14.909, + "args": { + "External id": 3288075,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595762762.146, "dur": 3.326, + "args": { + "External id": 3288076,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762764.137, "dur": 0.647, + "args": { + "External id": 3288077,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595762769.137, "dur": 48.075, + "args": { + "External id": 3288078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595762769.855, "dur": 5.908, + "args": { + "External id": 3288079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595762772.912, "dur": 2.287, + "args": { + "External id": 3288080,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762774.404, "dur": 0.657, + "args": { + "External id": 3288081,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595762776.202, "dur": 40.572, + "args": { + "External id": 3288082,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595762776.645, "dur": 39.682, + "args": { + "External id": 3288083,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595762820.654, "dur": 3.161, + "args": { + "External id": 3288084,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595762822.400, "dur": 0.553, + "args": { + "External id": 3288085,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595762831.243, "dur": 1.275, + "args": { + "External id": 3288086,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595762839.296, "dur": 5.935, + "args": { + "External id": 3288087,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595762840.909, "dur": 3.992, + "args": { + "External id": 3288088,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595762950.473, "dur": 238.481, + "args": { + "External id": 3288089,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595762953.046, "dur": 2.997, + "args": { + "External id": 3288090,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595762959.447, "dur": 228.880, + "args": { + "External id": 3288091,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595762962.877, "dur": 0.376, + "args": { + "External id": 3288092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595762964.743, "dur": 59.798, + "args": { + "External id": 3288093,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595763027.375, "dur": 5.429, + "args": { + "External id": 3288094,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763029.716, "dur": 2.716, + "args": { + "External id": 3288095,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595763033.711, "dur": 24.546, + "args": { + "External id": 3288096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595763034.603, "dur": 2.111, + "args": { + "External id": 3288097,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595763038.013, "dur": 19.918, + "args": { + "External id": 3288098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763041.287, "dur": 2.967, + "args": { + "External id": 3288099,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595763059.782, "dur": 22.064, + "args": { + "External id": 3288100,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763083.041, "dur": 14.639, + "args": { + "External id": 3288101,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595763103.274, "dur": 15.016, + "args": { + "External id": 3288102,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763119.627, "dur": 13.869, + "args": { + "External id": 3288103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595763135.355, "dur": 22.737, + "args": { + "External id": 3288104,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763136.840, "dur": 1.788, + "args": { + "External id": 3288105,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763140.690, "dur": 2.283, + "args": { + "External id": 3288106,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763159.470, "dur": 12.670, + "args": { + "External id": 3288107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763175.321, "dur": 11.935, + "args": { + "External id": 3288108,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595763196.359, "dur": 1.994, + "args": { + "External id": 3288109,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595763208.654, "dur": 3.940, + "args": { + "External id": 3288110,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763211.100, "dur": 0.360, + "args": { + "External id": 3288111,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595763280.800, "dur": 57.703, + "args": { + "External id": 3288112,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595763343.457, "dur": 4.580, + "args": { + "External id": 3288113,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763346.065, "dur": 0.923, + "args": { + "External id": 3288114,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763349.649, "dur": 25.651, + "args": { + "External id": 3288115,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595763379.908, "dur": 8.220, + "args": { + "External id": 3288116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595763383.620, "dur": 3.898, + "args": { + "External id": 3288117,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763385.673, "dur": 1.654, + "args": { + "External id": 3288118,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595763390.820, "dur": 40.726, + "args": { + "External id": 3288119,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595763391.643, "dur": 39.148, + "args": { + "External id": 3288120,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763435.048, "dur": 15.243, + "args": { + "External id": 3288121,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595763455.451, "dur": 5.734, + "args": { + "External id": 3288122,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763459.565, "dur": 0.886, + "args": { + "External id": 3288123,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595763464.751, "dur": 45.872, + "args": { + "External id": 3288124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595763465.501, "dur": 3.490, + "args": { + "External id": 3288125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595763466.124, "dur": 2.319, + "args": { + "External id": 3288126,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763467.526, "dur": 0.785, + "args": { + "External id": 3288127,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595763469.625, "dur": 40.627, + "args": { + "External id": 3288128,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595763470.083, "dur": 39.537, + "args": { + "External id": 3288129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595763514.499, "dur": 3.500, + "args": { + "External id": 3288130,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763516.179, "dur": 0.794, + "args": { + "External id": 3288131,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595763525.354, "dur": 1.260, + "args": { + "External id": 3288132,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595763533.463, "dur": 6.522, + "args": { + "External id": 3288133,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595763535.249, "dur": 4.425, + "args": { + "External id": 3288134,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595763616.657, "dur": 190.375, + "args": { + "External id": 3288135,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595763621.089, "dur": 2.129, + "args": { + "External id": 3288136,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595763626.307, "dur": 180.256, + "args": { + "External id": 3288137,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595763627.411, "dur": 0.334, + "args": { + "External id": 3288138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595763628.854, "dur": 20.126, + "args": { + "External id": 3288139,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595763650.274, "dur": 5.653, + "args": { + "External id": 3288140,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763654.328, "dur": 1.332, + "args": { + "External id": 3288141,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595763656.707, "dur": 20.994, + "args": { + "External id": 3288142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595763657.688, "dur": 1.584, + "args": { + "External id": 3288143,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595763660.159, "dur": 17.285, + "args": { + "External id": 3288144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763662.446, "dur": 2.303, + "args": { + "External id": 3288145,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595763678.984, "dur": 19.248, + "args": { + "External id": 3288146,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763699.528, "dur": 13.733, + "args": { + "External id": 3288147,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595763718.067, "dur": 14.128, + "args": { + "External id": 3288148,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763733.179, "dur": 12.644, + "args": { + "External id": 3288149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595763747.598, "dur": 30.280, + "args": { + "External id": 3288150,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763751.303, "dur": 1.668, + "args": { + "External id": 3288151,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763762.428, "dur": 0.976, + "args": { + "External id": 3288152,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763781.663, "dur": 12.060, + "args": { + "External id": 3288153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763794.624, "dur": 10.915, + "args": { + "External id": 3288154,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595763812.945, "dur": 1.461, + "args": { + "External id": 3288155,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595763822.108, "dur": 3.624, + "args": { + "External id": 3288156,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763824.394, "dur": 0.470, + "args": { + "External id": 3288157,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595763903.409, "dur": 53.903, + "args": { + "External id": 3288158,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595763963.137, "dur": 6.054, + "args": { + "External id": 3288159,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595763966.013, "dur": 1.540, + "args": { + "External id": 3288160,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595763972.897, "dur": 58.508, + "args": { + "External id": 3288161,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595764040.259, "dur": 8.040, + "args": { + "External id": 3288162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595764041.922, "dur": 5.554, + "args": { + "External id": 3288163,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764043.945, "dur": 3.279, + "args": { + "External id": 3288164,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595764051.220, "dur": 46.481, + "args": { + "External id": 3288165,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595764052.248, "dur": 44.748, + "args": { + "External id": 3288166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764104.347, "dur": 16.344, + "args": { + "External id": 3288167,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764127.022, "dur": 4.005, + "args": { + "External id": 3288168,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764129.168, "dur": 0.927, + "args": { + "External id": 3288169,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595764134.762, "dur": 47.737, + "args": { + "External id": 3288170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595764135.468, "dur": 5.779, + "args": { + "External id": 3288171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595764136.077, "dur": 4.530, + "args": { + "External id": 3288172,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764139.511, "dur": 0.956, + "args": { + "External id": 3288173,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595764141.718, "dur": 40.424, + "args": { + "External id": 3288174,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595764142.280, "dur": 39.392, + "args": { + "External id": 3288175,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764186.172, "dur": 3.866, + "args": { + "External id": 3288176,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764188.193, "dur": 0.914, + "args": { + "External id": 3288177,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595764195.757, "dur": 1.652, + "args": { + "External id": 3288178,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595764206.606, "dur": 6.657, + "args": { + "External id": 3288179,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595764208.237, "dur": 4.720, + "args": { + "External id": 3288180,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595764294.928, "dur": 188.807, + "args": { + "External id": 3288181,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595764296.737, "dur": 2.386, + "args": { + "External id": 3288182,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595764302.650, "dur": 180.603, + "args": { + "External id": 3288183,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595764303.562, "dur": 0.297, + "args": { + "External id": 3288184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595764304.779, "dur": 22.917, + "args": { + "External id": 3288185,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595764331.756, "dur": 4.879, + "args": { + "External id": 3288186,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764333.641, "dur": 2.736, + "args": { + "External id": 3288187,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595764337.713, "dur": 20.876, + "args": { + "External id": 3288188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595764338.756, "dur": 1.317, + "args": { + "External id": 3288189,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595764341.069, "dur": 17.277, + "args": { + "External id": 3288190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764343.502, "dur": 2.584, + "args": { + "External id": 3288191,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595764359.975, "dur": 21.779, + "args": { + "External id": 3288192,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764382.887, "dur": 15.524, + "args": { + "External id": 3288193,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595764401.145, "dur": 15.018, + "args": { + "External id": 3288194,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764417.442, "dur": 13.703, + "args": { + "External id": 3288195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595764434.963, "dur": 20.597, + "args": { + "External id": 3288196,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764436.835, "dur": 1.355, + "args": { + "External id": 3288197,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764440.184, "dur": 0.793, + "args": { + "External id": 3288198,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764457.000, "dur": 13.039, + "args": { + "External id": 3288199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764470.956, "dur": 11.333, + "args": { + "External id": 3288200,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595764489.495, "dur": 1.670, + "args": { + "External id": 3288201,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764499.166, "dur": 3.485, + "args": { + "External id": 3288202,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764501.257, "dur": 0.610, + "args": { + "External id": 3288203,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595764562.862, "dur": 47.294, + "args": { + "External id": 3288204,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764614.794, "dur": 5.811, + "args": { + "External id": 3288205,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764617.196, "dur": 2.357, + "args": { + "External id": 3288206,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764622.053, "dur": 24.160, + "args": { + "External id": 3288207,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595764650.133, "dur": 7.232, + "args": { + "External id": 3288208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595764651.307, "dur": 5.442, + "args": { + "External id": 3288209,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764655.622, "dur": 0.959, + "args": { + "External id": 3288210,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595764659.403, "dur": 40.264, + "args": { + "External id": 3288211,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595764660.345, "dur": 38.704, + "args": { + "External id": 3288212,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764702.581, "dur": 15.633, + "args": { + "External id": 3288213,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595764722.317, "dur": 24.211, + "args": { + "External id": 3288214,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595764724.414, "dur": 21.755, + "args": { + "External id": 3288215,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764729.677, "dur": 1.031, + "args": { + "External id": 3288216,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595764751.277, "dur": 27.554, + "args": { + "External id": 3288217,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595764753.124, "dur": 25.493, + "args": { + "External id": 3288218,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764759.069, "dur": 4.186, + "args": { + "External id": 3288219,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764764.310, "dur": 13.841, + "args": { + "External id": 3288220,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595764789.854, "dur": 4.733, + "args": { + "External id": 3288221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595764791.656, "dur": 2.680, + "args": { + "External id": 3288222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595764795.691, "dur": 1.139, + "args": { + "External id": 3288223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595764796.213, "dur": 0.530, + "args": { + "External id": 3288224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764833.502, "dur": 22.429, + "args": { + "External id": 3288225,"Sequence number": 32987316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764858.140, "dur": 36.559, + "args": { + "External id": 3288226,"Sequence number": 32987317, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9529 + } + }, + { + "ph": "s", "id": 16, "pid": 1336756, "tid": 1336756, "ts": 1587595764858.140, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764901.919, "dur": 7.489, + "args": { + "External id": 3288227,"Sequence number": 32987318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764906.259, "dur": 1.626, + "args": { + "External id": 3288228,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595764911.699, "dur": 6.031, + "args": { + "External id": 3288229,"Sequence number": 32987318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764915.893, "dur": 0.669, + "args": { + "External id": 3288230,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764918.819, "dur": 3.199, + "args": { + "External id": 3288231,"Sequence number": 32987318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764920.508, "dur": 0.929, + "args": { + "External id": 3288232,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764928.180, "dur": 6.441, + "args": { + "External id": 3288233,"Sequence number": 32987318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9536 + } + }, + { + "ph": "s", "id": 15, "pid": 1336756, "tid": 1336756, "ts": 1587595764928.180, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764931.570, "dur": 1.845, + "args": { + "External id": 3288234,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764935.744, "dur": 5.075, + "args": { + "External id": 3288235,"Sequence number": 32987319, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9538 + } + }, + { + "ph": "s", "id": 14, "pid": 1336756, "tid": 1336756, "ts": 1587595764935.744, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764939.075, "dur": 0.941, + "args": { + "External id": 3288236,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595764941.759, "dur": 7.232, + "args": { + "External id": 3288237,"Sequence number": 32987320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 9540 + } + }, + { + "ph": "s", "id": 13, "pid": 1336756, "tid": 1336756, "ts": 1587595764941.759, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764947.509, "dur": 0.662, + "args": { + "External id": 3288238,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595764949.999, "dur": 4.663, + "args": { + "External id": 3288239,"Sequence number": 32987321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 9542 + } + }, + { + "ph": "s", "id": 12, "pid": 1336756, "tid": 1336756, "ts": 1587595764949.999, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595764952.968, "dur": 0.940, + "args": { + "External id": 3288240,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595764957.786, "dur": 70.473, + "args": { + "External id": 3288241,"Sequence number": 32987322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595764959.205, "dur": 68.779, + "args": { + "External id": 3288242,"Sequence number": 32987322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595764961.794, "dur": 6.498, + "args": { + "External id": 3288243,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595764964.012, "dur": 3.731, + "args": { + "External id": 3288244,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595764969.174, "dur": 57.853, + "args": { + "External id": 3288245,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595765055.912, "dur": 6.529, + "args": { + "External id": 3288246,"Sequence number": 32987322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 9549 + } + }, + { + "ph": "s", "id": 11, "pid": 1336756, "tid": 1336756, "ts": 1587595765055.912, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595765064.652, "dur": 3.988, + "args": { + "External id": 3288247,"Sequence number": 32987323, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595765104.486, "dur": 43675.856, + "args": { + "External id": 3288248,"Sequence number": 32987323, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 9551 + } + }, + { + "ph": "s", "id": 10, "pid": 1336756, "tid": 1336756, "ts": 1587595765104.486, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595765118.831, "dur": 28.672, + "args": { + "External id": 3288249,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595765119.680, "dur": 27.596, + "args": { + "External id": 3288250,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595765121.128, "dur": 6.757, + "args": { + "External id": 3288251,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595765123.007, "dur": 4.528, + "args": { + "External id": 3288252,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765128.819, "dur": 18.071, + "args": { + "External id": 3288253,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595765162.354, "dur": 27.664, + "args": { + "External id": 3288254,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595765163.834, "dur": 8.044, + "args": { + "External id": 3288255,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595765167.584, "dur": 3.967, + "args": { + "External id": 3288256,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765173.021, "dur": 16.803, + "args": { + "External id": 3288257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765174.559, "dur": 14.906, + "args": { + "External id": 3288258,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595765193.507, "dur": 19.066, + "args": { + "External id": 3288259,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595765194.052, "dur": 5.143, + "args": { + "External id": 3288260,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595765195.876, "dur": 3.074, + "args": { + "External id": 3288261,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765199.699, "dur": 12.694, + "args": { + "External id": 3288262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765200.198, "dur": 11.917, + "args": { + "External id": 3288263,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587595765218.103, "dur": 21.419, + "args": { + "External id": 3288264,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595765220.071, "dur": 4.560, + "args": { + "External id": 3288265,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765227.512, "dur": 11.723, + "args": { + "External id": 3288266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765228.140, "dur": 10.812, + "args": { + "External id": 3288267,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1336756, + "ts": 1587595765243.997, "dur": 24.881, + "args": { + "External id": 3288268,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595765271.541, "dur": 48.937, + "args": { + "External id": 3288269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595765273.310, "dur": 46.793, + "args": { + "External id": 3288270,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595765277.889, "dur": 1.153, + "args": { + "External id": 3288271,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595765280.141, "dur": 23.453, + "args": { + "External id": 3288272,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595765281.607, "dur": 21.758, + "args": { + "External id": 3288273,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595765286.234, "dur": 2.894, + "args": { + "External id": 3288274,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595765289.906, "dur": 13.130, + "args": { + "External id": 3288275,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1336756, + "ts": 1587595765326.526, "dur": 37754.401, + "args": { + "External id": 3288276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1336756, + "ts": 1587595765327.841, "dur": 37752.276, + "args": { + "External id": 3288277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595803090.285, "dur": 6.226, + "args": { + "External id": 3288278,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803093.674, "dur": 1.238, + "args": { + "External id": 3288279,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595803101.192, "dur": 103.219, + "args": { + "External id": 3288280,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595803102.612, "dur": 8.109, + "args": { + "External id": 3288281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595803104.513, "dur": 5.330, + "args": { + "External id": 3288282,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803108.190, "dur": 1.320, + "args": { + "External id": 3288283,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595803111.852, "dur": 91.876, + "args": { + "External id": 3288284,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595803113.552, "dur": 89.304, + "args": { + "External id": 3288285,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595803207.658, "dur": 5.324, + "args": { + "External id": 3288286,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803209.923, "dur": 1.400, + "args": { + "External id": 3288287,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595803219.811, "dur": 2.257, + "args": { + "External id": 3288288,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595803230.510, "dur": 7.967, + "args": { + "External id": 3288289,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595803234.646, "dur": 3.572, + "args": { + "External id": 3288290,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595803365.318, "dur": 207.906, + "args": { + "External id": 3288291,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595803368.692, "dur": 2.788, + "args": { + "External id": 3288292,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595803372.844, "dur": 199.981, + "args": { + "External id": 3288293,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595803374.694, "dur": 0.575, + "args": { + "External id": 3288294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595803378.784, "dur": 25.424, + "args": { + "External id": 3288295,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595803405.861, "dur": 3.236, + "args": { + "External id": 3288296,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803408.157, "dur": 0.686, + "args": { + "External id": 3288297,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595803412.458, "dur": 26.079, + "args": { + "External id": 3288298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595803413.547, "dur": 3.626, + "args": { + "External id": 3288299,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595803418.566, "dur": 19.699, + "args": { + "External id": 3288300,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803422.063, "dur": 3.232, + "args": { + "External id": 3288301,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595803439.970, "dur": 23.733, + "args": { + "External id": 3288302,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803465.895, "dur": 14.912, + "args": { + "External id": 3288303,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595803483.851, "dur": 16.621, + "args": { + "External id": 3288304,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803502.125, "dur": 14.719, + "args": { + "External id": 3288305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595803518.711, "dur": 23.811, + "args": { + "External id": 3288306,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803523.083, "dur": 2.038, + "args": { + "External id": 3288307,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803527.083, "dur": 0.802, + "args": { + "External id": 3288308,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803543.991, "dur": 14.180, + "args": { + "External id": 3288309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803559.520, "dur": 12.159, + "args": { + "External id": 3288310,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595803579.726, "dur": 1.697, + "args": { + "External id": 3288311,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595803587.958, "dur": 4.161, + "args": { + "External id": 3288312,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803589.971, "dur": 1.099, + "args": { + "External id": 3288313,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595803666.195, "dur": 64.245, + "args": { + "External id": 3288314,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595803735.589, "dur": 5.915, + "args": { + "External id": 3288315,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803738.191, "dur": 0.815, + "args": { + "External id": 3288316,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803743.277, "dur": 25.526, + "args": { + "External id": 3288317,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595803774.341, "dur": 9.947, + "args": { + "External id": 3288318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595803776.009, "dur": 7.533, + "args": { + "External id": 3288319,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803780.023, "dur": 3.205, + "args": { + "External id": 3288320,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595803787.405, "dur": 42.788, + "args": { + "External id": 3288321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595803788.538, "dur": 41.065, + "args": { + "External id": 3288322,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595803834.461, "dur": 15.878, + "args": { + "External id": 3288323,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595803856.756, "dur": 4.220, + "args": { + "External id": 3288324,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803858.647, "dur": 1.351, + "args": { + "External id": 3288325,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595803864.926, "dur": 75.313, + "args": { + "External id": 3288326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595803868.110, "dur": 20.704, + "args": { + "External id": 3288327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595803868.842, "dur": 19.118, + "args": { + "External id": 3288328,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803886.483, "dur": 1.026, + "args": { + "External id": 3288329,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595803889.660, "dur": 50.157, + "args": { + "External id": 3288330,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595803890.437, "dur": 48.773, + "args": { + "External id": 3288331,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595803945.024, "dur": 4.629, + "args": { + "External id": 3288332,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595803947.437, "dur": 0.779, + "args": { + "External id": 3288333,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595803957.974, "dur": 1.783, + "args": { + "External id": 3288334,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595803967.717, "dur": 6.807, + "args": { + "External id": 3288335,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595803969.598, "dur": 4.672, + "args": { + "External id": 3288336,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595804112.226, "dur": 200.429, + "args": { + "External id": 3288337,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595804115.018, "dur": 3.358, + "args": { + "External id": 3288338,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595804124.886, "dur": 187.238, + "args": { + "External id": 3288339,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595804126.334, "dur": 0.279, + "args": { + "External id": 3288340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595804127.533, "dur": 24.171, + "args": { + "External id": 3288341,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595804153.447, "dur": 5.390, + "args": { + "External id": 3288342,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804155.508, "dur": 3.004, + "args": { + "External id": 3288343,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595804159.854, "dur": 23.775, + "args": { + "External id": 3288344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595804160.930, "dur": 1.480, + "args": { + "External id": 3288345,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595804163.562, "dur": 19.796, + "args": { + "External id": 3288346,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804166.758, "dur": 2.926, + "args": { + "External id": 3288347,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595804188.007, "dur": 21.587, + "args": { + "External id": 3288348,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804211.359, "dur": 15.821, + "args": { + "External id": 3288349,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595804230.179, "dur": 14.045, + "args": { + "External id": 3288350,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804245.620, "dur": 13.251, + "args": { + "External id": 3288351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595804260.547, "dur": 21.705, + "args": { + "External id": 3288352,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804262.407, "dur": 1.674, + "args": { + "External id": 3288353,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804265.646, "dur": 2.653, + "args": { + "External id": 3288354,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804286.165, "dur": 12.977, + "args": { + "External id": 3288355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804300.070, "dur": 11.001, + "args": { + "External id": 3288356,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595804319.865, "dur": 1.910, + "args": { + "External id": 3288357,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595804331.653, "dur": 3.381, + "args": { + "External id": 3288358,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804333.886, "dur": 0.414, + "args": { + "External id": 3288359,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595804403.962, "dur": 57.665, + "args": { + "External id": 3288360,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595804466.242, "dur": 5.558, + "args": { + "External id": 3288361,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804469.293, "dur": 1.268, + "args": { + "External id": 3288362,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804473.217, "dur": 24.905, + "args": { + "External id": 3288363,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595804505.349, "dur": 6.345, + "args": { + "External id": 3288364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595804507.172, "dur": 3.872, + "args": { + "External id": 3288365,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804509.160, "dur": 1.684, + "args": { + "External id": 3288366,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595804514.612, "dur": 43.155, + "args": { + "External id": 3288367,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595804515.667, "dur": 41.443, + "args": { + "External id": 3288368,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804561.828, "dur": 14.722, + "args": { + "External id": 3288369,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595804582.177, "dur": 5.611, + "args": { + "External id": 3288370,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804586.361, "dur": 0.591, + "args": { + "External id": 3288371,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595804591.449, "dur": 49.906, + "args": { + "External id": 3288372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595804592.083, "dur": 4.034, + "args": { + "External id": 3288373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595804593.063, "dur": 2.489, + "args": { + "External id": 3288374,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804594.601, "dur": 0.813, + "args": { + "External id": 3288375,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595804596.855, "dur": 44.102, + "args": { + "External id": 3288376,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595804599.828, "dur": 40.561, + "args": { + "External id": 3288377,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595804645.476, "dur": 5.029, + "args": { + "External id": 3288378,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804647.969, "dur": 1.043, + "args": { + "External id": 3288379,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595804655.264, "dur": 1.789, + "args": { + "External id": 3288380,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595804664.168, "dur": 5.921, + "args": { + "External id": 3288381,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595804665.742, "dur": 4.096, + "args": { + "External id": 3288382,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595804750.063, "dur": 275.199, + "args": { + "External id": 3288383,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595804754.763, "dur": 2.153, + "args": { + "External id": 3288384,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595804780.380, "dur": 244.370, + "args": { + "External id": 3288385,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595804781.584, "dur": 0.248, + "args": { + "External id": 3288386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595804782.773, "dur": 21.317, + "args": { + "External id": 3288387,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595804805.676, "dur": 4.719, + "args": { + "External id": 3288388,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804809.315, "dur": 0.847, + "args": { + "External id": 3288389,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595804811.406, "dur": 24.407, + "args": { + "External id": 3288390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595804813.369, "dur": 1.811, + "args": { + "External id": 3288391,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595804816.163, "dur": 19.410, + "args": { + "External id": 3288392,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804818.522, "dur": 2.556, + "args": { + "External id": 3288393,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595804839.434, "dur": 27.293, + "args": { + "External id": 3288394,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804867.996, "dur": 33.514, + "args": { + "External id": 3288395,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595804905.778, "dur": 16.682, + "args": { + "External id": 3288396,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804923.693, "dur": 13.346, + "args": { + "External id": 3288397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595804938.581, "dur": 21.724, + "args": { + "External id": 3288398,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804940.881, "dur": 2.044, + "args": { + "External id": 3288399,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595804945.049, "dur": 0.788, + "args": { + "External id": 3288400,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804963.948, "dur": 12.071, + "args": { + "External id": 3288401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595804977.181, "dur": 45.487, + "args": { + "External id": 3288402,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595805034.142, "dur": 2.046, + "args": { + "External id": 3288403,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595805045.555, "dur": 3.924, + "args": { + "External id": 3288404,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805047.966, "dur": 0.734, + "args": { + "External id": 3288405,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595805123.736, "dur": 85.488, + "args": { + "External id": 3288406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595805214.248, "dur": 5.051, + "args": { + "External id": 3288407,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805217.131, "dur": 0.907, + "args": { + "External id": 3288408,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805222.963, "dur": 31.574, + "args": { + "External id": 3288409,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595805259.278, "dur": 5.552, + "args": { + "External id": 3288410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595805260.579, "dur": 3.572, + "args": { + "External id": 3288411,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805262.527, "dur": 1.419, + "args": { + "External id": 3288412,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595805267.308, "dur": 61.406, + "args": { + "External id": 3288413,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595805271.871, "dur": 56.141, + "args": { + "External id": 3288414,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805334.728, "dur": 21.149, + "args": { + "External id": 3288415,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595805361.991, "dur": 4.409, + "args": { + "External id": 3288416,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805364.293, "dur": 1.260, + "args": { + "External id": 3288417,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595805370.262, "dur": 49.816, + "args": { + "External id": 3288418,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595805371.109, "dur": 5.537, + "args": { + "External id": 3288419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595805371.707, "dur": 4.409, + "args": { + "External id": 3288420,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805375.241, "dur": 0.735, + "args": { + "External id": 3288421,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595805377.174, "dur": 42.591, + "args": { + "External id": 3288422,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595805377.688, "dur": 41.444, + "args": { + "External id": 3288423,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595805423.799, "dur": 4.058, + "args": { + "External id": 3288424,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805425.990, "dur": 0.534, + "args": { + "External id": 3288425,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595805433.071, "dur": 1.326, + "args": { + "External id": 3288426,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595805444.378, "dur": 8.137, + "args": { + "External id": 3288427,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595805446.052, "dur": 6.147, + "args": { + "External id": 3288428,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595805533.804, "dur": 186.704, + "args": { + "External id": 3288429,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595805535.681, "dur": 2.140, + "args": { + "External id": 3288430,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595805539.653, "dur": 180.419, + "args": { + "External id": 3288431,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595805541.040, "dur": 0.290, + "args": { + "External id": 3288432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595805542.317, "dur": 24.005, + "args": { + "External id": 3288433,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595805570.136, "dur": 3.128, + "args": { + "External id": 3288434,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805572.038, "dur": 0.984, + "args": { + "External id": 3288435,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595805574.171, "dur": 23.578, + "args": { + "External id": 3288436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595805575.140, "dur": 1.950, + "args": { + "External id": 3288437,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595805578.104, "dur": 19.256, + "args": { + "External id": 3288438,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805582.723, "dur": 2.571, + "args": { + "External id": 3288439,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595805599.461, "dur": 20.300, + "args": { + "External id": 3288440,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805620.949, "dur": 14.238, + "args": { + "External id": 3288441,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595805638.397, "dur": 13.391, + "args": { + "External id": 3288442,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805653.145, "dur": 13.921, + "args": { + "External id": 3288443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595805671.512, "dur": 20.748, + "args": { + "External id": 3288444,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805673.268, "dur": 1.790, + "args": { + "External id": 3288445,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805677.216, "dur": 1.036, + "args": { + "External id": 3288446,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805693.660, "dur": 13.407, + "args": { + "External id": 3288447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805708.102, "dur": 10.828, + "args": { + "External id": 3288448,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595805725.989, "dur": 1.526, + "args": { + "External id": 3288449,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595805734.642, "dur": 3.877, + "args": { + "External id": 3288450,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805737.067, "dur": 0.544, + "args": { + "External id": 3288451,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595805801.730, "dur": 46.377, + "args": { + "External id": 3288452,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595805852.736, "dur": 4.633, + "args": { + "External id": 3288453,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805855.341, "dur": 0.937, + "args": { + "External id": 3288454,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805858.499, "dur": 39.237, + "args": { + "External id": 3288455,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595805903.911, "dur": 8.367, + "args": { + "External id": 3288456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595805905.216, "dur": 6.189, + "args": { + "External id": 3288457,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595805909.752, "dur": 1.494, + "args": { + "External id": 3288458,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595805914.925, "dur": 44.961, + "args": { + "External id": 3288459,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595805916.108, "dur": 43.005, + "args": { + "External id": 3288460,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595805963.107, "dur": 15.831, + "args": { + "External id": 3288461,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595806018.329, "dur": 5.034, + "args": { + "External id": 3288462,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806021.116, "dur": 1.035, + "args": { + "External id": 3288463,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595806028.139, "dur": 55.663, + "args": { + "External id": 3288464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595806031.580, "dur": 3.757, + "args": { + "External id": 3288465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595806032.469, "dur": 2.307, + "args": { + "External id": 3288466,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806033.993, "dur": 0.668, + "args": { + "External id": 3288467,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595806035.947, "dur": 47.437, + "args": { + "External id": 3288468,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595806036.693, "dur": 45.943, + "args": { + "External id": 3288469,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595806088.003, "dur": 6.761, + "args": { + "External id": 3288470,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806089.924, "dur": 3.469, + "args": { + "External id": 3288471,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595806100.911, "dur": 1.607, + "args": { + "External id": 3288472,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806111.111, "dur": 6.476, + "args": { + "External id": 3288473,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806112.969, "dur": 4.355, + "args": { + "External id": 3288474,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595806204.724, "dur": 201.830, + "args": { + "External id": 3288475,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806207.103, "dur": 2.429, + "args": { + "External id": 3288476,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595806221.424, "dur": 184.695, + "args": { + "External id": 3288477,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595806222.972, "dur": 0.437, + "args": { + "External id": 3288478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595806229.646, "dur": 23.465, + "args": { + "External id": 3288479,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595806254.442, "dur": 5.307, + "args": { + "External id": 3288480,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806256.765, "dur": 2.600, + "args": { + "External id": 3288481,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595806260.627, "dur": 22.002, + "args": { + "External id": 3288482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806261.399, "dur": 1.819, + "args": { + "External id": 3288483,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595806264.320, "dur": 17.945, + "args": { + "External id": 3288484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806267.118, "dur": 2.589, + "args": { + "External id": 3288485,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595806284.164, "dur": 21.091, + "args": { + "External id": 3288486,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806306.644, "dur": 13.695, + "args": { + "External id": 3288487,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595806322.900, "dur": 15.086, + "args": { + "External id": 3288488,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806339.186, "dur": 12.162, + "args": { + "External id": 3288489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595806355.877, "dur": 21.818, + "args": { + "External id": 3288490,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806358.395, "dur": 2.011, + "args": { + "External id": 3288491,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806362.152, "dur": 0.834, + "args": { + "External id": 3288492,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806379.123, "dur": 12.826, + "args": { + "External id": 3288493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806392.783, "dur": 12.384, + "args": { + "External id": 3288494,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595806415.060, "dur": 1.820, + "args": { + "External id": 3288495,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595806424.891, "dur": 3.431, + "args": { + "External id": 3288496,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806426.937, "dur": 0.473, + "args": { + "External id": 3288497,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595806491.305, "dur": 53.710, + "args": { + "External id": 3288498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595806549.834, "dur": 6.393, + "args": { + "External id": 3288499,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806552.457, "dur": 2.602, + "args": { + "External id": 3288500,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806557.525, "dur": 26.990, + "args": { + "External id": 3288501,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595806589.597, "dur": 7.005, + "args": { + "External id": 3288502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595806590.989, "dur": 5.036, + "args": { + "External id": 3288503,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806594.896, "dur": 0.945, + "args": { + "External id": 3288504,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595806599.418, "dur": 41.581, + "args": { + "External id": 3288505,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595806600.478, "dur": 39.686, + "args": { + "External id": 3288506,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806644.803, "dur": 15.685, + "args": { + "External id": 3288507,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595806665.567, "dur": 3.740, + "args": { + "External id": 3288508,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806667.789, "dur": 0.665, + "args": { + "External id": 3288509,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595806674.992, "dur": 56.047, + "args": { + "External id": 3288510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595806675.621, "dur": 3.725, + "args": { + "External id": 3288511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595806676.380, "dur": 2.117, + "args": { + "External id": 3288512,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806677.526, "dur": 0.838, + "args": { + "External id": 3288513,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595806689.181, "dur": 41.484, + "args": { + "External id": 3288514,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595806690.078, "dur": 39.977, + "args": { + "External id": 3288515,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595806737.579, "dur": 4.000, + "args": { + "External id": 3288516,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806739.604, "dur": 0.785, + "args": { + "External id": 3288517,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595806746.849, "dur": 1.502, + "args": { + "External id": 3288518,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806755.225, "dur": 7.304, + "args": { + "External id": 3288519,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806756.833, "dur": 5.336, + "args": { + "External id": 3288520,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595806846.312, "dur": 252.015, + "args": { + "External id": 3288521,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806848.215, "dur": 2.188, + "args": { + "External id": 3288522,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595806851.751, "dur": 245.999, + "args": { + "External id": 3288523,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595806852.630, "dur": 0.283, + "args": { + "External id": 3288524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595806853.910, "dur": 42.902, + "args": { + "External id": 3288525,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595806899.280, "dur": 3.478, + "args": { + "External id": 3288526,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595806901.318, "dur": 1.152, + "args": { + "External id": 3288527,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595806904.098, "dur": 29.241, + "args": { + "External id": 3288528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595806905.690, "dur": 2.321, + "args": { + "External id": 3288529,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595806909.252, "dur": 23.796, + "args": { + "External id": 3288530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806916.614, "dur": 3.009, + "args": { + "External id": 3288531,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595806934.769, "dur": 21.320, + "args": { + "External id": 3288532,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595806957.827, "dur": 14.159, + "args": { + "External id": 3288533,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595806974.935, "dur": 50.110, + "args": { + "External id": 3288534,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807027.601, "dur": 15.336, + "args": { + "External id": 3288535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595807044.603, "dur": 21.917, + "args": { + "External id": 3288536,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807046.755, "dur": 2.210, + "args": { + "External id": 3288537,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807050.878, "dur": 0.770, + "args": { + "External id": 3288538,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807070.251, "dur": 13.869, + "args": { + "External id": 3288539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807085.077, "dur": 11.475, + "args": { + "External id": 3288540,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595807105.994, "dur": 2.234, + "args": { + "External id": 3288541,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595807116.928, "dur": 3.666, + "args": { + "External id": 3288542,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807119.114, "dur": 0.640, + "args": { + "External id": 3288543,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595807192.595, "dur": 57.744, + "args": { + "External id": 3288544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595807255.415, "dur": 5.211, + "args": { + "External id": 3288545,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807258.047, "dur": 1.475, + "args": { + "External id": 3288546,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807264.530, "dur": 23.462, + "args": { + "External id": 3288547,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595807292.679, "dur": 4.462, + "args": { + "External id": 3288548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595807293.996, "dur": 2.462, + "args": { + "External id": 3288549,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807295.534, "dur": 0.720, + "args": { + "External id": 3288550,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595807299.708, "dur": 43.093, + "args": { + "External id": 3288551,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595807300.888, "dur": 41.278, + "args": { + "External id": 3288552,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807346.555, "dur": 14.748, + "args": { + "External id": 3288553,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595807369.600, "dur": 3.747, + "args": { + "External id": 3288554,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807371.617, "dur": 0.942, + "args": { + "External id": 3288555,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595807376.948, "dur": 49.813, + "args": { + "External id": 3288556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595807377.773, "dur": 5.187, + "args": { + "External id": 3288557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595807378.400, "dur": 3.926, + "args": { + "External id": 3288558,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807379.676, "dur": 2.445, + "args": { + "External id": 3288559,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595807386.026, "dur": 40.202, + "args": { + "External id": 3288560,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595807386.834, "dur": 38.827, + "args": { + "External id": 3288561,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595807431.108, "dur": 4.232, + "args": { + "External id": 3288562,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807433.122, "dur": 0.919, + "args": { + "External id": 3288563,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595807440.402, "dur": 1.301, + "args": { + "External id": 3288564,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595807448.590, "dur": 5.571, + "args": { + "External id": 3288565,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595807450.206, "dur": 3.693, + "args": { + "External id": 3288566,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595807532.625, "dur": 210.931, + "args": { + "External id": 3288567,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595807534.773, "dur": 1.918, + "args": { + "External id": 3288568,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595807538.156, "dur": 204.895, + "args": { + "External id": 3288569,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595807539.251, "dur": 0.491, + "args": { + "External id": 3288570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595807543.005, "dur": 20.303, + "args": { + "External id": 3288571,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595807564.693, "dur": 3.468, + "args": { + "External id": 3288572,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807566.673, "dur": 1.285, + "args": { + "External id": 3288573,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595807568.972, "dur": 54.624, + "args": { + "External id": 3288574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595807570.017, "dur": 3.631, + "args": { + "External id": 3288575,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595807575.045, "dur": 48.276, + "args": { + "External id": 3288576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807607.312, "dur": 2.877, + "args": { + "External id": 3288577,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595807624.946, "dur": 21.036, + "args": { + "External id": 3288578,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807647.633, "dur": 13.576, + "args": { + "External id": 3288579,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595807664.152, "dur": 14.274, + "args": { + "External id": 3288580,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807679.383, "dur": 13.191, + "args": { + "External id": 3288581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595807694.044, "dur": 19.567, + "args": { + "External id": 3288582,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807695.533, "dur": 1.631, + "args": { + "External id": 3288583,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807698.859, "dur": 0.694, + "args": { + "External id": 3288584,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807717.198, "dur": 12.593, + "args": { + "External id": 3288585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807730.754, "dur": 11.431, + "args": { + "External id": 3288586,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595807749.464, "dur": 1.505, + "args": { + "External id": 3288587,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595807758.169, "dur": 3.020, + "args": { + "External id": 3288588,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807760.099, "dur": 0.325, + "args": { + "External id": 3288589,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595807819.612, "dur": 49.069, + "args": { + "External id": 3288590,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595807891.898, "dur": 10.493, + "args": { + "External id": 3288591,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807897.185, "dur": 3.479, + "args": { + "External id": 3288592,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595807903.733, "dur": 28.862, + "args": { + "External id": 3288593,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595807937.717, "dur": 5.532, + "args": { + "External id": 3288594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595807939.407, "dur": 3.154, + "args": { + "External id": 3288595,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595807941.201, "dur": 1.186, + "args": { + "External id": 3288596,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595807946.506, "dur": 79.119, + "args": { + "External id": 3288597,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595807947.557, "dur": 76.855, + "args": { + "External id": 3288598,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808033.271, "dur": 17.301, + "args": { + "External id": 3288599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808057.845, "dur": 4.587, + "args": { + "External id": 3288600,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808060.227, "dur": 1.103, + "args": { + "External id": 3288601,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595808066.509, "dur": 54.568, + "args": { + "External id": 3288602,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 9905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595808067.435, "dur": 5.721, + "args": { + "External id": 3288603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595808068.162, "dur": 4.178, + "args": { + "External id": 3288604,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 9907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808071.627, "dur": 0.602, + "args": { + "External id": 3288605,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 9908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595808073.779, "dur": 46.973, + "args": { + "External id": 3288606,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595808074.676, "dur": 45.333, + "args": { + "External id": 3288607,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 9910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808124.893, "dur": 4.067, + "args": { + "External id": 3288608,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808127.063, "dur": 0.753, + "args": { + "External id": 3288609,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595808134.981, "dur": 1.521, + "args": { + "External id": 3288610,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 9913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595808145.954, "dur": 6.721, + "args": { + "External id": 3288611,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 9914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595808147.461, "dur": 4.945, + "args": { + "External id": 3288612,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595808236.874, "dur": 196.168, + "args": { + "External id": 3288613,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595808238.745, "dur": 3.813, + "args": { + "External id": 3288614,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595808244.018, "dur": 188.363, + "args": { + "External id": 3288615,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 9918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595808245.432, "dur": 0.523, + "args": { + "External id": 3288616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595808246.969, "dur": 21.496, + "args": { + "External id": 3288617,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 9920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595808272.339, "dur": 5.602, + "args": { + "External id": 3288618,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 9921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808276.650, "dur": 0.983, + "args": { + "External id": 3288619,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 9922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595808279.026, "dur": 21.767, + "args": { + "External id": 3288620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595808280.385, "dur": 1.573, + "args": { + "External id": 3288621,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595808283.168, "dur": 17.331, + "args": { + "External id": 3288622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 9925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808285.857, "dur": 2.423, + "args": { + "External id": 3288623,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595808302.014, "dur": 20.556, + "args": { + "External id": 3288624,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808324.019, "dur": 14.731, + "args": { + "External id": 3288625,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595808341.527, "dur": 14.608, + "args": { + "External id": 3288626,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 9929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808357.281, "dur": 13.210, + "args": { + "External id": 3288627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 9930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595808374.393, "dur": 23.698, + "args": { + "External id": 3288628,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 9931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808378.120, "dur": 1.461, + "args": { + "External id": 3288629,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808381.099, "dur": 0.834, + "args": { + "External id": 3288630,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 9933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808405.592, "dur": 12.939, + "args": { + "External id": 3288631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 9934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808419.449, "dur": 11.945, + "args": { + "External id": 3288632,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 9935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595808439.058, "dur": 1.609, + "args": { + "External id": 3288633,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 9936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808448.726, "dur": 3.516, + "args": { + "External id": 3288634,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 9937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808451.052, "dur": 0.505, + "args": { + "External id": 3288635,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595808514.916, "dur": 47.559, + "args": { + "External id": 3288636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 9939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808566.799, "dur": 4.332, + "args": { + "External id": 3288637,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 9940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808569.180, "dur": 1.001, + "args": { + "External id": 3288638,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 9941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808572.591, "dur": 23.561, + "args": { + "External id": 3288639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 9942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595808600.408, "dur": 8.261, + "args": { + "External id": 3288640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 9943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595808601.801, "dur": 6.301, + "args": { + "External id": 3288641,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 9944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808606.358, "dur": 1.583, + "args": { + "External id": 3288642,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 9945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595808611.117, "dur": 39.527, + "args": { + "External id": 3288643,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595808612.000, "dur": 37.861, + "args": { + "External id": 3288644,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 9947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808653.848, "dur": 16.731, + "args": { + "External id": 3288645,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595808675.080, "dur": 25.763, + "args": { + "External id": 3288646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 9949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595808677.301, "dur": 23.137, + "args": { + "External id": 3288647,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 9950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808684.904, "dur": 0.666, + "args": { + "External id": 3288648,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595808705.652, "dur": 30.298, + "args": { + "External id": 3288649,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595808707.390, "dur": 28.357, + "args": { + "External id": 3288650,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 9953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808713.787, "dur": 4.591, + "args": { + "External id": 3288651,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808719.487, "dur": 15.738, + "args": { + "External id": 3288652,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 9955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595808747.091, "dur": 4.646, + "args": { + "External id": 3288653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595808748.664, "dur": 2.830, + "args": { + "External id": 3288654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595808752.787, "dur": 1.933, + "args": { + "External id": 3288655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595808753.631, "dur": 1.022, + "args": { + "External id": 3288656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808796.007, "dur": 24.136, + "args": { + "External id": 3288657,"Sequence number": 32987324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808821.955, "dur": 14.660, + "args": { + "External id": 3288658,"Sequence number": 32987325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 9961 + } + }, + { + "ph": "s", "id": 9, "pid": 1336756, "tid": 1336756, "ts": 1587595808821.955, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808842.255, "dur": 5.658, + "args": { + "External id": 3288659,"Sequence number": 32987326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[16, 4, 4096], [], [], [], []], "Ev Idx": 9962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808845.580, "dur": 1.153, + "args": { + "External id": 3288660,"Record function id": 0, "Concrete Inputs": ["", "[16, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595808850.268, "dur": 11.195, + "args": { + "External id": 3288661,"Sequence number": 32987326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[16, 4, 4096], [], []], "Ev Idx": 9964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808858.745, "dur": 1.376, + "args": { + "External id": 3288662,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[16, 4, 4096], [], [], []], "Ev Idx": 9965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808862.794, "dur": 5.261, + "args": { + "External id": 3288663,"Sequence number": 32987326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 9966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808866.739, "dur": 0.730, + "args": { + "External id": 3288664,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 9967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808895.269, "dur": 7.694, + "args": { + "External id": 3288665,"Sequence number": 32987326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9968 + } + }, + { + "ph": "s", "id": 8, "pid": 1336756, "tid": 1336756, "ts": 1587595808895.269, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808899.672, "dur": 1.546, + "args": { + "External id": 3288666,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808904.023, "dur": 5.813, + "args": { + "External id": 3288667,"Sequence number": 32987327, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], [], []], "Ev Idx": 9970 + } + }, + { + "ph": "s", "id": 7, "pid": 1336756, "tid": 1336756, "ts": 1587595808904.023, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808908.006, "dur": 0.965, + "args": { + "External id": 3288668,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 4, 2048]", "[33554432, 8192, 2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 1336756, "tid": 1336756, + "ts": 1587595808913.077, "dur": 5.820, + "args": { + "External id": 3288669,"Sequence number": 32987328, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], []], "Input Dims": [[16, 4096, 4, 2048], [], []], "Ev Idx": 9972 + } + }, + { + "ph": "s", "id": 6, "pid": 1336756, "tid": 1336756, "ts": 1587595808913.077, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808917.153, "dur": 0.689, + "args": { + "External id": 3288670,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 2048, 1], [], [], []], "Input Dims": [[16, 4096, 4, 2048], [], [], []], "Ev Idx": 9973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595808919.915, "dur": 6.419, + "args": { + "External id": 3288671,"Sequence number": 32987329, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], [], []], "Ev Idx": 9974 + } + }, + { + "ph": "s", "id": 5, "pid": 1336756, "tid": 1336756, "ts": 1587595808919.915, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595808922.835, "dur": 2.755, + "args": { + "External id": 3288672,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]", "[33554432, 8192, 1]", "6144"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[33554432, 8192, 1], [], [], []], "Input Dims": [[16, 4096, 2048], [], [], []], "Ev Idx": 9975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595808930.481, "dur": 32.855, + "args": { + "External id": 3288673,"Sequence number": 32987330, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595808932.034, "dur": 31.117, + "args": { + "External id": 3288674,"Sequence number": 32987330, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595808934.378, "dur": 7.039, + "args": { + "External id": 3288675,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 9978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595808937.014, "dur": 3.866, + "args": { + "External id": 3288676,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595808942.525, "dur": 20.137, + "args": { + "External id": 3288677,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 9980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595809027.152, "dur": 5.056, + "args": { + "External id": 3288678,"Sequence number": 32987330, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[33554432, 8192, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 9981 + } + }, + { + "ph": "s", "id": 4, "pid": 1336756, "tid": 1336756, "ts": 1587595809027.152, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595809034.940, "dur": 1.290, + "args": { + "External id": 3288679,"Sequence number": 32987331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 9982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 1336756, "tid": 1336756, + "ts": 1587595809069.275, "dur": 43870.713, + "args": { + "External id": 3288680,"Sequence number": 32987331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 9983 + } + }, + { + "ph": "s", "id": 3, "pid": 1336756, "tid": 1336756, "ts": 1587595809069.275, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 1336756, "tid": 1336756, + "ts": 1587595809084.921, "dur": 35.989, + "args": { + "External id": 3288681,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587595809085.773, "dur": 34.872, + "args": { + "External id": 3288682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595809087.372, "dur": 10.983, + "args": { + "External id": 3288683,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595809091.312, "dur": 6.634, + "args": { + "External id": 3288684,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809099.091, "dur": 20.920, + "args": { + "External id": 3288685,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [8192, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 9988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595809136.973, "dur": 27.400, + "args": { + "External id": 3288686,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595809137.874, "dur": 7.003, + "args": { + "External id": 3288687,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 9990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595809140.082, "dur": 4.478, + "args": { + "External id": 3288688,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809146.272, "dur": 17.820, + "args": { + "External id": 3288689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 9992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809147.751, "dur": 15.754, + "args": { + "External id": 3288690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 9993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595809167.494, "dur": 22.546, + "args": { + "External id": 3288691,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595809168.374, "dur": 4.496, + "args": { + "External id": 3288692,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 9995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595809169.819, "dur": 2.801, + "args": { + "External id": 3288693,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809173.378, "dur": 16.398, + "args": { + "External id": 3288694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 9997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809176.346, "dur": 13.050, + "args": { + "External id": 3288695,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 9998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587595809196.036, "dur": 18.093, + "args": { + "External id": 3288696,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595809197.422, "dur": 3.652, + "args": { + "External id": 3288697,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809201.650, "dur": 12.172, + "args": { + "External id": 3288698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 10001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809202.217, "dur": 11.305, + "args": { + "External id": 3288699,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1336756, + "ts": 1587595809218.783, "dur": 22.396, + "args": { + "External id": 3288700,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595809243.544, "dur": 51.353, + "args": { + "External id": 3288701,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595809245.538, "dur": 48.832, + "args": { + "External id": 3288702,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595809250.157, "dur": 2.607, + "args": { + "External id": 3288703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595809254.097, "dur": 24.899, + "args": { + "External id": 3288704,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595809258.002, "dur": 20.552, + "args": { + "External id": 3288705,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 10008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595809259.986, "dur": 2.823, + "args": { + "External id": 3288706,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595809263.676, "dur": 14.405, + "args": { + "External id": 3288707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 10010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1336756, + "ts": 1587595809298.429, "dur": 37981.025, + "args": { + "External id": 3288708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 10011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1336756, + "ts": 1587595809300.263, "dur": 37977.490, + "args": { + "External id": 3288709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 10012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595847290.986, "dur": 7.207, + "args": { + "External id": 3288710,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595847295.360, "dur": 1.105, + "args": { + "External id": 3288711,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595847303.452, "dur": 108.180, + "args": { + "External id": 3288712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595847307.496, "dur": 6.415, + "args": { + "External id": 3288713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595847310.018, "dur": 3.015, + "args": { + "External id": 3288714,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595847311.796, "dur": 0.938, + "args": { + "External id": 3288715,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595847315.072, "dur": 95.820, + "args": { + "External id": 3288716,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595847316.656, "dur": 93.369, + "args": { + "External id": 3288717,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595847415.116, "dur": 4.660, + "args": { + "External id": 3288718,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595847417.201, "dur": 1.003, + "args": { + "External id": 3288719,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595847429.702, "dur": 2.486, + "args": { + "External id": 3288720,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595847441.167, "dur": 6.806, + "args": { + "External id": 3288721,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595847443.409, "dur": 4.290, + "args": { + "External id": 3288722,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595847576.371, "dur": 213.490, + "args": { + "External id": 3288723,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595847581.887, "dur": 2.000, + "args": { + "External id": 3288724,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595847585.465, "dur": 204.031, + "args": { + "External id": 3288725,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595847589.385, "dur": 0.613, + "args": { + "External id": 3288726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595847591.675, "dur": 26.179, + "args": { + "External id": 3288727,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595847619.433, "dur": 6.222, + "args": { + "External id": 3288728,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595847624.354, "dur": 0.986, + "args": { + "External id": 3288729,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595847626.772, "dur": 25.524, + "args": { + "External id": 3288730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595847627.748, "dur": 1.970, + "args": { + "External id": 3288731,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595847631.007, "dur": 20.947, + "args": { + "External id": 3288732,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595847635.312, "dur": 3.080, + "args": { + "External id": 3288733,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595847653.817, "dur": 22.245, + "args": { + "External id": 3288734,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595847677.783, "dur": 16.591, + "args": { + "External id": 3288735,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595847700.081, "dur": 15.860, + "args": { + "External id": 3288736,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595847717.517, "dur": 14.818, + "args": { + "External id": 3288737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595847734.247, "dur": 23.838, + "args": { + "External id": 3288738,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595847738.712, "dur": 1.464, + "args": { + "External id": 3288739,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595847741.952, "dur": 0.907, + "args": { + "External id": 3288740,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595847759.761, "dur": 12.768, + "args": { + "External id": 3288741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595847776.311, "dur": 12.030, + "args": { + "External id": 3288742,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595847796.451, "dur": 2.117, + "args": { + "External id": 3288743,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595847805.527, "dur": 4.024, + "args": { + "External id": 3288744,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595847807.813, "dur": 0.761, + "args": { + "External id": 3288745,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595847899.895, "dur": 71.338, + "args": { + "External id": 3288746,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595847977.502, "dur": 37.992, + "args": { + "External id": 3288747,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595847980.809, "dur": 31.334, + "args": { + "External id": 3288748,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848017.962, "dur": 31.370, + "args": { + "External id": 3288749,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595848058.071, "dur": 7.101, + "args": { + "External id": 3288750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595848060.100, "dur": 4.147, + "args": { + "External id": 3288751,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848062.525, "dur": 1.448, + "args": { + "External id": 3288752,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595848068.705, "dur": 52.057, + "args": { + "External id": 3288753,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595848070.433, "dur": 49.677, + "args": { + "External id": 3288754,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848125.629, "dur": 18.639, + "args": { + "External id": 3288755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595848150.332, "dur": 6.662, + "args": { + "External id": 3288756,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848155.157, "dur": 0.934, + "args": { + "External id": 3288757,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595848161.738, "dur": 52.181, + "args": { + "External id": 3288758,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595848162.693, "dur": 4.365, + "args": { + "External id": 3288759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595848163.656, "dur": 2.820, + "args": { + "External id": 3288760,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848165.183, "dur": 1.134, + "args": { + "External id": 3288761,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595848167.858, "dur": 45.678, + "args": { + "External id": 3288762,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595848171.055, "dur": 41.842, + "args": { + "External id": 3288763,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595848217.749, "dur": 3.966, + "args": { + "External id": 3288764,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848219.937, "dur": 0.739, + "args": { + "External id": 3288765,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595848228.245, "dur": 1.859, + "args": { + "External id": 3288766,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595848238.237, "dur": 9.286, + "args": { + "External id": 3288767,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595848242.833, "dur": 4.400, + "args": { + "External id": 3288768,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595848344.752, "dur": 200.377, + "args": { + "External id": 3288769,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595848349.500, "dur": 2.500, + "args": { + "External id": 3288770,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595848353.456, "dur": 191.157, + "args": { + "External id": 3288771,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595848355.056, "dur": 0.491, + "args": { + "External id": 3288772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595848356.601, "dur": 23.019, + "args": { + "External id": 3288773,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595848381.317, "dur": 5.466, + "args": { + "External id": 3288774,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848385.671, "dur": 0.892, + "args": { + "External id": 3288775,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595848387.749, "dur": 25.480, + "args": { + "External id": 3288776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595848391.470, "dur": 1.979, + "args": { + "External id": 3288777,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595848394.648, "dur": 18.351, + "args": { + "External id": 3288778,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848397.779, "dur": 3.076, + "args": { + "External id": 3288779,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595848414.746, "dur": 25.445, + "args": { + "External id": 3288780,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848441.960, "dur": 14.070, + "args": { + "External id": 3288781,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595848458.999, "dur": 15.569, + "args": { + "External id": 3288782,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848476.137, "dur": 14.518, + "args": { + "External id": 3288783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595848491.993, "dur": 24.031, + "args": { + "External id": 3288784,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848494.134, "dur": 2.127, + "args": { + "External id": 3288785,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848500.441, "dur": 0.825, + "args": { + "External id": 3288786,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848517.677, "dur": 12.666, + "args": { + "External id": 3288787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848531.547, "dur": 11.926, + "args": { + "External id": 3288788,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595848551.275, "dur": 1.711, + "args": { + "External id": 3288789,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595848561.564, "dur": 3.476, + "args": { + "External id": 3288790,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848563.866, "dur": 0.411, + "args": { + "External id": 3288791,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595848629.700, "dur": 51.429, + "args": { + "External id": 3288792,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595848686.055, "dur": 6.648, + "args": { + "External id": 3288793,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848690.876, "dur": 0.856, + "args": { + "External id": 3288794,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848694.066, "dur": 23.586, + "args": { + "External id": 3288795,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595848721.645, "dur": 5.664, + "args": { + "External id": 3288796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595848723.189, "dur": 3.584, + "args": { + "External id": 3288797,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848724.909, "dur": 1.696, + "args": { + "External id": 3288798,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595848729.894, "dur": 42.720, + "args": { + "External id": 3288799,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595848733.677, "dur": 38.185, + "args": { + "External id": 3288800,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595848776.379, "dur": 15.645, + "args": { + "External id": 3288801,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595848797.726, "dur": 4.145, + "args": { + "External id": 3288802,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848800.004, "dur": 1.142, + "args": { + "External id": 3288803,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595848805.394, "dur": 49.227, + "args": { + "External id": 3288804,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595848806.136, "dur": 6.174, + "args": { + "External id": 3288805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595848807.043, "dur": 4.729, + "args": { + "External id": 3288806,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848810.768, "dur": 0.855, + "args": { + "External id": 3288807,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595848812.882, "dur": 41.436, + "args": { + "External id": 3288808,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595848813.557, "dur": 40.328, + "args": { + "External id": 3288809,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595848858.347, "dur": 5.838, + "args": { + "External id": 3288810,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595848860.492, "dur": 2.691, + "args": { + "External id": 3288811,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595848869.185, "dur": 17.511, + "args": { + "External id": 3288812,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595848896.572, "dur": 8.447, + "args": { + "External id": 3288813,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595848900.389, "dur": 4.382, + "args": { + "External id": 3288814,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595849026.692, "dur": 197.485, + "args": { + "External id": 3288815,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595849029.203, "dur": 3.369, + "args": { + "External id": 3288816,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595849034.457, "dur": 189.162, + "args": { + "External id": 3288817,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595849035.781, "dur": 0.311, + "args": { + "External id": 3288818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595849036.988, "dur": 28.940, + "args": { + "External id": 3288819,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595849067.740, "dur": 3.327, + "args": { + "External id": 3288820,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849070.085, "dur": 0.686, + "args": { + "External id": 3288821,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595849072.395, "dur": 28.848, + "args": { + "External id": 3288822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595849075.596, "dur": 1.689, + "args": { + "External id": 3288823,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595849078.444, "dur": 22.508, + "args": { + "External id": 3288824,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849083.711, "dur": 2.954, + "args": { + "External id": 3288825,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595849102.607, "dur": 20.548, + "args": { + "External id": 3288826,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849124.724, "dur": 15.742, + "args": { + "External id": 3288827,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595849143.326, "dur": 14.616, + "args": { + "External id": 3288828,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849159.453, "dur": 13.364, + "args": { + "External id": 3288829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595849174.190, "dur": 22.569, + "args": { + "External id": 3288830,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849176.146, "dur": 1.763, + "args": { + "External id": 3288831,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849181.875, "dur": 1.125, + "args": { + "External id": 3288832,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849198.041, "dur": 12.970, + "args": { + "External id": 3288833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849212.056, "dur": 10.589, + "args": { + "External id": 3288834,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595849231.165, "dur": 1.947, + "args": { + "External id": 3288835,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595849242.696, "dur": 3.557, + "args": { + "External id": 3288836,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849244.990, "dur": 0.341, + "args": { + "External id": 3288837,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595849316.009, "dur": 85.655, + "args": { + "External id": 3288838,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595849408.936, "dur": 5.863, + "args": { + "External id": 3288839,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849411.745, "dur": 1.957, + "args": { + "External id": 3288840,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849416.233, "dur": 26.212, + "args": { + "External id": 3288841,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595849446.847, "dur": 5.150, + "args": { + "External id": 3288842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595849448.285, "dur": 3.142, + "args": { + "External id": 3288843,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849450.091, "dur": 1.153, + "args": { + "External id": 3288844,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595849457.324, "dur": 55.591, + "args": { + "External id": 3288845,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595849458.205, "dur": 54.008, + "args": { + "External id": 3288846,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849516.452, "dur": 24.982, + "args": { + "External id": 3288847,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595849546.588, "dur": 3.657, + "args": { + "External id": 3288848,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849548.597, "dur": 0.876, + "args": { + "External id": 3288849,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595849554.214, "dur": 53.692, + "args": { + "External id": 3288850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595849555.197, "dur": 8.838, + "args": { + "External id": 3288851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595849558.728, "dur": 4.678, + "args": { + "External id": 3288852,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849562.451, "dur": 0.846, + "args": { + "External id": 3288853,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595849564.592, "dur": 42.921, + "args": { + "External id": 3288854,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595849565.517, "dur": 41.224, + "args": { + "External id": 3288855,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595849611.328, "dur": 4.242, + "args": { + "External id": 3288856,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849613.485, "dur": 0.889, + "args": { + "External id": 3288857,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595849622.676, "dur": 1.384, + "args": { + "External id": 3288858,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595849631.672, "dur": 5.380, + "args": { + "External id": 3288859,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595849633.273, "dur": 3.506, + "args": { + "External id": 3288860,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595849715.058, "dur": 220.632, + "args": { + "External id": 3288861,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595849716.978, "dur": 2.113, + "args": { + "External id": 3288862,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595849720.931, "dur": 214.260, + "args": { + "External id": 3288863,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595849726.608, "dur": 0.243, + "args": { + "External id": 3288864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595849742.349, "dur": 21.572, + "args": { + "External id": 3288865,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595849765.240, "dur": 5.283, + "args": { + "External id": 3288866,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849767.154, "dur": 3.053, + "args": { + "External id": 3288867,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595849771.417, "dur": 21.161, + "args": { + "External id": 3288868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595849772.635, "dur": 1.623, + "args": { + "External id": 3288869,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595849775.257, "dur": 17.057, + "args": { + "External id": 3288870,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849777.626, "dur": 2.260, + "args": { + "External id": 3288871,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595849793.669, "dur": 20.944, + "args": { + "External id": 3288872,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849816.089, "dur": 13.196, + "args": { + "External id": 3288873,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595849833.715, "dur": 14.178, + "args": { + "External id": 3288874,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849849.042, "dur": 12.463, + "args": { + "External id": 3288875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595849862.997, "dur": 41.100, + "args": { + "External id": 3288876,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849864.950, "dur": 1.729, + "args": { + "External id": 3288877,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849868.449, "dur": 17.809, + "args": { + "External id": 3288878,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849906.461, "dur": 12.704, + "args": { + "External id": 3288879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595849922.288, "dur": 11.720, + "args": { + "External id": 3288880,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595849943.057, "dur": 2.371, + "args": { + "External id": 3288881,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595849954.803, "dur": 4.125, + "args": { + "External id": 3288882,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595849957.548, "dur": 0.645, + "args": { + "External id": 3288883,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595850064.373, "dur": 60.517, + "args": { + "External id": 3288884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595850130.802, "dur": 6.094, + "args": { + "External id": 3288885,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850134.234, "dur": 1.344, + "args": { + "External id": 3288886,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850138.365, "dur": 26.877, + "args": { + "External id": 3288887,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595850170.745, "dur": 8.796, + "args": { + "External id": 3288888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595850174.877, "dur": 3.784, + "args": { + "External id": 3288889,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850176.670, "dur": 1.738, + "args": { + "External id": 3288890,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595850182.545, "dur": 41.755, + "args": { + "External id": 3288891,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595850183.780, "dur": 39.853, + "args": { + "External id": 3288892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850227.903, "dur": 16.474, + "args": { + "External id": 3288893,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595850250.281, "dur": 6.380, + "args": { + "External id": 3288894,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850254.971, "dur": 0.912, + "args": { + "External id": 3288895,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595850260.825, "dur": 47.187, + "args": { + "External id": 3288896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595850261.702, "dur": 3.712, + "args": { + "External id": 3288897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595850262.480, "dur": 2.343, + "args": { + "External id": 3288898,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850263.793, "dur": 0.900, + "args": { + "External id": 3288899,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595850265.946, "dur": 41.602, + "args": { + "External id": 3288900,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595850266.806, "dur": 40.138, + "args": { + "External id": 3288901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595850311.474, "dur": 4.381, + "args": { + "External id": 3288902,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850313.402, "dur": 1.412, + "args": { + "External id": 3288903,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595850323.799, "dur": 1.620, + "args": { + "External id": 3288904,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595850332.724, "dur": 7.036, + "args": { + "External id": 3288905,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595850334.430, "dur": 5.042, + "args": { + "External id": 3288906,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595850425.009, "dur": 196.311, + "args": { + "External id": 3288907,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595850429.434, "dur": 2.210, + "args": { + "External id": 3288908,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595850435.273, "dur": 185.380, + "args": { + "External id": 3288909,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595850436.379, "dur": 0.320, + "args": { + "External id": 3288910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595850437.722, "dur": 20.833, + "args": { + "External id": 3288911,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595850460.146, "dur": 5.340, + "args": { + "External id": 3288912,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850464.372, "dur": 0.840, + "args": { + "External id": 3288913,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595850471.164, "dur": 22.710, + "args": { + "External id": 3288914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595850472.268, "dur": 1.958, + "args": { + "External id": 3288915,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595850475.293, "dur": 18.303, + "args": { + "External id": 3288916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850478.105, "dur": 2.554, + "args": { + "External id": 3288917,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595850495.191, "dur": 20.533, + "args": { + "External id": 3288918,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850517.048, "dur": 15.788, + "args": { + "External id": 3288919,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595850537.857, "dur": 13.928, + "args": { + "External id": 3288920,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850553.053, "dur": 12.768, + "args": { + "External id": 3288921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595850567.562, "dur": 23.715, + "args": { + "External id": 3288922,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850571.657, "dur": 2.087, + "args": { + "External id": 3288923,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850575.904, "dur": 1.016, + "args": { + "External id": 3288924,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850592.537, "dur": 12.967, + "args": { + "External id": 3288925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850608.940, "dur": 10.726, + "args": { + "External id": 3288926,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595850627.064, "dur": 1.812, + "args": { + "External id": 3288927,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595850637.329, "dur": 4.362, + "args": { + "External id": 3288928,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850639.996, "dur": 0.782, + "args": { + "External id": 3288929,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595850705.171, "dur": 46.642, + "args": { + "External id": 3288930,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595850756.355, "dur": 4.870, + "args": { + "External id": 3288931,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850759.169, "dur": 1.102, + "args": { + "External id": 3288932,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850762.498, "dur": 22.136, + "args": { + "External id": 3288933,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595850790.858, "dur": 4.793, + "args": { + "External id": 3288934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595850791.990, "dur": 3.084, + "args": { + "External id": 3288935,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850793.699, "dur": 1.218, + "args": { + "External id": 3288936,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595850797.730, "dur": 40.419, + "args": { + "External id": 3288937,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595850798.611, "dur": 38.991, + "args": { + "External id": 3288938,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595850841.705, "dur": 15.112, + "args": { + "External id": 3288939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595850862.099, "dur": 5.646, + "args": { + "External id": 3288940,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850866.194, "dur": 0.773, + "args": { + "External id": 3288941,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595850887.907, "dur": 56.270, + "args": { + "External id": 3288942,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595850888.680, "dur": 5.069, + "args": { + "External id": 3288943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595850889.790, "dur": 3.224, + "args": { + "External id": 3288944,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850891.517, "dur": 1.190, + "args": { + "External id": 3288945,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595850894.358, "dur": 49.370, + "args": { + "External id": 3288946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595850897.541, "dur": 45.488, + "args": { + "External id": 3288947,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595850948.640, "dur": 4.163, + "args": { + "External id": 3288948,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595850950.785, "dur": 0.958, + "args": { + "External id": 3288949,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595850958.750, "dur": 1.345, + "args": { + "External id": 3288950,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595850967.580, "dur": 9.152, + "args": { + "External id": 3288951,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595850972.134, "dur": 4.332, + "args": { + "External id": 3288952,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595851100.694, "dur": 196.546, + "args": { + "External id": 3288953,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595851105.463, "dur": 3.242, + "args": { + "External id": 3288954,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595851109.810, "dur": 186.740, + "args": { + "External id": 3288955,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595851111.230, "dur": 0.549, + "args": { + "External id": 3288956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595851113.163, "dur": 23.844, + "args": { + "External id": 3288957,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595851138.704, "dur": 5.656, + "args": { + "External id": 3288958,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851142.990, "dur": 1.092, + "args": { + "External id": 3288959,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595851145.226, "dur": 25.760, + "args": { + "External id": 3288960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595851148.871, "dur": 2.092, + "args": { + "External id": 3288961,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595851151.966, "dur": 18.740, + "args": { + "External id": 3288962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851155.116, "dur": 2.544, + "args": { + "External id": 3288963,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595851172.388, "dur": 21.852, + "args": { + "External id": 3288964,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851195.468, "dur": 16.069, + "args": { + "External id": 3288965,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595851214.427, "dur": 15.275, + "args": { + "External id": 3288966,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851231.256, "dur": 12.942, + "args": { + "External id": 3288967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595851245.605, "dur": 22.938, + "args": { + "External id": 3288968,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851247.305, "dur": 1.819, + "args": { + "External id": 3288969,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851252.975, "dur": 0.992, + "args": { + "External id": 3288970,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851269.843, "dur": 13.044, + "args": { + "External id": 3288971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851283.916, "dur": 11.561, + "args": { + "External id": 3288972,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595851303.844, "dur": 1.840, + "args": { + "External id": 3288973,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595851314.232, "dur": 3.722, + "args": { + "External id": 3288974,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851316.755, "dur": 0.392, + "args": { + "External id": 3288975,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595851378.702, "dur": 53.952, + "args": { + "External id": 3288976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595851437.618, "dur": 7.030, + "args": { + "External id": 3288977,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851442.821, "dur": 0.879, + "args": { + "External id": 3288978,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851446.002, "dur": 25.377, + "args": { + "External id": 3288979,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595851475.603, "dur": 5.340, + "args": { + "External id": 3288980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595851477.068, "dur": 3.290, + "args": { + "External id": 3288981,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851478.695, "dur": 1.512, + "args": { + "External id": 3288982,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595851483.514, "dur": 42.280, + "args": { + "External id": 3288983,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595851486.940, "dur": 38.258, + "args": { + "External id": 3288984,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851529.546, "dur": 15.539, + "args": { + "External id": 3288985,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595851550.689, "dur": 3.690, + "args": { + "External id": 3288986,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851552.698, "dur": 0.920, + "args": { + "External id": 3288987,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595851557.940, "dur": 48.377, + "args": { + "External id": 3288988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595851558.704, "dur": 5.944, + "args": { + "External id": 3288989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595851559.410, "dur": 4.714, + "args": { + "External id": 3288990,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851562.846, "dur": 1.149, + "args": { + "External id": 3288991,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595851565.155, "dur": 40.789, + "args": { + "External id": 3288992,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595851565.983, "dur": 39.435, + "args": { + "External id": 3288993,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595851610.587, "dur": 3.705, + "args": { + "External id": 3288994,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851612.563, "dur": 0.777, + "args": { + "External id": 3288995,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595851619.425, "dur": 1.615, + "args": { + "External id": 3288996,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595851627.904, "dur": 10.159, + "args": { + "External id": 3288997,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595851631.569, "dur": 6.179, + "args": { + "External id": 3288998,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595851713.388, "dur": 196.320, + "args": { + "External id": 3288999,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595851715.510, "dur": 1.852, + "args": { + "External id": 3289000,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595851719.409, "dur": 189.672, + "args": { + "External id": 3289001,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595851720.525, "dur": 0.181, + "args": { + "External id": 3289002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595851721.562, "dur": 22.168, + "args": { + "External id": 3289003,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595851745.553, "dur": 3.224, + "args": { + "External id": 3289004,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851747.651, "dur": 0.769, + "args": { + "External id": 3289005,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595851749.653, "dur": 24.762, + "args": { + "External id": 3289006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595851752.792, "dur": 1.577, + "args": { + "External id": 3289007,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595851755.456, "dur": 18.674, + "args": { + "External id": 3289008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851759.706, "dur": 2.134, + "args": { + "External id": 3289009,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595851775.672, "dur": 19.534, + "args": { + "External id": 3289010,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851796.518, "dur": 13.395, + "args": { + "External id": 3289011,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595851812.390, "dur": 12.770, + "args": { + "External id": 3289012,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851826.219, "dur": 13.643, + "args": { + "External id": 3289013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595851841.031, "dur": 21.811, + "args": { + "External id": 3289014,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851842.964, "dur": 1.548, + "args": { + "External id": 3289015,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851848.628, "dur": 0.860, + "args": { + "External id": 3289016,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851864.044, "dur": 29.122, + "args": { + "External id": 3289017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595851895.113, "dur": 12.744, + "args": { + "External id": 3289018,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595851916.582, "dur": 2.149, + "args": { + "External id": 3289019,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595851927.289, "dur": 3.367, + "args": { + "External id": 3289020,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595851929.512, "dur": 0.410, + "args": { + "External id": 3289021,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595852031.898, "dur": 54.969, + "args": { + "External id": 3289022,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595852094.892, "dur": 6.086, + "args": { + "External id": 3289023,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852097.903, "dur": 1.709, + "args": { + "External id": 3289024,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852102.332, "dur": 24.685, + "args": { + "External id": 3289025,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595852131.337, "dur": 5.219, + "args": { + "External id": 3289026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595852133.094, "dur": 2.769, + "args": { + "External id": 3289027,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852134.640, "dur": 1.025, + "args": { + "External id": 3289028,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595852141.943, "dur": 40.822, + "args": { + "External id": 3289029,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595852142.936, "dur": 39.159, + "args": { + "External id": 3289030,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852186.525, "dur": 15.613, + "args": { + "External id": 3289031,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595852207.828, "dur": 4.130, + "args": { + "External id": 3289032,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852210.067, "dur": 1.089, + "args": { + "External id": 3289033,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 1336756, "tid": 1336756, + "ts": 1587595852215.939, "dur": 51.511, + "args": { + "External id": 3289034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 10337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595852216.843, "dur": 7.944, + "args": { + "External id": 3289035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595852220.049, "dur": 4.153, + "args": { + "External id": 3289036,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 10339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852221.371, "dur": 2.604, + "args": { + "External id": 3289037,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 10340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595852225.353, "dur": 41.375, + "args": { + "External id": 3289038,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595852226.115, "dur": 40.015, + "args": { + "External id": 3289039,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 10342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595852271.143, "dur": 4.291, + "args": { + "External id": 3289040,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852273.130, "dur": 1.021, + "args": { + "External id": 3289041,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595852283.229, "dur": 1.567, + "args": { + "External id": 3289042,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 10345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595852292.039, "dur": 6.251, + "args": { + "External id": 3289043,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 10346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595852293.732, "dur": 4.203, + "args": { + "External id": 3289044,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595852376.600, "dur": 191.303, + "args": { + "External id": 3289045,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595852378.948, "dur": 1.896, + "args": { + "External id": 3289046,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 1336756, "tid": 1336756, + "ts": 1587595852382.640, "dur": 184.769, + "args": { + "External id": 3289047,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 10350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 1336756, "tid": 1336756, + "ts": 1587595852386.351, "dur": 0.562, + "args": { + "External id": 3289048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 1336756, "tid": 1336756, + "ts": 1587595852390.038, "dur": 21.732, + "args": { + "External id": 3289049,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 10352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 1336756, "tid": 1336756, + "ts": 1587595852413.397, "dur": 3.066, + "args": { + "External id": 3289050,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 10353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852415.450, "dur": 0.758, + "args": { + "External id": 3289051,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 10354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595852417.193, "dur": 24.898, + "args": { + "External id": 3289052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587595852418.558, "dur": 3.524, + "args": { + "External id": 3289053,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587595852423.142, "dur": 18.463, + "args": { + "External id": 3289054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 10357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852425.604, "dur": 2.652, + "args": { + "External id": 3289055,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587595852443.343, "dur": 21.537, + "args": { + "External id": 3289056,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852466.212, "dur": 13.814, + "args": { + "External id": 3289057,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 1336756, "tid": 1336756, + "ts": 1587595852484.730, "dur": 15.410, + "args": { + "External id": 3289058,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 10361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852501.497, "dur": 13.285, + "args": { + "External id": 3289059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 10362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595852515.950, "dur": 21.103, + "args": { + "External id": 3289060,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 10363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852517.689, "dur": 1.689, + "args": { + "External id": 3289061,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 10364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852521.069, "dur": 0.887, + "args": { + "External id": 3289062,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 10365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852538.586, "dur": 11.738, + "args": { + "External id": 3289063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 10366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852553.627, "dur": 12.762, + "args": { + "External id": 3289064,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 10367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587595852573.162, "dur": 1.785, + "args": { + "External id": 3289065,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 10368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595852583.154, "dur": 3.330, + "args": { + "External id": 3289066,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 10369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852585.235, "dur": 0.528, + "args": { + "External id": 3289067,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595852648.229, "dur": 50.174, + "args": { + "External id": 3289068,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 10371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 1336756, "tid": 1336756, + "ts": 1587595852702.684, "dur": 4.669, + "args": { + "External id": 3289069,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 10372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852705.264, "dur": 1.143, + "args": { + "External id": 3289070,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 10373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852708.797, "dur": 25.033, + "args": { + "External id": 3289071,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 10374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 1336756, "tid": 1336756, + "ts": 1587595852737.925, "dur": 9.486, + "args": { + "External id": 3289072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 10375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 1336756, "tid": 1336756, + "ts": 1587595852741.819, "dur": 4.885, + "args": { + "External id": 3289073,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 10376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852743.529, "dur": 2.980, + "args": { + "External id": 3289074,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 10377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 1336756, "tid": 1336756, + "ts": 1587595852749.679, "dur": 40.704, + "args": { + "External id": 3289075,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 1336756, "tid": 1336756, + "ts": 1587595852750.895, "dur": 38.729, + "args": { + "External id": 3289076,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 10379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852794.014, "dur": 15.245, + "args": { + "External id": 3289077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595852813.965, "dur": 24.927, + "args": { + "External id": 3289078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 10381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 1336756, "tid": 1336756, + "ts": 1587595852816.366, "dur": 22.192, + "args": { + "External id": 3289079,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 10382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852822.082, "dur": 0.972, + "args": { + "External id": 3289080,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587595852843.338, "dur": 48.374, + "args": { + "External id": 3289081,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 10384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 1336756, "tid": 1336756, + "ts": 1587595852847.447, "dur": 44.062, + "args": { + "External id": 3289082,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 10385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595852851.952, "dur": 4.330, + "args": { + "External id": 3289083,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587595852857.463, "dur": 33.155, + "args": { + "External id": 3289084,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 10387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595852906.180, "dur": 5.559, + "args": { + "External id": 3289085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 10388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595852907.980, "dur": 3.482, + "args": { + "External id": 3289086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 10389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595852912.803, "dur": 1.511, + "args": { + "External id": 3289087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 1336756, "tid": 1336756, + "ts": 1587595852913.677, "dur": 0.550, + "args": { + "External id": 3289088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 10391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595853026.840, "dur": 32.909, + "args": { + "External id": 3289089,"Sequence number": 32987332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 1336756, "tid": 1336756, + "ts": 1587595853062.711, "dur": 14.999, + "args": { + "External id": 3289090,"Sequence number": 32987333, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10393 + } + }, + { + "ph": "s", "id": 2, "pid": 1336756, "tid": 1336756, "ts": 1587595853062.711, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 1336756, "tid": 1336756, + "ts": 1587595853188.432, "dur": 41.314, + "args": { + "External id": 3289091,"Record function id": 0, "Ev Idx": 10394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336756, "tid": 1336756, + "ts": 1587595853360.130, "dur": 36.222, + "args": { + "External id": 3289092,"Sequence number": 32987334, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 10395 + } + }, + { + "ph": "s", "id": 1, "pid": 1336756, "tid": 1336756, "ts": 1587595853360.130, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595853432.099, "dur": 28.266, + "args": { + "External id": 3289093,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 10396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587595853433.411, "dur": 8.689, + "args": { + "External id": 3289094,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 10397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587595853437.244, "dur": 4.226, + "args": { + "External id": 3289095,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587595853443.522, "dur": 16.447, + "args": { + "External id": 3289096,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 10399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336756, "tid": 1336756, + "ts": 1587597186726.148, "dur": 52.852, + "args": { + "External id": 3289097,"Sequence number": 32987335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 1336756, "tid": 1336756, + "ts": 1587597186787.561, "dur": 21.342, + "args": { + "External id": 3289098,"Sequence number": 32987336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 10401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587597186815.774, "dur": 21.784, + "args": { + "External id": 3289099,"Sequence number": 32987337, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587597187409.349, "dur": 30.182, + "args": { + "External id": 3289100,"Sequence number": 32987338, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 10403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587597187445.786, "dur": 14.474, + "args": { + "External id": 3289101,"Sequence number": 32987339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 10404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 1336756, "tid": 1336756, + "ts": 1587597189276.826, "dur": 3012.371, + "args": { + "External id": 3289102,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 10405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 1336756, "tid": 1336756, + "ts": 1587597189844.165, "dur": 909.453, + "args": { + "External id": 3289103,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 10406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 1336756, "tid": 1336756, + "ts": 1587597189881.098, "dur": 63.650, + "args": { + "External id": 3289104,"Record function id": 0, "Concrete Inputs": ["[36375]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 10407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587597189884.213, "dur": 11.947, + "args": { + "External id": 3289105,"Record function id": 0, "Concrete Inputs": ["[36375]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 1336756, "tid": 1336756, + "ts": 1587597189899.118, "dur": 45.326, + "args": { + "External id": 3289106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[36375]], "Ev Idx": 10409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 1336756, "tid": 1336756, + "ts": 1587597189901.109, "dur": 42.698, + "args": { + "External id": 3289107,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[36375], []], "Ev Idx": 10410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192319.954, "dur": 3.503, + "args": { + "External id": 3289108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192325.739, "dur": 0.327, + "args": { + "External id": 3289109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192327.237, "dur": 0.275, + "args": { + "External id": 3289110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192328.597, "dur": 0.227, + "args": { + "External id": 3289111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192330.150, "dur": 0.226, + "args": { + "External id": 3289112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192335.510, "dur": 0.211, + "args": { + "External id": 3289113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192336.816, "dur": 0.347, + "args": { + "External id": 3289114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192340.436, "dur": 0.361, + "args": { + "External id": 3289115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192341.833, "dur": 0.467, + "args": { + "External id": 3289116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192343.550, "dur": 0.242, + "args": { + "External id": 3289117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192344.689, "dur": 0.226, + "args": { + "External id": 3289118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192345.850, "dur": 0.203, + "args": { + "External id": 3289119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192347.050, "dur": 0.339, + "args": { + "External id": 3289120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192348.526, "dur": 0.197, + "args": { + "External id": 3289121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192349.779, "dur": 0.253, + "args": { + "External id": 3289122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192352.941, "dur": 0.208, + "args": { + "External id": 3289123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192354.067, "dur": 0.214, + "args": { + "External id": 3289124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192355.396, "dur": 0.205, + "args": { + "External id": 3289125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192356.710, "dur": 0.220, + "args": { + "External id": 3289126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192357.706, "dur": 0.200, + "args": { + "External id": 3289127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192359.205, "dur": 0.199, + "args": { + "External id": 3289128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192360.554, "dur": 0.204, + "args": { + "External id": 3289129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192361.524, "dur": 0.203, + "args": { + "External id": 3289130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192364.411, "dur": 0.228, + "args": { + "External id": 3289131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192365.448, "dur": 0.206, + "args": { + "External id": 3289132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192366.570, "dur": 0.380, + "args": { + "External id": 3289133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192367.786, "dur": 0.192, + "args": { + "External id": 3289134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192368.864, "dur": 0.200, + "args": { + "External id": 3289135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192369.878, "dur": 0.196, + "args": { + "External id": 3289136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192370.991, "dur": 0.197, + "args": { + "External id": 3289137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192371.972, "dur": 0.205, + "args": { + "External id": 3289138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192375.146, "dur": 0.192, + "args": { + "External id": 3289139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192376.212, "dur": 0.209, + "args": { + "External id": 3289140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192377.446, "dur": 0.200, + "args": { + "External id": 3289141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192378.630, "dur": 0.211, + "args": { + "External id": 3289142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192379.702, "dur": 0.201, + "args": { + "External id": 3289143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192380.871, "dur": 0.209, + "args": { + "External id": 3289144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192381.900, "dur": 0.208, + "args": { + "External id": 3289145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192382.869, "dur": 0.218, + "args": { + "External id": 3289146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192386.374, "dur": 0.204, + "args": { + "External id": 3289147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192387.378, "dur": 0.209, + "args": { + "External id": 3289148,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192388.688, "dur": 0.197, + "args": { + "External id": 3289149,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192389.651, "dur": 0.196, + "args": { + "External id": 3289150,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192390.716, "dur": 0.209, + "args": { + "External id": 3289151,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192391.832, "dur": 0.206, + "args": { + "External id": 3289152,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192393.156, "dur": 0.202, + "args": { + "External id": 3289153,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192394.227, "dur": 0.205, + "args": { + "External id": 3289154,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192397.205, "dur": 0.200, + "args": { + "External id": 3289155,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192398.297, "dur": 0.213, + "args": { + "External id": 3289156,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192399.294, "dur": 0.201, + "args": { + "External id": 3289157,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192404.891, "dur": 0.212, + "args": { + "External id": 3289158,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192405.928, "dur": 0.194, + "args": { + "External id": 3289159,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192406.896, "dur": 0.198, + "args": { + "External id": 3289160,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192408.123, "dur": 0.202, + "args": { + "External id": 3289161,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192409.154, "dur": 0.206, + "args": { + "External id": 3289162,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192412.214, "dur": 0.415, + "args": { + "External id": 3289163,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192413.417, "dur": 0.330, + "args": { + "External id": 3289164,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192414.615, "dur": 0.196, + "args": { + "External id": 3289165,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192415.749, "dur": 0.344, + "args": { + "External id": 3289166,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192416.862, "dur": 0.320, + "args": { + "External id": 3289167,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192417.946, "dur": 0.320, + "args": { + "External id": 3289168,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192419.042, "dur": 0.209, + "args": { + "External id": 3289169,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192420.174, "dur": 0.239, + "args": { + "External id": 3289170,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192423.491, "dur": 0.304, + "args": { + "External id": 3289171,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192424.678, "dur": 0.305, + "args": { + "External id": 3289172,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192426.070, "dur": 0.192, + "args": { + "External id": 3289173,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192427.089, "dur": 0.200, + "args": { + "External id": 3289174,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192428.094, "dur": 0.197, + "args": { + "External id": 3289175,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192429.221, "dur": 0.235, + "args": { + "External id": 3289176,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192430.235, "dur": 0.196, + "args": { + "External id": 3289177,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192431.211, "dur": 0.201, + "args": { + "External id": 3289178,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192434.607, "dur": 0.202, + "args": { + "External id": 3289179,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192435.821, "dur": 0.199, + "args": { + "External id": 3289180,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192436.970, "dur": 0.205, + "args": { + "External id": 3289181,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192438.023, "dur": 0.201, + "args": { + "External id": 3289182,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192439.169, "dur": 0.210, + "args": { + "External id": 3289183,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192440.305, "dur": 0.228, + "args": { + "External id": 3289184,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192442.311, "dur": 0.200, + "args": { + "External id": 3289185,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192443.481, "dur": 0.204, + "args": { + "External id": 3289186,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192446.798, "dur": 0.196, + "args": { + "External id": 3289187,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192447.763, "dur": 0.202, + "args": { + "External id": 3289188,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192448.912, "dur": 0.198, + "args": { + "External id": 3289189,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192449.865, "dur": 0.200, + "args": { + "External id": 3289190,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192451.032, "dur": 0.195, + "args": { + "External id": 3289191,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192451.986, "dur": 0.192, + "args": { + "External id": 3289192,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192452.940, "dur": 0.198, + "args": { + "External id": 3289193,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192453.936, "dur": 0.200, + "args": { + "External id": 3289194,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192457.285, "dur": 0.205, + "args": { + "External id": 3289195,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192458.310, "dur": 0.198, + "args": { + "External id": 3289196,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192459.458, "dur": 0.199, + "args": { + "External id": 3289197,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192460.418, "dur": 0.211, + "args": { + "External id": 3289198,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192461.412, "dur": 0.203, + "args": { + "External id": 3289199,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192462.371, "dur": 0.199, + "args": { + "External id": 3289200,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192464.534, "dur": 0.221, + "args": { + "External id": 3289201,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192465.559, "dur": 0.198, + "args": { + "External id": 3289202,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192468.945, "dur": 0.343, + "args": { + "External id": 3289203,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192470.090, "dur": 0.338, + "args": { + "External id": 3289204,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192471.366, "dur": 0.203, + "args": { + "External id": 3289205,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192472.350, "dur": 0.372, + "args": { + "External id": 3289206,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192473.512, "dur": 0.417, + "args": { + "External id": 3289207,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192474.860, "dur": 0.339, + "args": { + "External id": 3289208,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192476.017, "dur": 0.200, + "args": { + "External id": 3289209,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192477.225, "dur": 0.337, + "args": { + "External id": 3289210,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192480.626, "dur": 0.274, + "args": { + "External id": 3289211,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192481.818, "dur": 0.199, + "args": { + "External id": 3289212,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192482.929, "dur": 0.207, + "args": { + "External id": 3289213,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192483.897, "dur": 0.202, + "args": { + "External id": 3289214,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192485.064, "dur": 0.209, + "args": { + "External id": 3289215,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192486.222, "dur": 0.213, + "args": { + "External id": 3289216,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192487.699, "dur": 0.199, + "args": { + "External id": 3289217,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192488.884, "dur": 0.202, + "args": { + "External id": 3289218,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192491.994, "dur": 0.232, + "args": { + "External id": 3289219,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192492.999, "dur": 0.204, + "args": { + "External id": 3289220,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192494.235, "dur": 0.200, + "args": { + "External id": 3289221,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192495.233, "dur": 0.195, + "args": { + "External id": 3289222,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192496.417, "dur": 0.197, + "args": { + "External id": 3289223,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192497.531, "dur": 0.198, + "args": { + "External id": 3289224,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192498.513, "dur": 0.201, + "args": { + "External id": 3289225,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192499.476, "dur": 0.203, + "args": { + "External id": 3289226,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192502.389, "dur": 0.190, + "args": { + "External id": 3289227,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192503.390, "dur": 0.204, + "args": { + "External id": 3289228,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192504.367, "dur": 0.203, + "args": { + "External id": 3289229,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192505.334, "dur": 0.202, + "args": { + "External id": 3289230,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192506.336, "dur": 0.218, + "args": { + "External id": 3289231,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192507.344, "dur": 0.198, + "args": { + "External id": 3289232,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192508.311, "dur": 0.196, + "args": { + "External id": 3289233,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192509.279, "dur": 0.199, + "args": { + "External id": 3289234,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192512.359, "dur": 0.195, + "args": { + "External id": 3289235,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192513.317, "dur": 0.363, + "args": { + "External id": 3289236,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192515.016, "dur": 0.198, + "args": { + "External id": 3289237,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192516.042, "dur": 0.202, + "args": { + "External id": 3289238,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192517.057, "dur": 0.194, + "args": { + "External id": 3289239,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192518.020, "dur": 0.326, + "args": { + "External id": 3289240,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192519.194, "dur": 0.198, + "args": { + "External id": 3289241,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192520.384, "dur": 0.203, + "args": { + "External id": 3289242,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192523.434, "dur": 0.311, + "args": { + "External id": 3289243,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192524.528, "dur": 0.313, + "args": { + "External id": 3289244,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192525.809, "dur": 0.202, + "args": { + "External id": 3289245,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192526.815, "dur": 0.203, + "args": { + "External id": 3289246,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192527.796, "dur": 0.198, + "args": { + "External id": 3289247,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192529.107, "dur": 0.200, + "args": { + "External id": 3289248,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192530.092, "dur": 0.195, + "args": { + "External id": 3289249,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192531.126, "dur": 0.202, + "args": { + "External id": 3289250,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192534.250, "dur": 0.207, + "args": { + "External id": 3289251,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192535.232, "dur": 0.198, + "args": { + "External id": 3289252,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192536.373, "dur": 0.199, + "args": { + "External id": 3289253,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192537.344, "dur": 0.199, + "args": { + "External id": 3289254,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192538.375, "dur": 0.197, + "args": { + "External id": 3289255,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192539.352, "dur": 0.196, + "args": { + "External id": 3289256,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192540.718, "dur": 0.195, + "args": { + "External id": 3289257,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192541.882, "dur": 0.195, + "args": { + "External id": 3289258,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192545.016, "dur": 0.199, + "args": { + "External id": 3289259,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192545.974, "dur": 0.200, + "args": { + "External id": 3289260,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192547.068, "dur": 0.191, + "args": { + "External id": 3289261,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192548.035, "dur": 0.197, + "args": { + "External id": 3289262,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192549.110, "dur": 0.218, + "args": { + "External id": 3289263,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192550.083, "dur": 0.195, + "args": { + "External id": 3289264,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192551.045, "dur": 0.193, + "args": { + "External id": 3289265,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192552.147, "dur": 0.195, + "args": { + "External id": 3289266,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192555.096, "dur": 0.198, + "args": { + "External id": 3289267,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192556.134, "dur": 0.198, + "args": { + "External id": 3289268,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192557.199, "dur": 0.195, + "args": { + "External id": 3289269,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192558.322, "dur": 0.199, + "args": { + "External id": 3289270,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192559.381, "dur": 0.193, + "args": { + "External id": 3289271,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192561.383, "dur": 0.307, + "args": { + "External id": 3289272,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192562.502, "dur": 0.199, + "args": { + "External id": 3289273,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192563.470, "dur": 0.196, + "args": { + "External id": 3289274,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192566.648, "dur": 0.200, + "args": { + "External id": 3289275,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192567.606, "dur": 0.197, + "args": { + "External id": 3289276,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192568.578, "dur": 0.205, + "args": { + "External id": 3289277,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192569.810, "dur": 0.255, + "args": { + "External id": 3289278,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192570.836, "dur": 0.193, + "args": { + "External id": 3289279,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192571.804, "dur": 0.317, + "args": { + "External id": 3289280,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192573.186, "dur": 0.191, + "args": { + "External id": 3289281,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192574.202, "dur": 0.201, + "args": { + "External id": 3289282,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192577.372, "dur": 0.203, + "args": { + "External id": 3289283,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192578.353, "dur": 0.198, + "args": { + "External id": 3289284,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192579.352, "dur": 0.191, + "args": { + "External id": 3289285,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192580.313, "dur": 0.195, + "args": { + "External id": 3289286,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192581.286, "dur": 0.193, + "args": { + "External id": 3289287,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192582.238, "dur": 0.207, + "args": { + "External id": 3289288,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192583.258, "dur": 0.189, + "args": { + "External id": 3289289,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192584.210, "dur": 0.196, + "args": { + "External id": 3289290,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192587.253, "dur": 0.223, + "args": { + "External id": 3289291,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192588.257, "dur": 0.198, + "args": { + "External id": 3289292,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192589.407, "dur": 0.200, + "args": { + "External id": 3289293,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192590.452, "dur": 0.198, + "args": { + "External id": 3289294,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192591.597, "dur": 0.199, + "args": { + "External id": 3289295,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192592.569, "dur": 0.203, + "args": { + "External id": 3289296,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192593.539, "dur": 0.202, + "args": { + "External id": 3289297,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192594.677, "dur": 0.241, + "args": { + "External id": 3289298,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192598.098, "dur": 0.202, + "args": { + "External id": 3289299,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192599.091, "dur": 0.314, + "args": { + "External id": 3289300,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192600.176, "dur": 0.195, + "args": { + "External id": 3289301,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192601.134, "dur": 0.197, + "args": { + "External id": 3289302,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192602.267, "dur": 0.226, + "args": { + "External id": 3289303,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192603.433, "dur": 0.197, + "args": { + "External id": 3289304,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192604.571, "dur": 0.194, + "args": { + "External id": 3289305,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192605.517, "dur": 0.196, + "args": { + "External id": 3289306,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192608.699, "dur": 0.197, + "args": { + "External id": 3289307,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192609.733, "dur": 0.196, + "args": { + "External id": 3289308,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192611.133, "dur": 0.197, + "args": { + "External id": 3289309,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192612.217, "dur": 0.363, + "args": { + "External id": 3289310,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192613.409, "dur": 0.312, + "args": { + "External id": 3289311,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192614.611, "dur": 0.206, + "args": { + "External id": 3289312,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192615.676, "dur": 0.237, + "args": { + "External id": 3289313,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192616.869, "dur": 0.360, + "args": { + "External id": 3289314,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192620.285, "dur": 0.206, + "args": { + "External id": 3289315,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192621.293, "dur": 0.226, + "args": { + "External id": 3289316,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192622.291, "dur": 0.198, + "args": { + "External id": 3289317,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192623.258, "dur": 0.208, + "args": { + "External id": 3289318,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192624.401, "dur": 0.235, + "args": { + "External id": 3289319,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192625.397, "dur": 0.197, + "args": { + "External id": 3289320,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192626.362, "dur": 0.200, + "args": { + "External id": 3289321,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192627.496, "dur": 0.211, + "args": { + "External id": 3289322,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192630.595, "dur": 0.196, + "args": { + "External id": 3289323,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192631.549, "dur": 0.204, + "args": { + "External id": 3289324,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192632.659, "dur": 0.198, + "args": { + "External id": 3289325,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192633.616, "dur": 0.203, + "args": { + "External id": 3289326,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192634.592, "dur": 0.197, + "args": { + "External id": 3289327,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192635.542, "dur": 0.206, + "args": { + "External id": 3289328,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192636.635, "dur": 0.219, + "args": { + "External id": 3289329,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192637.625, "dur": 0.199, + "args": { + "External id": 3289330,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192640.493, "dur": 0.198, + "args": { + "External id": 3289331,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192641.448, "dur": 0.202, + "args": { + "External id": 3289332,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192642.432, "dur": 0.195, + "args": { + "External id": 3289333,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192643.397, "dur": 0.199, + "args": { + "External id": 3289334,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192644.369, "dur": 0.196, + "args": { + "External id": 3289335,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192645.320, "dur": 0.201, + "args": { + "External id": 3289336,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192646.351, "dur": 0.196, + "args": { + "External id": 3289337,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192647.320, "dur": 0.220, + "args": { + "External id": 3289338,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192650.335, "dur": 0.198, + "args": { + "External id": 3289339,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192651.293, "dur": 0.206, + "args": { + "External id": 3289340,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192652.652, "dur": 0.205, + "args": { + "External id": 3289341,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192653.651, "dur": 0.202, + "args": { + "External id": 3289342,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192654.631, "dur": 0.197, + "args": { + "External id": 3289343,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192655.606, "dur": 0.200, + "args": { + "External id": 3289344,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192656.579, "dur": 0.198, + "args": { + "External id": 3289345,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192657.547, "dur": 0.200, + "args": { + "External id": 3289346,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192660.772, "dur": 0.198, + "args": { + "External id": 3289347,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192661.772, "dur": 0.200, + "args": { + "External id": 3289348,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192662.744, "dur": 0.201, + "args": { + "External id": 3289349,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192663.729, "dur": 0.201, + "args": { + "External id": 3289350,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192664.707, "dur": 0.193, + "args": { + "External id": 3289351,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192665.800, "dur": 0.200, + "args": { + "External id": 3289352,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192666.871, "dur": 0.331, + "args": { + "External id": 3289353,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192668.084, "dur": 0.201, + "args": { + "External id": 3289354,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192671.195, "dur": 0.199, + "args": { + "External id": 3289355,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192672.158, "dur": 0.204, + "args": { + "External id": 3289356,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192673.137, "dur": 0.198, + "args": { + "External id": 3289357,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192674.106, "dur": 0.199, + "args": { + "External id": 3289358,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192675.131, "dur": 0.198, + "args": { + "External id": 3289359,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192676.088, "dur": 0.200, + "args": { + "External id": 3289360,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192677.059, "dur": 0.201, + "args": { + "External id": 3289361,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192678.012, "dur": 0.200, + "args": { + "External id": 3289362,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192681.110, "dur": 0.194, + "args": { + "External id": 3289363,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192682.068, "dur": 0.202, + "args": { + "External id": 3289364,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192683.043, "dur": 0.200, + "args": { + "External id": 3289365,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192684.000, "dur": 0.202, + "args": { + "External id": 3289366,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192684.975, "dur": 0.196, + "args": { + "External id": 3289367,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192685.955, "dur": 0.200, + "args": { + "External id": 3289368,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192690.528, "dur": 0.238, + "args": { + "External id": 3289369,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192691.728, "dur": 0.221, + "args": { + "External id": 3289370,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192694.766, "dur": 0.243, + "args": { + "External id": 3289371,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192695.833, "dur": 0.200, + "args": { + "External id": 3289372,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192696.815, "dur": 0.197, + "args": { + "External id": 3289373,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192697.780, "dur": 0.199, + "args": { + "External id": 3289374,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192698.918, "dur": 0.210, + "args": { + "External id": 3289375,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192699.996, "dur": 0.199, + "args": { + "External id": 3289376,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192701.515, "dur": 0.333, + "args": { + "External id": 3289377,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192702.963, "dur": 0.237, + "args": { + "External id": 3289378,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192706.350, "dur": 0.205, + "args": { + "External id": 3289379,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192707.341, "dur": 0.211, + "args": { + "External id": 3289380,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192708.468, "dur": 0.206, + "args": { + "External id": 3289381,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192709.444, "dur": 0.200, + "args": { + "External id": 3289382,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192710.554, "dur": 0.398, + "args": { + "External id": 3289383,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192711.723, "dur": 0.323, + "args": { + "External id": 3289384,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192714.407, "dur": 0.396, + "args": { + "External id": 3289385,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192715.745, "dur": 0.331, + "args": { + "External id": 3289386,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192719.241, "dur": 0.216, + "args": { + "External id": 3289387,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192720.346, "dur": 0.222, + "args": { + "External id": 3289388,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192721.347, "dur": 0.203, + "args": { + "External id": 3289389,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192722.366, "dur": 0.201, + "args": { + "External id": 3289390,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192723.351, "dur": 0.195, + "args": { + "External id": 3289391,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192724.313, "dur": 0.199, + "args": { + "External id": 3289392,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192725.291, "dur": 0.195, + "args": { + "External id": 3289393,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192726.356, "dur": 0.217, + "args": { + "External id": 3289394,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192729.310, "dur": 0.200, + "args": { + "External id": 3289395,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192730.350, "dur": 0.203, + "args": { + "External id": 3289396,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192731.321, "dur": 0.198, + "args": { + "External id": 3289397,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597192732.281, "dur": 0.200, + "args": { + "External id": 3289398,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 10701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 1336756, "tid": 1336756, + "ts": 1587597192787.009, "dur": 1706.674, + "args": { + "External id": 3289399,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 1336756, "tid": 1336756, + "ts": 1587597193282.180, "dur": 1134.624, + "args": { + "External id": 3289400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193290.407, "dur": 6.979, + "args": { + "External id": 3289401,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193294.009, "dur": 2.800, + "args": { + "External id": 3289402,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193297.972, "dur": 3.788, + "args": { + "External id": 3289403,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193299.037, "dur": 2.591, + "args": { + "External id": 3289404,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193302.292, "dur": 3.171, + "args": { + "External id": 3289405,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193304.727, "dur": 0.671, + "args": { + "External id": 3289406,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193306.178, "dur": 1.355, + "args": { + "External id": 3289407,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193306.629, "dur": 0.840, + "args": { + "External id": 3289408,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193309.886, "dur": 5.178, + "args": { + "External id": 3289409,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193314.464, "dur": 0.530, + "args": { + "External id": 3289410,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193315.471, "dur": 1.519, + "args": { + "External id": 3289411,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193316.152, "dur": 0.766, + "args": { + "External id": 3289412,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193317.385, "dur": 1.664, + "args": { + "External id": 3289413,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193318.034, "dur": 0.945, + "args": { + "External id": 3289414,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193319.331, "dur": 3.736, + "args": { + "External id": 3289415,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193322.270, "dur": 0.730, + "args": { + "External id": 3289416,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193323.413, "dur": 1.795, + "args": { + "External id": 3289417,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193323.973, "dur": 0.937, + "args": { + "External id": 3289418,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193325.430, "dur": 3.264, + "args": { + "External id": 3289419,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193325.963, "dur": 2.660, + "args": { + "External id": 3289420,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193328.943, "dur": 3.726, + "args": { + "External id": 3289421,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193331.415, "dur": 1.192, + "args": { + "External id": 3289422,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193332.894, "dur": 1.401, + "args": { + "External id": 3289423,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193333.249, "dur": 0.985, + "args": { + "External id": 3289424,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193336.731, "dur": 4.936, + "args": { + "External id": 3289425,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193341.035, "dur": 0.562, + "args": { + "External id": 3289426,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193342.191, "dur": 1.354, + "args": { + "External id": 3289427,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193342.869, "dur": 0.611, + "args": { + "External id": 3289428,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193343.917, "dur": 1.470, + "args": { + "External id": 3289429,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193344.547, "dur": 0.771, + "args": { + "External id": 3289430,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193345.874, "dur": 3.540, + "args": { + "External id": 3289431,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193348.553, "dur": 0.795, + "args": { + "External id": 3289432,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193349.638, "dur": 1.300, + "args": { + "External id": 3289433,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193350.181, "dur": 0.683, + "args": { + "External id": 3289434,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193351.195, "dur": 3.175, + "args": { + "External id": 3289435,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193351.597, "dur": 2.482, + "args": { + "External id": 3289436,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193354.715, "dur": 3.092, + "args": { + "External id": 3289437,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193357.169, "dur": 0.568, + "args": { + "External id": 3289438,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193358.155, "dur": 1.731, + "args": { + "External id": 3289439,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193358.812, "dur": 1.008, + "args": { + "External id": 3289440,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193362.125, "dur": 4.955, + "args": { + "External id": 3289441,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193366.386, "dur": 0.627, + "args": { + "External id": 3289442,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193367.322, "dur": 1.497, + "args": { + "External id": 3289443,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193368.095, "dur": 0.657, + "args": { + "External id": 3289444,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193369.190, "dur": 1.631, + "args": { + "External id": 3289445,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193370.049, "dur": 0.712, + "args": { + "External id": 3289446,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193371.051, "dur": 3.674, + "args": { + "External id": 3289447,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193373.742, "dur": 0.722, + "args": { + "External id": 3289448,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193375.086, "dur": 1.160, + "args": { + "External id": 3289449,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193375.562, "dur": 0.622, + "args": { + "External id": 3289450,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193376.468, "dur": 2.810, + "args": { + "External id": 3289451,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193376.848, "dur": 2.350, + "args": { + "External id": 3289452,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193379.506, "dur": 2.661, + "args": { + "External id": 3289453,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193381.404, "dur": 0.700, + "args": { + "External id": 3289454,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193382.387, "dur": 1.257, + "args": { + "External id": 3289455,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193382.746, "dur": 0.833, + "args": { + "External id": 3289456,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193386.055, "dur": 4.660, + "args": { + "External id": 3289457,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193390.104, "dur": 0.546, + "args": { + "External id": 3289458,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193391.080, "dur": 1.502, + "args": { + "External id": 3289459,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193391.837, "dur": 0.562, + "args": { + "External id": 3289460,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193392.967, "dur": 1.202, + "args": { + "External id": 3289461,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193393.511, "dur": 0.595, + "args": { + "External id": 3289462,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193394.530, "dur": 3.657, + "args": { + "External id": 3289463,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193397.340, "dur": 0.595, + "args": { + "External id": 3289464,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193398.572, "dur": 1.278, + "args": { + "External id": 3289465,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193399.138, "dur": 0.650, + "args": { + "External id": 3289466,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193400.072, "dur": 2.670, + "args": { + "External id": 3289467,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193400.580, "dur": 2.097, + "args": { + "External id": 3289468,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193402.970, "dur": 2.485, + "args": { + "External id": 3289469,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193404.527, "dur": 0.656, + "args": { + "External id": 3289470,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193405.671, "dur": 1.395, + "args": { + "External id": 3289471,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193406.037, "dur": 0.762, + "args": { + "External id": 3289472,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193409.582, "dur": 4.693, + "args": { + "External id": 3289473,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193413.739, "dur": 0.467, + "args": { + "External id": 3289474,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193414.516, "dur": 1.180, + "args": { + "External id": 3289475,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193414.901, "dur": 0.733, + "args": { + "External id": 3289476,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193415.941, "dur": 1.425, + "args": { + "External id": 3289477,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193416.374, "dur": 0.752, + "args": { + "External id": 3289478,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193417.590, "dur": 4.846, + "args": { + "External id": 3289479,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193421.550, "dur": 0.645, + "args": { + "External id": 3289480,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193422.774, "dur": 1.416, + "args": { + "External id": 3289481,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193423.289, "dur": 0.648, + "args": { + "External id": 3289482,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193424.413, "dur": 3.168, + "args": { + "External id": 3289483,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193424.934, "dur": 2.579, + "args": { + "External id": 3289484,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193427.942, "dur": 2.720, + "args": { + "External id": 3289485,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193429.976, "dur": 0.617, + "args": { + "External id": 3289486,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193431.093, "dur": 1.568, + "args": { + "External id": 3289487,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193431.467, "dur": 0.959, + "args": { + "External id": 3289488,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193434.919, "dur": 4.964, + "args": { + "External id": 3289489,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193439.237, "dur": 0.583, + "args": { + "External id": 3289490,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193440.119, "dur": 1.435, + "args": { + "External id": 3289491,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193440.519, "dur": 0.784, + "args": { + "External id": 3289492,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193441.772, "dur": 1.709, + "args": { + "External id": 3289493,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193442.288, "dur": 0.934, + "args": { + "External id": 3289494,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193443.710, "dur": 3.691, + "args": { + "External id": 3289495,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193446.688, "dur": 0.650, + "args": { + "External id": 3289496,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193447.625, "dur": 1.352, + "args": { + "External id": 3289497,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193448.150, "dur": 0.761, + "args": { + "External id": 3289498,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193449.201, "dur": 3.637, + "args": { + "External id": 3289499,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193450.134, "dur": 2.552, + "args": { + "External id": 3289500,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193453.203, "dur": 2.101, + "args": { + "External id": 3289501,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193454.380, "dur": 0.652, + "args": { + "External id": 3289502,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193455.673, "dur": 2.088, + "args": { + "External id": 3289503,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193456.947, "dur": 0.745, + "args": { + "External id": 3289504,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193460.427, "dur": 4.210, + "args": { + "External id": 3289505,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193463.934, "dur": 0.635, + "args": { + "External id": 3289506,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193464.901, "dur": 2.490, + "args": { + "External id": 3289507,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193466.391, "dur": 0.938, + "args": { + "External id": 3289508,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193467.778, "dur": 2.107, + "args": { + "External id": 3289509,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193468.789, "dur": 0.835, + "args": { + "External id": 3289510,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193470.260, "dur": 4.157, + "args": { + "External id": 3289511,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193473.522, "dur": 0.830, + "args": { + "External id": 3289512,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193474.678, "dur": 1.769, + "args": { + "External id": 3289513,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193475.653, "dur": 0.729, + "args": { + "External id": 3289514,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193476.673, "dur": 3.801, + "args": { + "External id": 3289515,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193478.061, "dur": 2.338, + "args": { + "External id": 3289516,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193480.725, "dur": 1.683, + "args": { + "External id": 3289517,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193481.565, "dur": 0.597, + "args": { + "External id": 3289518,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193482.637, "dur": 2.262, + "args": { + "External id": 3289519,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193483.872, "dur": 0.962, + "args": { + "External id": 3289520,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193487.073, "dur": 3.645, + "args": { + "External id": 3289521,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193490.007, "dur": 0.640, + "args": { + "External id": 3289522,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193491.087, "dur": 1.920, + "args": { + "External id": 3289523,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193492.333, "dur": 0.609, + "args": { + "External id": 3289524,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193493.382, "dur": 1.743, + "args": { + "External id": 3289525,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193494.476, "dur": 0.585, + "args": { + "External id": 3289526,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193495.452, "dur": 4.068, + "args": { + "External id": 3289527,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193498.767, "dur": 0.688, + "args": { + "External id": 3289528,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193499.766, "dur": 1.614, + "args": { + "External id": 3289529,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193500.592, "dur": 0.724, + "args": { + "External id": 3289530,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193501.629, "dur": 4.075, + "args": { + "External id": 3289531,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193502.982, "dur": 2.449, + "args": { + "External id": 3289532,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193505.955, "dur": 1.772, + "args": { + "External id": 3289533,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193506.807, "dur": 0.575, + "args": { + "External id": 3289534,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193507.980, "dur": 1.887, + "args": { + "External id": 3289535,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193509.062, "dur": 0.739, + "args": { + "External id": 3289536,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193512.156, "dur": 3.603, + "args": { + "External id": 3289537,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193515.118, "dur": 0.579, + "args": { + "External id": 3289538,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193516.145, "dur": 1.603, + "args": { + "External id": 3289539,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193517.078, "dur": 0.606, + "args": { + "External id": 3289540,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193518.163, "dur": 1.695, + "args": { + "External id": 3289541,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193519.033, "dur": 0.762, + "args": { + "External id": 3289542,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193520.377, "dur": 4.401, + "args": { + "External id": 3289543,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193523.823, "dur": 0.890, + "args": { + "External id": 3289544,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193525.156, "dur": 1.726, + "args": { + "External id": 3289545,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193526.217, "dur": 0.600, + "args": { + "External id": 3289546,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193527.136, "dur": 3.421, + "args": { + "External id": 3289547,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193528.337, "dur": 2.152, + "args": { + "External id": 3289548,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193530.817, "dur": 2.033, + "args": { + "External id": 3289549,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193531.977, "dur": 0.712, + "args": { + "External id": 3289550,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193533.163, "dur": 2.232, + "args": { + "External id": 3289551,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193534.511, "dur": 0.820, + "args": { + "External id": 3289552,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193537.710, "dur": 3.313, + "args": { + "External id": 3289553,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193540.357, "dur": 0.601, + "args": { + "External id": 3289554,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193541.285, "dur": 1.986, + "args": { + "External id": 3289555,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193542.506, "dur": 0.700, + "args": { + "External id": 3289556,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193543.607, "dur": 2.081, + "args": { + "External id": 3289557,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193544.612, "dur": 1.014, + "args": { + "External id": 3289558,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193545.913, "dur": 4.356, + "args": { + "External id": 3289559,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193549.524, "dur": 0.682, + "args": { + "External id": 3289560,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193550.517, "dur": 2.003, + "args": { + "External id": 3289561,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193551.562, "dur": 0.720, + "args": { + "External id": 3289562,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193552.870, "dur": 3.792, + "args": { + "External id": 3289563,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193554.210, "dur": 2.196, + "args": { + "External id": 3289564,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193557.008, "dur": 1.609, + "args": { + "External id": 3289565,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193557.920, "dur": 0.635, + "args": { + "External id": 3289566,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193558.946, "dur": 1.998, + "args": { + "External id": 3289567,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193560.011, "dur": 0.870, + "args": { + "External id": 3289568,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193563.087, "dur": 3.472, + "args": { + "External id": 3289569,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193566.022, "dur": 0.472, + "args": { + "External id": 3289570,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193566.800, "dur": 2.125, + "args": { + "External id": 3289571,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193568.204, "dur": 0.649, + "args": { + "External id": 3289572,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193569.163, "dur": 1.888, + "args": { + "External id": 3289573,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193570.103, "dur": 0.883, + "args": { + "External id": 3289574,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193571.406, "dur": 4.256, + "args": { + "External id": 3289575,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193574.537, "dur": 0.962, + "args": { + "External id": 3289576,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193576.037, "dur": 1.517, + "args": { + "External id": 3289577,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193576.899, "dur": 0.591, + "args": { + "External id": 3289578,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193577.919, "dur": 3.953, + "args": { + "External id": 3289579,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193579.087, "dur": 2.625, + "args": { + "External id": 3289580,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193582.119, "dur": 1.670, + "args": { + "External id": 3289581,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193582.909, "dur": 0.619, + "args": { + "External id": 3289582,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193584.005, "dur": 2.239, + "args": { + "External id": 3289583,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193585.235, "dur": 0.742, + "args": { + "External id": 3289584,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193588.425, "dur": 3.566, + "args": { + "External id": 3289585,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193591.457, "dur": 0.468, + "args": { + "External id": 3289586,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193592.292, "dur": 2.169, + "args": { + "External id": 3289587,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193593.768, "dur": 0.627, + "args": { + "External id": 3289588,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193594.697, "dur": 2.007, + "args": { + "External id": 3289589,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193595.926, "dur": 0.714, + "args": { + "External id": 3289590,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193596.936, "dur": 4.105, + "args": { + "External id": 3289591,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193600.202, "dur": 0.675, + "args": { + "External id": 3289592,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193601.315, "dur": 1.458, + "args": { + "External id": 3289593,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193602.088, "dur": 0.618, + "args": { + "External id": 3289594,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193603.021, "dur": 3.873, + "args": { + "External id": 3289595,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193604.431, "dur": 2.396, + "args": { + "External id": 3289596,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193607.169, "dur": 1.651, + "args": { + "External id": 3289597,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193607.903, "dur": 0.600, + "args": { + "External id": 3289598,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193609.051, "dur": 2.121, + "args": { + "External id": 3289599,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193610.396, "dur": 0.710, + "args": { + "External id": 3289600,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193636.427, "dur": 3.640, + "args": { + "External id": 3295745,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193639.540, "dur": 0.464, + "args": { + "External id": 3295746,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193640.347, "dur": 1.933, + "args": { + "External id": 3295747,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193641.593, "dur": 0.424, + "args": { + "External id": 3295748,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193642.560, "dur": 1.427, + "args": { + "External id": 3295749,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193643.239, "dur": 0.687, + "args": { + "External id": 3295750,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193644.210, "dur": 3.595, + "args": { + "External id": 3295751,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193647.146, "dur": 0.594, + "args": { + "External id": 3295752,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193648.043, "dur": 1.222, + "args": { + "External id": 3295753,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193648.686, "dur": 0.409, + "args": { + "External id": 3295754,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193649.479, "dur": 3.133, + "args": { + "External id": 3295755,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193650.331, "dur": 2.208, + "args": { + "External id": 3295756,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193652.854, "dur": 1.263, + "args": { + "External id": 3295757,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193653.510, "dur": 0.540, + "args": { + "External id": 3295758,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193654.379, "dur": 1.620, + "args": { + "External id": 3295759,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193655.223, "dur": 0.712, + "args": { + "External id": 3295760,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193658.366, "dur": 3.227, + "args": { + "External id": 3295761,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193661.040, "dur": 0.485, + "args": { + "External id": 3295762,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193661.888, "dur": 1.923, + "args": { + "External id": 3295763,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193662.950, "dur": 0.799, + "args": { + "External id": 3295764,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193664.050, "dur": 1.370, + "args": { + "External id": 3295765,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193664.692, "dur": 0.663, + "args": { + "External id": 3295766,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193665.647, "dur": 3.579, + "args": { + "External id": 3295767,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193668.524, "dur": 0.640, + "args": { + "External id": 3295768,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193669.500, "dur": 1.567, + "args": { + "External id": 3295769,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193670.226, "dur": 0.779, + "args": { + "External id": 3295770,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193671.320, "dur": 3.351, + "args": { + "External id": 3295771,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193672.481, "dur": 2.121, + "args": { + "External id": 3295772,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193674.912, "dur": 1.326, + "args": { + "External id": 3295773,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193675.556, "dur": 0.619, + "args": { + "External id": 3295774,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193676.466, "dur": 1.824, + "args": { + "External id": 3295775,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193677.540, "dur": 0.685, + "args": { + "External id": 3295776,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193680.462, "dur": 3.843, + "args": { + "External id": 3295777,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193683.602, "dur": 0.640, + "args": { + "External id": 3295778,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193684.548, "dur": 1.994, + "args": { + "External id": 3295779,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193685.862, "dur": 0.615, + "args": { + "External id": 3295780,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193686.782, "dur": 1.980, + "args": { + "External id": 3295781,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193688.051, "dur": 0.646, + "args": { + "External id": 3295782,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193689.012, "dur": 3.890, + "args": { + "External id": 3295783,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193692.280, "dur": 0.553, + "args": { + "External id": 3295784,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193693.142, "dur": 1.538, + "args": { + "External id": 3295785,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193694.001, "dur": 0.613, + "args": { + "External id": 3295786,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193694.899, "dur": 3.397, + "args": { + "External id": 3295787,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193696.024, "dur": 2.004, + "args": { + "External id": 3295788,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193698.538, "dur": 1.516, + "args": { + "External id": 3295789,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193699.387, "dur": 0.605, + "args": { + "External id": 3295790,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193700.275, "dur": 2.044, + "args": { + "External id": 3295791,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193701.614, "dur": 0.640, + "args": { + "External id": 3295792,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193704.805, "dur": 3.755, + "args": { + "External id": 3295793,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193707.923, "dur": 0.569, + "args": { + "External id": 3295794,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193708.808, "dur": 1.815, + "args": { + "External id": 3295795,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193709.922, "dur": 0.639, + "args": { + "External id": 3295796,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193710.862, "dur": 1.147, + "args": { + "External id": 3295797,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193711.491, "dur": 0.453, + "args": { + "External id": 3295798,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193712.302, "dur": 3.652, + "args": { + "External id": 3295799,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193715.338, "dur": 0.551, + "args": { + "External id": 3295800,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193716.199, "dur": 1.554, + "args": { + "External id": 3295801,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193717.073, "dur": 0.612, + "args": { + "External id": 3295802,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193717.975, "dur": 3.328, + "args": { + "External id": 3295803,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193719.105, "dur": 2.126, + "args": { + "External id": 3295804,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193721.542, "dur": 1.481, + "args": { + "External id": 3295805,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193722.347, "dur": 0.605, + "args": { + "External id": 3295806,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193723.346, "dur": 1.773, + "args": { + "External id": 3295807,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193724.553, "dur": 0.500, + "args": { + "External id": 3295808,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193727.542, "dur": 3.317, + "args": { + "External id": 3295809,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193730.235, "dur": 0.558, + "args": { + "External id": 3295810,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193731.127, "dur": 1.948, + "args": { + "External id": 3295811,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193732.317, "dur": 0.694, + "args": { + "External id": 3295812,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193733.497, "dur": 1.449, + "args": { + "External id": 3295813,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193734.173, "dur": 0.710, + "args": { + "External id": 3295814,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193735.374, "dur": 3.803, + "args": { + "External id": 3295815,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193738.491, "dur": 0.622, + "args": { + "External id": 3295816,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193739.417, "dur": 1.111, + "args": { + "External id": 3295817,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193740.066, "dur": 0.397, + "args": { + "External id": 3295818,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193740.755, "dur": 3.662, + "args": { + "External id": 3295819,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193742.159, "dur": 2.191, + "args": { + "External id": 3295820,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193744.688, "dur": 1.491, + "args": { + "External id": 3295821,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193745.341, "dur": 0.769, + "args": { + "External id": 3295822,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193746.673, "dur": 2.134, + "args": { + "External id": 3295823,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193748.059, "dur": 0.684, + "args": { + "External id": 3295824,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193751.144, "dur": 3.452, + "args": { + "External id": 3295825,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193753.911, "dur": 0.618, + "args": { + "External id": 3295826,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193754.858, "dur": 1.992, + "args": { + "External id": 3295827,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193756.130, "dur": 0.653, + "args": { + "External id": 3295828,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193757.089, "dur": 1.627, + "args": { + "External id": 3295829,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193757.960, "dur": 0.694, + "args": { + "External id": 3295830,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193758.946, "dur": 3.857, + "args": { + "External id": 3295831,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193762.093, "dur": 0.641, + "args": { + "External id": 3295832,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193763.048, "dur": 1.364, + "args": { + "External id": 3295833,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193763.735, "dur": 0.609, + "args": { + "External id": 3295834,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193764.842, "dur": 3.464, + "args": { + "External id": 3295835,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193766.001, "dur": 2.236, + "args": { + "External id": 3295836,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193768.555, "dur": 1.788, + "args": { + "External id": 3295837,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193769.437, "dur": 0.842, + "args": { + "External id": 3295838,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193770.808, "dur": 1.910, + "args": { + "External id": 3295839,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 10998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193771.914, "dur": 0.740, + "args": { + "External id": 3295840,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 10999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193774.919, "dur": 3.142, + "args": { + "External id": 3295841,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193777.416, "dur": 0.578, + "args": { + "External id": 3295842,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193778.336, "dur": 1.932, + "args": { + "External id": 3295843,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193779.586, "dur": 0.611, + "args": { + "External id": 3295844,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193780.550, "dur": 1.628, + "args": { + "External id": 3295845,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193781.360, "dur": 0.757, + "args": { + "External id": 3295846,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193782.397, "dur": 4.004, + "args": { + "External id": 3295847,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193785.548, "dur": 0.781, + "args": { + "External id": 3295848,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193786.687, "dur": 1.339, + "args": { + "External id": 3295849,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193787.389, "dur": 0.576, + "args": { + "External id": 3295850,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193788.248, "dur": 3.433, + "args": { + "External id": 3295851,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193789.525, "dur": 2.087, + "args": { + "External id": 3295852,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193791.924, "dur": 1.406, + "args": { + "External id": 3295853,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193792.732, "dur": 0.532, + "args": { + "External id": 3295854,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193793.557, "dur": 1.954, + "args": { + "External id": 3295855,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193794.650, "dur": 0.798, + "args": { + "External id": 3295856,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193797.837, "dur": 3.301, + "args": { + "External id": 3295857,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193800.483, "dur": 0.590, + "args": { + "External id": 3295858,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193801.378, "dur": 1.907, + "args": { + "External id": 3295859,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193802.601, "dur": 0.620, + "args": { + "External id": 3295860,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193803.527, "dur": 1.561, + "args": { + "External id": 3295861,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193804.361, "dur": 0.664, + "args": { + "External id": 3295862,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193805.306, "dur": 4.264, + "args": { + "External id": 3295863,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193808.883, "dur": 0.623, + "args": { + "External id": 3295864,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193809.981, "dur": 1.476, + "args": { + "External id": 3295865,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193810.657, "dur": 0.735, + "args": { + "External id": 3295866,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193811.675, "dur": 3.512, + "args": { + "External id": 3295867,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193812.710, "dur": 2.242, + "args": { + "External id": 3295868,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193815.452, "dur": 1.293, + "args": { + "External id": 3295869,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193816.198, "dur": 0.485, + "args": { + "External id": 3295870,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193816.962, "dur": 1.764, + "args": { + "External id": 3295871,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193817.990, "dur": 0.668, + "args": { + "External id": 3295872,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193821.140, "dur": 3.734, + "args": { + "External id": 3295873,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193824.212, "dur": 0.594, + "args": { + "External id": 3295874,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193825.110, "dur": 2.067, + "args": { + "External id": 3295875,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193826.327, "dur": 0.782, + "args": { + "External id": 3295876,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193827.614, "dur": 1.532, + "args": { + "External id": 3295877,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193828.247, "dur": 0.835, + "args": { + "External id": 3295878,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193829.368, "dur": 5.770, + "args": { + "External id": 3295879,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193834.322, "dur": 0.750, + "args": { + "External id": 3295880,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193835.562, "dur": 1.508, + "args": { + "External id": 3295881,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193836.376, "dur": 0.630, + "args": { + "External id": 3295882,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193837.287, "dur": 3.391, + "args": { + "External id": 3295883,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193838.406, "dur": 2.202, + "args": { + "External id": 3295884,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193840.929, "dur": 1.391, + "args": { + "External id": 3295885,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193841.616, "dur": 0.636, + "args": { + "External id": 3295886,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193842.556, "dur": 2.369, + "args": { + "External id": 3295887,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193843.978, "dur": 0.882, + "args": { + "External id": 3295888,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193847.649, "dur": 3.078, + "args": { + "External id": 3295889,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193850.100, "dur": 0.564, + "args": { + "External id": 3295890,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193851.019, "dur": 1.868, + "args": { + "External id": 3295891,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193852.213, "dur": 0.611, + "args": { + "External id": 3295892,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193853.126, "dur": 1.599, + "args": { + "External id": 3295893,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193853.953, "dur": 0.707, + "args": { + "External id": 3295894,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193855.126, "dur": 3.426, + "args": { + "External id": 3295895,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193857.859, "dur": 0.627, + "args": { + "External id": 3295896,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193858.787, "dur": 1.372, + "args": { + "External id": 3295897,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193859.587, "dur": 0.511, + "args": { + "External id": 3295898,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193860.380, "dur": 22.758, + "args": { + "External id": 3295899,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193861.839, "dur": 20.610, + "args": { + "External id": 3295900,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193884.332, "dur": 1.840, + "args": { + "External id": 3295901,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193885.651, "dur": 0.458, + "args": { + "External id": 3295902,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193886.545, "dur": 1.686, + "args": { + "External id": 3295903,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193887.656, "dur": 0.507, + "args": { + "External id": 3295904,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193891.144, "dur": 3.660, + "args": { + "External id": 3295905,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193894.201, "dur": 0.538, + "args": { + "External id": 3295906,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193895.104, "dur": 1.930, + "args": { + "External id": 3295907,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193896.375, "dur": 0.594, + "args": { + "External id": 3295908,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193897.277, "dur": 1.694, + "args": { + "External id": 3295909,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193898.235, "dur": 0.673, + "args": { + "External id": 3295910,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193899.224, "dur": 4.181, + "args": { + "External id": 3295911,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193902.659, "dur": 0.682, + "args": { + "External id": 3295912,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193903.820, "dur": 1.900, + "args": { + "External id": 3295913,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193904.704, "dur": 0.949, + "args": { + "External id": 3295914,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193906.135, "dur": 3.421, + "args": { + "External id": 3295915,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193907.324, "dur": 2.167, + "args": { + "External id": 3295916,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193909.821, "dur": 1.300, + "args": { + "External id": 3295917,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193910.483, "dur": 0.572, + "args": { + "External id": 3295918,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193911.382, "dur": 2.051, + "args": { + "External id": 3295919,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193912.741, "dur": 0.630, + "args": { + "External id": 3295920,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193915.618, "dur": 3.854, + "args": { + "External id": 3295921,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193918.849, "dur": 0.557, + "args": { + "External id": 3295922,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193919.714, "dur": 1.948, + "args": { + "External id": 3295923,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193920.802, "dur": 0.791, + "args": { + "External id": 3295924,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193921.910, "dur": 1.412, + "args": { + "External id": 3295925,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193922.574, "dur": 0.679, + "args": { + "External id": 3295926,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193923.544, "dur": 3.924, + "args": { + "External id": 3295927,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193926.815, "dur": 0.585, + "args": { + "External id": 3295928,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193927.726, "dur": 1.445, + "args": { + "External id": 3295929,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193928.599, "dur": 0.506, + "args": { + "External id": 3295930,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193929.567, "dur": 3.352, + "args": { + "External id": 3295931,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193930.766, "dur": 1.889, + "args": { + "External id": 3295932,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193933.176, "dur": 1.461, + "args": { + "External id": 3295933,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193933.905, "dur": 0.661, + "args": { + "External id": 3295934,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193934.859, "dur": 2.073, + "args": { + "External id": 3295935,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193936.259, "dur": 0.590, + "args": { + "External id": 3295936,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193939.112, "dur": 3.514, + "args": { + "External id": 3295937,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193942.013, "dur": 0.545, + "args": { + "External id": 3295938,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193942.928, "dur": 1.969, + "args": { + "External id": 3295939,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193944.093, "dur": 0.741, + "args": { + "External id": 3295940,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193945.144, "dur": 1.537, + "args": { + "External id": 3295941,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193945.951, "dur": 0.663, + "args": { + "External id": 3295942,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193946.904, "dur": 3.852, + "args": { + "External id": 3295943,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193949.948, "dur": 0.738, + "args": { + "External id": 3295944,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193950.993, "dur": 1.478, + "args": { + "External id": 3295945,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193951.643, "dur": 0.758, + "args": { + "External id": 3295946,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193952.692, "dur": 3.378, + "args": { + "External id": 3295947,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193953.962, "dur": 2.038, + "args": { + "External id": 3295948,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193956.312, "dur": 1.514, + "args": { + "External id": 3295949,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193957.105, "dur": 0.654, + "args": { + "External id": 3295950,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193958.250, "dur": 2.033, + "args": { + "External id": 3295951,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193959.348, "dur": 0.868, + "args": { + "External id": 3295952,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193962.737, "dur": 3.412, + "args": { + "External id": 3295953,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193965.466, "dur": 0.618, + "args": { + "External id": 3295954,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193966.388, "dur": 1.802, + "args": { + "External id": 3295955,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193967.513, "dur": 0.609, + "args": { + "External id": 3295956,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193968.428, "dur": 1.938, + "args": { + "External id": 3295957,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193969.532, "dur": 0.763, + "args": { + "External id": 3295958,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193970.589, "dur": 3.499, + "args": { + "External id": 3295959,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193973.408, "dur": 0.610, + "args": { + "External id": 3295960,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193974.359, "dur": 1.429, + "args": { + "External id": 3295961,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193975.235, "dur": 0.482, + "args": { + "External id": 3295962,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193976.008, "dur": 3.654, + "args": { + "External id": 3295963,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597193977.108, "dur": 2.203, + "args": { + "External id": 3295964,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597193979.916, "dur": 36.452, + "args": { + "External id": 3295965,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194014.946, "dur": 0.995, + "args": { + "External id": 3295966,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194017.384, "dur": 2.134, + "args": { + "External id": 3295967,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194018.783, "dur": 0.665, + "args": { + "External id": 3295968,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194022.080, "dur": 3.528, + "args": { + "External id": 3295969,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194024.963, "dur": 0.574, + "args": { + "External id": 3295970,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194025.865, "dur": 1.818, + "args": { + "External id": 3295971,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194027.037, "dur": 0.582, + "args": { + "External id": 3295972,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194027.959, "dur": 1.523, + "args": { + "External id": 3295973,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194028.943, "dur": 0.472, + "args": { + "External id": 3295974,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194029.747, "dur": 4.058, + "args": { + "External id": 3295975,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194033.131, "dur": 0.599, + "args": { + "External id": 3295976,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194034.091, "dur": 1.447, + "args": { + "External id": 3295977,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194034.811, "dur": 0.663, + "args": { + "External id": 3295978,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194035.774, "dur": 3.773, + "args": { + "External id": 3295979,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194036.809, "dur": 2.664, + "args": { + "External id": 3295980,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194039.819, "dur": 2.119, + "args": { + "External id": 3295981,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194040.996, "dur": 0.873, + "args": { + "External id": 3295982,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194042.172, "dur": 2.218, + "args": { + "External id": 3295983,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194043.642, "dur": 0.683, + "args": { + "External id": 3295984,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194046.718, "dur": 3.836, + "args": { + "External id": 3295985,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194049.840, "dur": 0.647, + "args": { + "External id": 3295986,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194051.021, "dur": 1.956, + "args": { + "External id": 3295987,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194052.311, "dur": 0.603, + "args": { + "External id": 3295988,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194053.236, "dur": 1.563, + "args": { + "External id": 3295989,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194054.077, "dur": 0.653, + "args": { + "External id": 3295990,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194055.034, "dur": 3.600, + "args": { + "External id": 3295991,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194057.944, "dur": 0.625, + "args": { + "External id": 3295992,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194058.896, "dur": 1.549, + "args": { + "External id": 3295993,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194059.595, "dur": 0.787, + "args": { + "External id": 3295994,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194060.922, "dur": 3.010, + "args": { + "External id": 3295995,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194061.947, "dur": 1.915, + "args": { + "External id": 3295996,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194064.202, "dur": 1.740, + "args": { + "External id": 3295997,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194065.207, "dur": 0.666, + "args": { + "External id": 3295998,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194066.180, "dur": 1.666, + "args": { + "External id": 3295999,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194067.086, "dur": 0.694, + "args": { + "External id": 3296000,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194070.526, "dur": 3.367, + "args": { + "External id": 3296001,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194073.271, "dur": 0.557, + "args": { + "External id": 3296002,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194074.168, "dur": 1.857, + "args": { + "External id": 3296003,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194075.376, "dur": 0.583, + "args": { + "External id": 3296004,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194076.289, "dur": 1.471, + "args": { + "External id": 3296005,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194076.974, "dur": 0.719, + "args": { + "External id": 3296006,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194078.005, "dur": 3.853, + "args": { + "External id": 3296007,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194081.195, "dur": 0.589, + "args": { + "External id": 3296008,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194082.125, "dur": 1.317, + "args": { + "External id": 3296009,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194082.797, "dur": 0.578, + "args": { + "External id": 3296010,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194083.679, "dur": 3.713, + "args": { + "External id": 3296011,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194084.751, "dur": 2.344, + "args": { + "External id": 3296012,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194087.679, "dur": 1.588, + "args": { + "External id": 3296013,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194088.637, "dur": 0.565, + "args": { + "External id": 3296014,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194089.498, "dur": 1.986, + "args": { + "External id": 3296015,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194090.776, "dur": 0.638, + "args": { + "External id": 3296016,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194093.990, "dur": 3.430, + "args": { + "External id": 3296017,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194096.786, "dur": 0.569, + "args": { + "External id": 3296018,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194097.674, "dur": 1.572, + "args": { + "External id": 3296019,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194098.619, "dur": 0.563, + "args": { + "External id": 3296020,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194099.510, "dur": 2.018, + "args": { + "External id": 3296021,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194100.407, "dur": 0.814, + "args": { + "External id": 3296022,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194101.755, "dur": 3.429, + "args": { + "External id": 3296023,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194104.515, "dur": 0.604, + "args": { + "External id": 3296024,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194105.443, "dur": 1.503, + "args": { + "External id": 3296025,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194106.290, "dur": 0.588, + "args": { + "External id": 3296026,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194107.176, "dur": 3.552, + "args": { + "External id": 3296027,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194108.143, "dur": 2.195, + "args": { + "External id": 3296028,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194110.991, "dur": 1.465, + "args": { + "External id": 3296029,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194111.822, "dur": 0.567, + "args": { + "External id": 3296030,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194112.684, "dur": 1.933, + "args": { + "External id": 3296031,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194113.982, "dur": 0.565, + "args": { + "External id": 3296032,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194116.830, "dur": 3.505, + "args": { + "External id": 3296033,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194119.651, "dur": 0.615, + "args": { + "External id": 3296034,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194120.579, "dur": 2.187, + "args": { + "External id": 3296035,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194121.859, "dur": 0.597, + "args": { + "External id": 3296036,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194123.021, "dur": 2.132, + "args": { + "External id": 3296037,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194123.902, "dur": 0.971, + "args": { + "External id": 3296038,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194125.379, "dur": 3.953, + "args": { + "External id": 3296039,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194128.366, "dur": 0.895, + "args": { + "External id": 3296040,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194129.597, "dur": 1.511, + "args": { + "External id": 3296041,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194130.406, "dur": 0.638, + "args": { + "External id": 3296042,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194131.337, "dur": 3.368, + "args": { + "External id": 3296043,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194132.525, "dur": 2.108, + "args": { + "External id": 3296044,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194134.961, "dur": 1.319, + "args": { + "External id": 3296045,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194135.641, "dur": 0.572, + "args": { + "External id": 3296046,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194136.510, "dur": 2.123, + "args": { + "External id": 3296047,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194137.822, "dur": 0.743, + "args": { + "External id": 3296048,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194141.100, "dur": 3.801, + "args": { + "External id": 3296049,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194144.099, "dur": 0.733, + "args": { + "External id": 3296050,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194145.147, "dur": 1.964, + "args": { + "External id": 3296051,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194146.322, "dur": 0.724, + "args": { + "External id": 3296052,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194147.371, "dur": 1.648, + "args": { + "External id": 3296053,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194148.223, "dur": 0.727, + "args": { + "External id": 3296054,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194149.254, "dur": 3.950, + "args": { + "External id": 3296055,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194152.324, "dur": 0.608, + "args": { + "External id": 3296056,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194153.468, "dur": 1.609, + "args": { + "External id": 3296057,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194154.328, "dur": 0.682, + "args": { + "External id": 3296058,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194155.308, "dur": 3.706, + "args": { + "External id": 3296059,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194156.779, "dur": 2.162, + "args": { + "External id": 3296060,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194159.276, "dur": 1.573, + "args": { + "External id": 3296061,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194160.165, "dur": 0.619, + "args": { + "External id": 3296062,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194161.077, "dur": 2.245, + "args": { + "External id": 3296063,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194162.349, "dur": 0.724, + "args": { + "External id": 3296064,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194165.605, "dur": 3.380, + "args": { + "External id": 3296065,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194168.337, "dur": 0.580, + "args": { + "External id": 3296066,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194169.229, "dur": 1.833, + "args": { + "External id": 3296067,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194170.413, "dur": 0.577, + "args": { + "External id": 3296068,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194171.363, "dur": 1.573, + "args": { + "External id": 3296069,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194172.214, "dur": 0.655, + "args": { + "External id": 3296070,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194173.183, "dur": 3.334, + "args": { + "External id": 3296071,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194175.876, "dur": 0.576, + "args": { + "External id": 3296072,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194176.776, "dur": 1.592, + "args": { + "External id": 3296073,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194177.422, "dur": 0.666, + "args": { + "External id": 3296074,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194178.597, "dur": 3.628, + "args": { + "External id": 3296075,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194179.989, "dur": 2.161, + "args": { + "External id": 3296076,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194182.512, "dur": 1.440, + "args": { + "External id": 3296077,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194183.315, "dur": 0.573, + "args": { + "External id": 3296078,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194184.181, "dur": 2.584, + "args": { + "External id": 3296079,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194185.450, "dur": 1.248, + "args": { + "External id": 3296080,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194189.275, "dur": 3.506, + "args": { + "External id": 3296081,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194192.174, "dur": 0.544, + "args": { + "External id": 3296082,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194193.029, "dur": 1.773, + "args": { + "External id": 3296083,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194194.165, "dur": 0.569, + "args": { + "External id": 3296084,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194195.053, "dur": 1.861, + "args": { + "External id": 3296085,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194195.978, "dur": 0.675, + "args": { + "External id": 3296086,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194197.166, "dur": 3.876, + "args": { + "External id": 3296087,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194200.424, "dur": 0.544, + "args": { + "External id": 3296088,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194201.314, "dur": 1.491, + "args": { + "External id": 3296089,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194202.134, "dur": 0.600, + "args": { + "External id": 3296090,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194203.059, "dur": 3.665, + "args": { + "External id": 3296091,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194204.352, "dur": 2.300, + "args": { + "External id": 3296092,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194206.989, "dur": 1.451, + "args": { + "External id": 3296093,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194207.800, "dur": 0.579, + "args": { + "External id": 3296094,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194208.666, "dur": 2.039, + "args": { + "External id": 3296095,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194209.940, "dur": 0.695, + "args": { + "External id": 3296096,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194212.983, "dur": 3.028, + "args": { + "External id": 3296097,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194215.404, "dur": 0.537, + "args": { + "External id": 3296098,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194216.252, "dur": 2.279, + "args": { + "External id": 3296099,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194217.676, "dur": 0.597, + "args": { + "External id": 3296100,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194218.800, "dur": 1.628, + "args": { + "External id": 3296101,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194219.464, "dur": 0.699, + "args": { + "External id": 3296102,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194220.683, "dur": 4.162, + "args": { + "External id": 3296103,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194224.178, "dur": 0.594, + "args": { + "External id": 3296104,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194225.096, "dur": 1.549, + "args": { + "External id": 3296105,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194226.040, "dur": 0.538, + "args": { + "External id": 3296106,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194226.891, "dur": 3.698, + "args": { + "External id": 3296107,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194228.136, "dur": 2.295, + "args": { + "External id": 3296108,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194230.849, "dur": 1.478, + "args": { + "External id": 3296109,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194231.665, "dur": 0.592, + "args": { + "External id": 3296110,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194232.557, "dur": 2.005, + "args": { + "External id": 3296111,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194233.723, "dur": 0.773, + "args": { + "External id": 3296112,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194236.843, "dur": 3.563, + "args": { + "External id": 3296113,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194239.700, "dur": 0.634, + "args": { + "External id": 3296114,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194240.664, "dur": 1.738, + "args": { + "External id": 3296115,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194241.750, "dur": 0.590, + "args": { + "External id": 3296116,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194242.657, "dur": 1.748, + "args": { + "External id": 3296117,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194243.547, "dur": 0.790, + "args": { + "External id": 3296118,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194244.680, "dur": 3.679, + "args": { + "External id": 3296119,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194247.703, "dur": 0.591, + "args": { + "External id": 3296120,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194248.779, "dur": 1.410, + "args": { + "External id": 3296121,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194249.524, "dur": 0.599, + "args": { + "External id": 3296122,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194250.416, "dur": 3.449, + "args": { + "External id": 3296123,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194251.670, "dur": 2.124, + "args": { + "External id": 3296124,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 1336756, "tid": 1336756, + "ts": 1587597194254.157, "dur": 1.623, + "args": { + "External id": 3296125,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194255.135, "dur": 0.582, + "args": { + "External id": 3296126,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 1336756, "tid": 1336756, + "ts": 1587597194278.484, "dur": 127.311, + "args": { + "External id": 3296127,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 1336756, "tid": 1336756, + "ts": 1587597194507.053, "dur": 132.655, + "args": { + "External id": 3296128,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[291], [], [], [], []], "Ev Idx": 11287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 1336756, "tid": 1336756, + "ts": 1587597194570.096, "dur": 48.008, + "args": { + "External id": 3296129,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[291], [], [], [], []], "Ev Idx": 11288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 1336756, "tid": 1336756, + "ts": 1587597194584.453, "dur": 1.145, + "args": { + "External id": 3296130,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 11289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 1336756, "tid": 1336756, + "ts": 1587597195062.258, "dur": 1147.138, + "args": { + "External id": 3296131,"Sequence number": 32987340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 1336756, "tid": 1336756, + "ts": 1587597195120.932, "dur": 60.968, + "args": { + "External id": 3296132,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597195125.869, "dur": 1.552, + "args": { + "External id": 3296133,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597195129.178, "dur": 0.497, + "args": { + "External id": 3296134,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 11293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 1336756, "tid": 1336756, + "ts": 1587597195208.504, "dur": 477.752, + "args": { + "External id": 3296135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 1336756, "tid": 1336756, + "ts": 1587597195212.797, "dur": 52.068, + "args": { + "External id": 3296136,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 1336756, "tid": 1336756, + "ts": 1587597195216.444, "dur": 10.464, + "args": { + "External id": 3296137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587597195222.355, "dur": 3.722, + "args": { + "External id": 3296138,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 1336756, "tid": 1336756, + "ts": 1587597195228.382, "dur": 35.807, + "args": { + "External id": 3296139,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 1336756, "tid": 1336756, + "ts": 1587597195276.003, "dur": 406.799, + "args": { + "External id": 3296140,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 11299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587597195308.860, "dur": 367.255, + "args": { + "External id": 3296141,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 3, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "3", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11300, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 1336756, "tid": 1336756, + "ts": 1587597195324.818, "dur": 345.410, + "args": { + "External id": 3296142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 1336756, "tid": 1336756, + "ts": 1587597195887.796, "dur": 278.889, + "args": { + "External id": 3296143,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 1336756, "tid": 1336756, + "ts": 1587597196018.561, "dur": 42.075, + "args": { + "External id": 3296144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 1336756, "tid": 1336756, + "ts": 1587597196043.219, "dur": 7.040, + "args": { + "External id": 3296145,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "3", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11304, "In msg nelems": 0, "Rank": 3, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 1336756, "tid": 1336756, + "ts": 1587597196106.986, "dur": 52.772, + "args": { + "External id": 3296146,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597196110.200, "dur": 1.403, + "args": { + "External id": 3296147,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597196113.129, "dur": 0.668, + "args": { + "External id": 3296148,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 11307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 1336756, "tid": 1336756, + "ts": 1587597196226.398, "dur": 21.513, + "args": { + "External id": 3296149,"Sequence number": 32987341, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 1336756, "tid": 1336756, + "ts": 1587597196237.002, "dur": 7.772, + "args": { + "External id": 3296150,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 1336756, "tid": 1336756, + "ts": 1587597196239.122, "dur": 5.422, + "args": { + "External id": 3296151,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 1336756, "tid": 1336756, + "ts": 1587597196572.720, "dur": 32.491, + "args": { + "External id": 3296152,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 1336756, "tid": 1336756, + "ts": 1587597196614.760, "dur": 22.055, + "args": { + "External id": 3296153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 1336756, "tid": 1336756, + "ts": 1587597196651.473, "dur": 27.619, + "args": { + "External id": 3296154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 1336756, "tid": 1336756, + "ts": 1587597196691.742, "dur": 26.520, + "args": { + "External id": 3296155,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597196695.417, "dur": 0.382, + "args": { + "External id": 3296156,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 11315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 1336756, "tid": 1336756, + "ts": 1587597196734.449, "dur": 0.859, + "args": { + "External id": 3296157,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 11316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 1336756, "tid": 1336756, + "ts": 1587597196854.372, "dur": 746.040, + "args": { + "External id": 3296158,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 1336756, "tid": 1336756, + "ts": 1587597197403.576, "dur": 165.257, + "args": { + "External id": 3296159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 1336756, "tid": 1336756, + "ts": 1587597197646.081, "dur": 28.968, + "args": { + "External id": 3296160,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 1336756, "tid": 1336756, + "ts": 1587597197649.561, "dur": 24.994, + "args": { + "External id": 3296161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336756, "tid": 1336756, + "ts": 1587597197678.546, "dur": 838.117, + "args": { + "External id": 3296162,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1336756, + "ts": 1587597197680.114, "dur": 836.307, + "args": { + "External id": 3296163,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1336756, + "ts": 1587597197682.376, "dur": 833.258, + "args": { + "External id": 3296164,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 1336756, "tid": 1336756, + "ts": 1587597198522.681, "dur": 68.401, + "args": { + "External id": 3296165,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587597198525.291, "dur": 38.373, + "args": { + "External id": 3296166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 1336756, "tid": 1336756, + "ts": 1587597198532.393, "dur": 3.417, + "args": { + "External id": 3296167,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 1336756, "tid": 1336756, + "ts": 1587597198537.539, "dur": 25.754, + "args": { + "External id": 3296168,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 11327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 1336756, "tid": 1336756, + "ts": 1587597198545.089, "dur": 2.686, + "args": { + "External id": 3296169,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 11328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 1336756, "tid": 1336756, + "ts": 1587597198567.504, "dur": 22.510, + "args": { + "External id": 3296170,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 1336756, "tid": 1336756, + "ts": 1587597198593.646, "dur": 38.040, + "args": { + "External id": 3296171,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 1336756, "tid": 1336756, + "ts": 1587597198594.817, "dur": 36.653, + "args": { + "External id": 3296172,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 1336756, "tid": 1336756, + "ts": 1587597198595.844, "dur": 35.358, + "args": { + "External id": 3296173,"Sequence number": 32987342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11332 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 1336756, "tid": 1336756, + "ts": 1587597198660.186, "dur": 5729.602, + "args": { + "External id": 3296174,"Record function id": 0, "Ev Idx": 11333 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 1336756, "tid": 1336756, + "ts": 1587597198691.057, "dur": 5673.651, + "args": { + "External id": 3296175,"Record function id": 0, "Ev Idx": 11334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 1336756, "tid": 1336756, + "ts": 1587597200049.080, "dur": 274.882, + "args": { + "External id": 3296176,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200068.815, "dur": 0.893, + "args": { + "External id": 3296177,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200070.985, "dur": 0.051, + "args": { + "External id": 3296178,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200071.561, "dur": 0.266, + "args": { + "External id": 3296179,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200072.344, "dur": 0.269, + "args": { + "External id": 3296180,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200073.147, "dur": 0.097, + "args": { + "External id": 3296181,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200073.670, "dur": 0.092, + "args": { + "External id": 3296182,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200074.193, "dur": 0.096, + "args": { + "External id": 3296183,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200074.712, "dur": 0.335, + "args": { + "External id": 3296184,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200075.554, "dur": 0.053, + "args": { + "External id": 3296185,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200076.156, "dur": 0.061, + "args": { + "External id": 3296186,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200076.760, "dur": 0.053, + "args": { + "External id": 3296187,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200077.177, "dur": 0.069, + "args": { + "External id": 3296188,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200077.633, "dur": 0.066, + "args": { + "External id": 3296189,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200077.992, "dur": 0.068, + "args": { + "External id": 3296190,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200078.411, "dur": 0.071, + "args": { + "External id": 3296191,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200078.980, "dur": 0.072, + "args": { + "External id": 3296192,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200079.402, "dur": 0.069, + "args": { + "External id": 3296193,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200079.804, "dur": 0.067, + "args": { + "External id": 3296194,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200080.220, "dur": 0.066, + "args": { + "External id": 3296195,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200080.555, "dur": 0.158, + "args": { + "External id": 3296196,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200081.060, "dur": 0.100, + "args": { + "External id": 3296197,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200081.536, "dur": 0.299, + "args": { + "External id": 3296198,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200082.143, "dur": 0.101, + "args": { + "External id": 3296199,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200082.562, "dur": 0.332, + "args": { + "External id": 3296200,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200083.265, "dur": 0.066, + "args": { + "External id": 3296201,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200083.660, "dur": 0.086, + "args": { + "External id": 3296202,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200084.099, "dur": 0.272, + "args": { + "External id": 3296203,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200084.776, "dur": 0.277, + "args": { + "External id": 3296204,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200085.410, "dur": 0.251, + "args": { + "External id": 3296205,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200086.011, "dur": 0.293, + "args": { + "External id": 3296206,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200086.672, "dur": 0.077, + "args": { + "External id": 3296207,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200087.035, "dur": 0.069, + "args": { + "External id": 3296208,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200087.444, "dur": 0.065, + "args": { + "External id": 3296209,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200087.847, "dur": 0.068, + "args": { + "External id": 3296210,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200088.261, "dur": 0.070, + "args": { + "External id": 3296211,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200088.663, "dur": 0.067, + "args": { + "External id": 3296212,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200089.148, "dur": 0.071, + "args": { + "External id": 3296213,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200089.508, "dur": 0.069, + "args": { + "External id": 3296214,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200089.905, "dur": 0.094, + "args": { + "External id": 3296215,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200090.452, "dur": 0.072, + "args": { + "External id": 3296216,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200090.858, "dur": 0.066, + "args": { + "External id": 3296217,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200091.345, "dur": 0.066, + "args": { + "External id": 3296218,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200091.780, "dur": 0.068, + "args": { + "External id": 3296219,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200092.271, "dur": 0.068, + "args": { + "External id": 3296220,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200092.689, "dur": 0.062, + "args": { + "External id": 3296221,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200093.222, "dur": 0.070, + "args": { + "External id": 3296222,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200093.575, "dur": 0.071, + "args": { + "External id": 3296223,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200094.014, "dur": 0.070, + "args": { + "External id": 3296224,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200094.469, "dur": 0.054, + "args": { + "External id": 3296225,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200094.858, "dur": 0.068, + "args": { + "External id": 3296226,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200095.412, "dur": 0.058, + "args": { + "External id": 3296227,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200095.924, "dur": 0.285, + "args": { + "External id": 3296228,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200096.543, "dur": 0.070, + "args": { + "External id": 3296229,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200096.945, "dur": 0.339, + "args": { + "External id": 3296230,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200097.620, "dur": 0.099, + "args": { + "External id": 3296231,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200098.076, "dur": 0.101, + "args": { + "External id": 3296232,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200098.699, "dur": 0.066, + "args": { + "External id": 3296233,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200099.097, "dur": 0.271, + "args": { + "External id": 3296234,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200099.695, "dur": 0.310, + "args": { + "External id": 3296235,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200100.335, "dur": 0.087, + "args": { + "External id": 3296236,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200100.776, "dur": 0.102, + "args": { + "External id": 3296237,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200101.415, "dur": 0.127, + "args": { + "External id": 3296238,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200101.903, "dur": 0.068, + "args": { + "External id": 3296239,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200102.328, "dur": 0.066, + "args": { + "External id": 3296240,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200102.745, "dur": 0.067, + "args": { + "External id": 3296241,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200103.283, "dur": 0.068, + "args": { + "External id": 3296242,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200103.762, "dur": 0.067, + "args": { + "External id": 3296243,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200104.357, "dur": 0.064, + "args": { + "External id": 3296244,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200104.688, "dur": 0.074, + "args": { + "External id": 3296245,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200105.120, "dur": 0.072, + "args": { + "External id": 3296246,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200105.746, "dur": 0.075, + "args": { + "External id": 3296247,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200106.170, "dur": 0.074, + "args": { + "External id": 3296248,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200106.644, "dur": 0.068, + "args": { + "External id": 3296249,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200107.057, "dur": 0.067, + "args": { + "External id": 3296250,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200107.472, "dur": 0.069, + "args": { + "External id": 3296251,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200107.845, "dur": 0.071, + "args": { + "External id": 3296252,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200108.199, "dur": 0.070, + "args": { + "External id": 3296253,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200108.683, "dur": 0.068, + "args": { + "External id": 3296254,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200109.065, "dur": 0.111, + "args": { + "External id": 3296255,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200109.493, "dur": 0.082, + "args": { + "External id": 3296256,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200136.662, "dur": 0.069, + "args": { + "External id": 3296257,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200137.468, "dur": 0.060, + "args": { + "External id": 3296258,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200137.956, "dur": 0.058, + "args": { + "External id": 3296259,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200138.395, "dur": 0.068, + "args": { + "External id": 3296260,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200138.801, "dur": 0.060, + "args": { + "External id": 3296261,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200139.288, "dur": 0.071, + "args": { + "External id": 3296262,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200139.677, "dur": 0.068, + "args": { + "External id": 3296263,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200139.996, "dur": 0.067, + "args": { + "External id": 3296264,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200140.363, "dur": 0.075, + "args": { + "External id": 3296265,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200140.710, "dur": 0.062, + "args": { + "External id": 3296266,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200141.124, "dur": 0.066, + "args": { + "External id": 3296267,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200141.448, "dur": 0.059, + "args": { + "External id": 3296268,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200141.868, "dur": 0.055, + "args": { + "External id": 3296269,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200142.208, "dur": 0.071, + "args": { + "External id": 3296270,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200142.657, "dur": 0.055, + "args": { + "External id": 3296271,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200143.061, "dur": 0.070, + "args": { + "External id": 3296272,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200143.382, "dur": 0.068, + "args": { + "External id": 3296273,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200143.799, "dur": 0.063, + "args": { + "External id": 3296274,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200144.256, "dur": 0.067, + "args": { + "External id": 3296275,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200144.702, "dur": 0.065, + "args": { + "External id": 3296276,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200145.087, "dur": 0.068, + "args": { + "External id": 3296277,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200145.466, "dur": 0.057, + "args": { + "External id": 3296278,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200145.888, "dur": 0.066, + "args": { + "External id": 3296279,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200146.352, "dur": 0.051, + "args": { + "External id": 3296280,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200146.767, "dur": 0.070, + "args": { + "External id": 3296281,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200147.180, "dur": 0.067, + "args": { + "External id": 3296282,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200147.627, "dur": 0.056, + "args": { + "External id": 3296283,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200148.003, "dur": 0.069, + "args": { + "External id": 3296284,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200148.437, "dur": 0.072, + "args": { + "External id": 3296285,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200148.845, "dur": 0.071, + "args": { + "External id": 3296286,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200149.179, "dur": 0.069, + "args": { + "External id": 3296287,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200149.563, "dur": 0.071, + "args": { + "External id": 3296288,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200149.971, "dur": 0.069, + "args": { + "External id": 3296289,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200150.360, "dur": 0.066, + "args": { + "External id": 3296290,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200150.813, "dur": 0.069, + "args": { + "External id": 3296291,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200151.456, "dur": 0.063, + "args": { + "External id": 3296292,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200151.841, "dur": 0.064, + "args": { + "External id": 3296293,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200152.210, "dur": 0.064, + "args": { + "External id": 3296294,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200152.688, "dur": 0.052, + "args": { + "External id": 3296295,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200153.093, "dur": 0.066, + "args": { + "External id": 3296296,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200153.605, "dur": 0.069, + "args": { + "External id": 3296297,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200154.016, "dur": 0.069, + "args": { + "External id": 3296298,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200154.432, "dur": 0.068, + "args": { + "External id": 3296299,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200154.885, "dur": 0.069, + "args": { + "External id": 3296300,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200155.284, "dur": 0.070, + "args": { + "External id": 3296301,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200155.902, "dur": 0.072, + "args": { + "External id": 3296302,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200156.270, "dur": 0.069, + "args": { + "External id": 3296303,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200156.660, "dur": 0.066, + "args": { + "External id": 3296304,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200157.025, "dur": 0.067, + "args": { + "External id": 3296305,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200157.399, "dur": 0.067, + "args": { + "External id": 3296306,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200164.640, "dur": 0.087, + "args": { + "External id": 3296307,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200166.361, "dur": 0.051, + "args": { + "External id": 3296308,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200166.852, "dur": 0.052, + "args": { + "External id": 3296309,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200167.204, "dur": 0.071, + "args": { + "External id": 3296310,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200167.587, "dur": 0.067, + "args": { + "External id": 3296311,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200167.961, "dur": 0.073, + "args": { + "External id": 3296312,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200168.408, "dur": 0.068, + "args": { + "External id": 3296313,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200168.891, "dur": 0.052, + "args": { + "External id": 3296314,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200169.260, "dur": 0.067, + "args": { + "External id": 3296315,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200169.577, "dur": 0.068, + "args": { + "External id": 3296316,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200169.973, "dur": 0.063, + "args": { + "External id": 3296317,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200170.360, "dur": 0.068, + "args": { + "External id": 3296318,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200170.773, "dur": 0.067, + "args": { + "External id": 3296319,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200171.166, "dur": 0.071, + "args": { + "External id": 3296320,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200171.540, "dur": 0.070, + "args": { + "External id": 3296321,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200171.930, "dur": 0.068, + "args": { + "External id": 3296322,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200172.327, "dur": 0.064, + "args": { + "External id": 3296323,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200172.714, "dur": 0.065, + "args": { + "External id": 3296324,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200173.131, "dur": 0.067, + "args": { + "External id": 3296325,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200173.505, "dur": 0.071, + "args": { + "External id": 3296326,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200174.063, "dur": 0.069, + "args": { + "External id": 3296327,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200174.433, "dur": 0.071, + "args": { + "External id": 3296328,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200174.856, "dur": 0.067, + "args": { + "External id": 3296329,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200175.204, "dur": 0.054, + "args": { + "External id": 3296330,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200175.616, "dur": 0.055, + "args": { + "External id": 3296331,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200175.994, "dur": 0.068, + "args": { + "External id": 3296332,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200176.407, "dur": 0.070, + "args": { + "External id": 3296333,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200176.773, "dur": 0.070, + "args": { + "External id": 3296334,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200177.123, "dur": 0.065, + "args": { + "External id": 3296335,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200177.569, "dur": 0.071, + "args": { + "External id": 3296336,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200177.961, "dur": 0.089, + "args": { + "External id": 3296337,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200178.440, "dur": 0.056, + "args": { + "External id": 3296338,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200178.811, "dur": 0.065, + "args": { + "External id": 3296339,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200179.244, "dur": 0.064, + "args": { + "External id": 3296340,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200179.619, "dur": 0.064, + "args": { + "External id": 3296341,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200180.040, "dur": 0.070, + "args": { + "External id": 3296342,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200180.438, "dur": 0.073, + "args": { + "External id": 3296343,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200180.891, "dur": 0.067, + "args": { + "External id": 3296344,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200181.309, "dur": 0.068, + "args": { + "External id": 3296345,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200181.713, "dur": 0.065, + "args": { + "External id": 3296346,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200182.125, "dur": 0.064, + "args": { + "External id": 3296347,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200182.503, "dur": 0.063, + "args": { + "External id": 3296348,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200182.943, "dur": 0.066, + "args": { + "External id": 3296349,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200183.382, "dur": 0.054, + "args": { + "External id": 3296350,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200183.810, "dur": 0.073, + "args": { + "External id": 3296351,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200184.190, "dur": 0.068, + "args": { + "External id": 3296352,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200184.574, "dur": 0.069, + "args": { + "External id": 3296353,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200184.964, "dur": 0.072, + "args": { + "External id": 3296354,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200185.334, "dur": 0.069, + "args": { + "External id": 3296355,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200185.710, "dur": 0.068, + "args": { + "External id": 3296356,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200186.101, "dur": 0.071, + "args": { + "External id": 3296357,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200186.505, "dur": 0.066, + "args": { + "External id": 3296358,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200186.894, "dur": 0.070, + "args": { + "External id": 3296359,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200187.229, "dur": 0.062, + "args": { + "External id": 3296360,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200187.642, "dur": 0.065, + "args": { + "External id": 3296361,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200188.150, "dur": 0.056, + "args": { + "External id": 3296362,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200188.595, "dur": 0.057, + "args": { + "External id": 3296363,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200188.968, "dur": 0.068, + "args": { + "External id": 3296364,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200189.367, "dur": 0.071, + "args": { + "External id": 3296365,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200189.823, "dur": 0.053, + "args": { + "External id": 3296366,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200190.185, "dur": 0.071, + "args": { + "External id": 3296367,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200190.548, "dur": 0.071, + "args": { + "External id": 3296368,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200190.952, "dur": 0.060, + "args": { + "External id": 3296369,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200191.304, "dur": 0.068, + "args": { + "External id": 3296370,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200191.760, "dur": 0.068, + "args": { + "External id": 3296371,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200192.167, "dur": 0.065, + "args": { + "External id": 3296372,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200192.554, "dur": 0.069, + "args": { + "External id": 3296373,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200192.963, "dur": 0.069, + "args": { + "External id": 3296374,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200193.370, "dur": 0.068, + "args": { + "External id": 3296375,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200193.724, "dur": 0.063, + "args": { + "External id": 3296376,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200194.285, "dur": 0.066, + "args": { + "External id": 3296377,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200194.662, "dur": 0.067, + "args": { + "External id": 3296378,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200195.060, "dur": 0.066, + "args": { + "External id": 3296379,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200195.456, "dur": 0.066, + "args": { + "External id": 3296380,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200195.906, "dur": 0.052, + "args": { + "External id": 3296381,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200196.317, "dur": 0.068, + "args": { + "External id": 3296382,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200196.653, "dur": 0.072, + "args": { + "External id": 3296383,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200197.050, "dur": 0.066, + "args": { + "External id": 3296384,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200197.495, "dur": 0.055, + "args": { + "External id": 3296385,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200197.886, "dur": 0.067, + "args": { + "External id": 3296386,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200198.321, "dur": 0.065, + "args": { + "External id": 3296387,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200198.767, "dur": 0.064, + "args": { + "External id": 3296388,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200199.147, "dur": 0.065, + "args": { + "External id": 3296389,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200199.569, "dur": 0.070, + "args": { + "External id": 3296390,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200200.076, "dur": 0.071, + "args": { + "External id": 3296391,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200200.529, "dur": 0.053, + "args": { + "External id": 3296392,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200200.895, "dur": 0.067, + "args": { + "External id": 3296393,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200201.220, "dur": 0.068, + "args": { + "External id": 3296394,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200201.618, "dur": 0.064, + "args": { + "External id": 3296395,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200202.048, "dur": 0.067, + "args": { + "External id": 3296396,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200202.387, "dur": 0.069, + "args": { + "External id": 3296397,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200203.036, "dur": 0.069, + "args": { + "External id": 3296398,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200203.460, "dur": 0.069, + "args": { + "External id": 3296399,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200203.816, "dur": 0.071, + "args": { + "External id": 3296400,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200204.287, "dur": 0.052, + "args": { + "External id": 3296401,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200204.633, "dur": 0.051, + "args": { + "External id": 3296402,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200204.989, "dur": 0.066, + "args": { + "External id": 3296403,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200205.397, "dur": 0.067, + "args": { + "External id": 3296404,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200205.755, "dur": 0.068, + "args": { + "External id": 3296405,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200206.225, "dur": 0.064, + "args": { + "External id": 3296406,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200206.653, "dur": 0.065, + "args": { + "External id": 3296407,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200207.035, "dur": 0.066, + "args": { + "External id": 3296408,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200207.384, "dur": 0.068, + "args": { + "External id": 3296409,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200207.813, "dur": 0.057, + "args": { + "External id": 3296410,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200208.226, "dur": 0.065, + "args": { + "External id": 3296411,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200208.607, "dur": 0.062, + "args": { + "External id": 3296412,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200208.970, "dur": 0.064, + "args": { + "External id": 3296413,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200209.412, "dur": 0.069, + "args": { + "External id": 3296414,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200209.907, "dur": 0.068, + "args": { + "External id": 3296415,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200210.556, "dur": 0.073, + "args": { + "External id": 3296416,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200210.984, "dur": 0.067, + "args": { + "External id": 3296417,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200211.375, "dur": 0.068, + "args": { + "External id": 3296418,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200211.794, "dur": 0.068, + "args": { + "External id": 3296419,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200212.172, "dur": 0.064, + "args": { + "External id": 3296420,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200212.600, "dur": 0.054, + "args": { + "External id": 3296421,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200212.982, "dur": 0.065, + "args": { + "External id": 3296422,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200213.354, "dur": 0.067, + "args": { + "External id": 3296423,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200213.738, "dur": 0.069, + "args": { + "External id": 3296424,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200214.144, "dur": 0.069, + "args": { + "External id": 3296425,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200214.544, "dur": 0.067, + "args": { + "External id": 3296426,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200214.930, "dur": 0.064, + "args": { + "External id": 3296427,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200215.358, "dur": 0.065, + "args": { + "External id": 3296428,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200215.840, "dur": 0.063, + "args": { + "External id": 3296429,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200216.380, "dur": 0.065, + "args": { + "External id": 3296430,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200216.693, "dur": 0.066, + "args": { + "External id": 3296431,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200217.427, "dur": 0.058, + "args": { + "External id": 3296432,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200217.803, "dur": 0.057, + "args": { + "External id": 3296433,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200218.570, "dur": 0.059, + "args": { + "External id": 3296434,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200218.870, "dur": 0.054, + "args": { + "External id": 3296435,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200219.700, "dur": 0.071, + "args": { + "External id": 3296436,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200220.034, "dur": 0.056, + "args": { + "External id": 3296437,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200220.781, "dur": 0.060, + "args": { + "External id": 3296438,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200221.082, "dur": 0.053, + "args": { + "External id": 3296439,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200221.875, "dur": 0.065, + "args": { + "External id": 3296440,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200222.187, "dur": 0.052, + "args": { + "External id": 3296441,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200222.755, "dur": 0.069, + "args": { + "External id": 3296442,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200223.272, "dur": 0.070, + "args": { + "External id": 3296443,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200224.228, "dur": 0.069, + "args": { + "External id": 3296444,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200224.542, "dur": 0.058, + "args": { + "External id": 3296445,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200225.409, "dur": 0.070, + "args": { + "External id": 3296446,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200225.719, "dur": 0.070, + "args": { + "External id": 3296447,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200226.542, "dur": 0.067, + "args": { + "External id": 3296448,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200226.859, "dur": 0.048, + "args": { + "External id": 3296449,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200227.835, "dur": 0.067, + "args": { + "External id": 3296450,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200228.145, "dur": 0.053, + "args": { + "External id": 3296451,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200228.873, "dur": 0.061, + "args": { + "External id": 3296452,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200229.178, "dur": 0.055, + "args": { + "External id": 3296453,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200229.885, "dur": 0.071, + "args": { + "External id": 3296454,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200230.197, "dur": 0.057, + "args": { + "External id": 3296455,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200230.925, "dur": 0.094, + "args": { + "External id": 3296456,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200231.264, "dur": 0.053, + "args": { + "External id": 3296457,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200231.979, "dur": 0.060, + "args": { + "External id": 3296458,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200232.283, "dur": 0.054, + "args": { + "External id": 3296459,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200233.115, "dur": 0.070, + "args": { + "External id": 3296460,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200233.427, "dur": 0.065, + "args": { + "External id": 3296461,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200234.359, "dur": 0.072, + "args": { + "External id": 3296462,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200234.679, "dur": 0.070, + "args": { + "External id": 3296463,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200235.280, "dur": 0.065, + "args": { + "External id": 3296464,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200235.598, "dur": 0.052, + "args": { + "External id": 3296465,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200236.358, "dur": 0.068, + "args": { + "External id": 3296466,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 1336756, "tid": 1336756, + "ts": 1587597200236.671, "dur": 0.054, + "args": { + "External id": 3296467,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 11626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 1336756, "tid": 1336756, + "ts": 1587597200826.067, "dur": 3452.926, + "args": { + "External id": 3296468,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "6.1083948902288394e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 1336756, "tid": 1336756, + "ts": 1587597203770.262, "dur": 352.758, + "args": { + "External id": 3296469,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "6.1083948902288394e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11628 + } + }, + { + "name": "process_name", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 0, + "args": { + "sort_index": 1336756 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 1381189, + "args": { + "name": "thread 1381189 (pt_autograd_3)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 1381189, + "args": { + "sort_index": 1381189 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 1381189, + "args": { + "name": "thread 1381189 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 1381189, + "args": { + "sort_index": 1381189 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 1336756, + "args": { + "name": "thread 1336756 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 1587595083381.013, "pid": 1336756, "tid": 1336756, + "args": { + "sort_index": 1336756 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 1587595083313.541, "dur": 2122725.843, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 1587595083313.541, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1587595083313.541 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 1587597283011.681 + } + ], + "traceName": "exp/mtp.1B.batch16.seqlen4096.context4096.warmup2000.update1.steps200000.lr2e-4.cosine/profile_trace/iteration_137216/rank3_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file